# -*- coding: utf-8 -*-
#
# This file is part of s4d.
#
# s4d is a python package for speaker diarization.
# Home page: http://www-lium.univ-lemans.fr/s4d/
#
# s4d is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the License,
# or (at your option) any later version.
#
# s4d is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with s4d.  If not, see <http://www.gnu.org/licenses/>.


"""
Anthony Larcher's avatar
Anthony Larcher committed
23
Copyright 2014-2020 Sylvain Meignier
Anthony Larcher's avatar
Anthony Larcher committed
24
25
"""

import copy
import numpy as np
import logging

def argmin(distances, nb):
    """
    Find the smallest non-NaN value in the top-left ``nb`` x ``nb`` corner of
    a distance matrix.

    :param distances: a two-dimensional numpy.ndarray
    :param nb: int, number of leading rows and columns taken into account
    :return: row and column indexes, the value. ``(0, 0, numpy.inf)`` is
        returned when ``nb <= 1`` or when numpy.nanargmin raises a ValueError
        (the case logged below as "value are NaN")
    """
    if nb <= 1:
        return 0, 0, np.inf
    tmp_dist = distances[0:nb, 0:nb]
    try:
        # numpy.nanargmin: gives the absolute position in the flattened
        # matrix, i.e. a single number, ignoring NaN entries
        # numpy.unravel_index: converts it back to row and column positions
        i, j = np.unravel_index(np.nanargmin(tmp_dist), tmp_dist.shape)
    except ValueError:
        logging.warning('value are NaN, nb:'+str(nb))
        logging.warning(distances)
        logging.warning(tmp_dist)
        return 0, 0, np.inf

    # i and j are below nb, so they index the full matrix directly
    v = distances[i, j]
    return i, j, v

def argmax(distances, nb):
    """
    Find the largest non-NaN value in the top-left ``nb`` x ``nb`` corner of
    a distance matrix.

    :param distances: a two-dimensional numpy.ndarray
    :param nb: int, number of leading rows and columns taken into account
    :return: row and column indexes, the value. ``(0, 0, numpy.inf)`` is
        returned when ``nb <= 1``; ``(0, 0, -numpy.inf)`` is returned when
        numpy.nanargmax raises a ValueError (the case logged below as
        "value are NaN")
    """
    if nb <= 1:
        # NOTE(review): the NaN fallback below returns -inf while this guard
        # returns +inf; kept as is because callers may rely on it -- confirm
        return 0, 0, np.inf
    tmp_dist = distances[0:nb, 0:nb]
    try:
        # numpy.nanargmax: gives the absolute position in the flattened
        # matrix, i.e. a single number, ignoring NaN entries
        # numpy.unravel_index: converts it back to row and column positions
        i, j = np.unravel_index(np.nanargmax(tmp_dist), tmp_dist.shape)
    except ValueError:
        logging.warning('value are NaN, nb:'+str(nb))
        logging.warning(distances)
        logging.warning(tmp_dist)
        return 0, 0, -np.inf

    # i and j are below nb, so they index the full matrix directly
    v = distances[i, j]
    return i, j, v


def roll(mat, j):
    """
    Build a new matrix from mat with column j and row j taken out
    :param mat: numpy.ndarray
    :param j: int, index of both the column and the row to drop
    :return: numpy.ndarray
    """
    # drop the column first, then the row of the same index
    without_column = np.delete(mat, j, axis=1)
    return np.delete(without_column, j, axis=0)

def bic_square_root(ni, nj, alpha, dim):
    """
    Compute a BIC square root distance described in [Stafylakis2010]_.

    .. [Stafylakis2010] T. Stafylakis, V. Katsouros, and G. Carayannis. The segmental bayesian information criterion and its applications to speaker diarization. Selected Topics in Signal Processing, IEEE Journal of, 4(5):857-866, 2010.

    :param ni: quantity attached to speaker i; only its logarithm and square
        root are used, so it has to be positive.
        NOTE(review): earlier documentation called it a covariance matrix,
        the formula suggests an amount of data -- confirm
    :param nj: same quantity for speaker j
    :param alpha: a threshold, scales both terms of the result
    :param dim: the dimension of the features
    :return: a float
    """
    nij = ni + nj
    log_ni, log_nj, log_nij = np.log(ni), np.log(nj), np.log(nij)

    # weights of the two terms, driven by alpha and the feature dimension
    covariance_weight = 0.5 * alpha * (0.5 * ((dim + 1) * dim))
    mean_weight = 0.5 * alpha * dim

    covariance_term = (log_ni + log_nj) - log_nij
    mean_term = ((np.sqrt(ni) * log_ni) + (np.sqrt(nj) * log_nj)) - (np.sqrt(nij) * log_nij)
    return (covariance_weight * covariance_term) + (mean_weight * mean_term)

def stat_server_remove(stat_server, index):
    """
    Remove the data at position index from every array of the stat server.

    segset, modelset, start and stop are reduced with numpy.delete called
    without an axis; stat0 and stat1 lose their row ``index`` (axis 0). Each
    attribute is rebound to the array returned by numpy.delete.

    :param stat_server: object exposing segset, modelset, start, stop, stat0
        and stat1
    :param index: the index to remove
    """
    for name in ('segset', 'modelset', 'start', 'stop'):
        setattr(stat_server, name, np.delete(getattr(stat_server, name), index))
    for name in ('stat0', 'stat1'):
        setattr(stat_server, name, np.delete(getattr(stat_server, name), index, axis=0))

def stat_server_merge(stat_server, i, j, wi=1.0, wj=1.0):
    """
    Merge the ith and jth stat0 and stat1 into the ith data, remove the jth data.

    Row i of stat0 and of stat1 is overwritten in place with the weighted
    mean ``(wi * row_j + wj * row_i) / (wi + wj)``, then the entry j is
    dropped from the stat server with stat_server_remove.

    :param stat_server: object exposing stat0, stat1, stop and the other
        arrays handled by stat_server_remove
    :param i: index destination
    :param j: index removed
    :param wi: weight multiplying row j in the mean
    :param wj: weight multiplying row i in the mean
    """
    # NOTE(review): wi is applied to row j and wj to row i, which looks
    # swapped with respect to the parameter names. It makes no difference
    # with the default weights; kept unchanged -- confirm against callers.
    if stat_server.stop[i] != 0 and stat_server.stop[i] is not None:
        # only the statistics are merged, start/stop of entry i are untouched
        logging.warning('segment information will be wrong')
    stat_server.stat0[i, :] = (wi * stat_server.stat0[j, :] + wj * stat_server.stat0[i, :]) / (wi + wj)
    stat_server.stat1[i, :] = (wi * stat_server.stat1[j, :] + wj * stat_server.stat1[i, :]) / (wi + wj)
    stat_server_remove(stat_server, j)
def idmap_remove(idmap, index):
    """
    Remove the data at position index from every array of the idmap.

    leftids, rightids, start and stop are each rebound to the array returned
    by numpy.delete.

    :param idmap: object exposing leftids, rightids, start and stop
    :param index: the index to remove
    """
    for name in ('leftids', 'rightids', 'start', 'stop'):
        setattr(idmap, name, np.delete(getattr(idmap, name), index))

def scores_remove(scores, index_model=None, index_seg=None):
    """
    Remove the data at position index_model and/or index_seg.

    A segment index removes one entry of segset and the matching column
    (axis 1) of scoremask and scoremat; a model index removes one entry of
    modelset and the matching row (axis 0). The segment removal is applied
    first. An index left to None is skipped.

    :param scores: object exposing modelset, segset, scoremask and scoremat
    :param index_model: the index in model set to remove
    :param index_seg: the index in segment set to remove
    """
    matrices = ('scoremask', 'scoremat')

    if index_seg is not None:
        scores.segset = np.delete(scores.segset, index_seg)
        for name in matrices:
            setattr(scores, name, np.delete(getattr(scores, name), index_seg, axis=1))

    if index_model is not None:
        scores.modelset = np.delete(scores.modelset, index_model)
        for name in matrices:
            setattr(scores, name, np.delete(getattr(scores, name), index_model, axis=0))


def scores2distance(scores, threshold):
    """
    Turn a score matrix into a distance matrix and map the threshold
    accordingly.

    The score matrix is averaged with its transpose and negated, then shifted
    so that the smallest off-diagonal value becomes 1. The diagonal of the
    result is set to 0. The threshold goes through the same negation and
    shift. scores.scoremat itself is not modified.

    :param scores: object exposing a square numpy.ndarray as scoremat
    :param threshold: threshold expressed on the score scale
    :return: the distance matrix and the threshold on the distance scale
    """
    symmetric = (scores.scoremat + scores.scoremat.T) / 2.0
    distance = symmetric * -1.0

    # hide the diagonal while looking for the smallest off-diagonal value
    np.fill_diagonal(distance, np.inf)
    offset = np.min(distance) - 1

    distance -= offset
    np.fill_diagonal(distance, 0.0)
    shifted_threshold = -1.0 * threshold - offset
    return distance, shifted_threshold