segmentation.py 12 KB
Newer Older
Anthony Larcher's avatar
Anthony Larcher committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# -*- coding: utf-8 -*-
#
# This file is part of s4d.
#
# s4d is a python package for speaker diarization.
# Home page: http://www-lium.univ-lemans.fr/s4d/
#
# s4d is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the License,
# or (at your option) any later version.
#
# s4d is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with s4d.  If not, see <http://www.gnu.org/licenses/>.


"""
Copyright 2014-2020 Sylvain Meignier
"""
Sylvain Meignier's avatar
Origin  
Sylvain Meignier committed
25

Anthony Larcher's avatar
Anthony Larcher committed
26
import copy
Sylvain Meignier's avatar
Origin  
Sylvain Meignier committed
27
28
29
30
31
import logging
import numpy as np
import pandas as pd
import scipy

Anthony Larcher's avatar
Anthony Larcher committed
32
33
34
35
36
from .diar import Diar
from .clustering.hac_bic import GaussFull
from .clustering.hac_utils import bic_square_root


Sylvain Meignier's avatar
?    
Sylvain Meignier committed
37
def sanity_check(cep, show, cluster='init'):
    """
    Remove the runs of identical consecutive MFCC frames of *cep* and return
    a diarization covering the remaining regions.

    :param cep: numpy.ndarray containing MFCC, one frame per row
    :param show: name of the show given to every created segment
    :param cluster: name of the cluster given to every created segment
    :return: a Diar object
    """
    table = Diar()

    # same[i] is True when frame i and frame i + 1 are strictly identical.
    # Every coefficient is compared: testing the *sum* of the differences
    # would also flag distinct frames whose differences cancel each other.
    same = np.all(np.diff(cep, axis=0) == 0, axis=1)
    # a change of value in `same` (xor of neighbours) marks a boundary
    # located at index + 1
    change = same[:-1] ^ same[1:]
    # boundaries as a list of feature indexes, closed by the last index
    idx = (np.flatnonzero(change) + 1).tolist() + [cep.shape[0]]
    # Regions alternate between valid and repeated frames. The pairs built
    # below keep every other region, so the list must start with the
    # beginning of a *valid* region: index 0 is only prepended when the
    # signal does not begin with repeated frames.
    if same.size == 0 or not same[0]:
        idx.insert(0, 0)
    # for each pair of indexes (idx[i] and idx[i + 1]), create a segment
    for i in range(0, len(idx) - 1, 2):
        table.append(show=show, start=idx[i], stop=idx[i + 1], cluster=cluster)

    return table


63
def init_seg(cep, show='empty', cluster='init'):
    """
    Build the initial segmentation: a single segment spanning all the
    features of *cep*, from index 0 to the number of rows of *cep*.

    :param cep: numpy.ndarray containing MFCC
    :param show: name of the show given to the segment
    :param cluster: str, name of the cluster given to the segment
    :return: a Diar object holding this single segment
    """
    nb_features = cep.shape[0]
    whole_show = Diar()
    whole_show.append(show=show, start=0, stop=nb_features, cluster=cluster)
    return whole_show

Sylvain Meignier's avatar
Sylvain Meignier committed
78

Sylvain Meignier's avatar
stable    
Sylvain Meignier committed
79
def adjust(cep, diarization):
    """
    Move the borders of the segments of *diarization* into low energy
    regions, then split the segments longer than 30 * 100 features.

    The energy is read from column 0 of *cep* and smoothed by a sliding mean
    over 100 features before being used.

    :todo: change numpy.convolve to the pandas version

    :param cep: a numpy.ndarray containing MFCC (column 0 is used as energy)
    :param diarization: a Diarization object
    :return: a Diar object
    """
    # averaging kernel: 100 identical weights summing to one
    kernel = np.full(100, 1.0 / 100)
    energy = cep[:, 0]
    smoothed = np.convolve(energy, kernel, mode='same')

    moved = _adjust(smoothed, diarization)
    return _split_e(smoothed, moved, 30 * 100)

Sylvain Meignier's avatar
Sylvain Meignier committed
97

Sylvain Meignier's avatar
stable    
Sylvain Meignier committed
98
def _adjust(smooth, diarization, window_size=25):
    """
    The segment boundaries of *diarization* are moved slightly: segment start
    and segment stop will be located in low energy regions.

    The input diarization is not modified: the function works on a deep copy
    sorted by start. For every segment but the first one, the minimum of
    *smooth* is searched around the segment start and becomes the new border
    shared with the previous segment when the two lengths computed below are
    both greater than 500.

    :param smooth: sliding means of the energy (numpy.ndarray)
    :param diarization: the diarization object to adjust
    :param window_size: the half size of the zone to find the minimum energy
        around a border
    :return: a Diar object (empty when *diarization* is empty)
    """
    diarization_out = copy.deepcopy(diarization)
    diarization_out.sort(['start'])
    # nothing to adjust, and no first segment to read
    if len(diarization_out) == 0:
        return diarization_out

    prev = diarization_out[0]
    for i in range(1, len(diarization_out)):
        cur = diarization_out[i]
        start = cur['start']
        # Clamp the lower bound: a negative index would be interpreted from
        # the end of the array, giving an empty or unrelated search zone.
        low = max(0, start - window_size)
        zone = smooth[low:start + window_size]
        if len(zone) > 0:
            # absolute index of the lowest energy around the border
            border = low + int(np.argmin(zone))
            # offset of the border from the nominal (unclamped) zone start
            p = border - (start - window_size)
            l1 = border - prev['start']
            # NOTE(review): this expression is not the length of any
            # segment; `cur['stop'] - border` was probably intended. It is
            # kept numerically unchanged so that results stay stable --
            # confirm before changing it.
            l2 = prev['stop'] - p + start - window_size
            if l1 > 500 and l2 > 500:
                prev['stop'] = border
                cur['start'] = border
        prev = cur
    return diarization_out
Sylvain Meignier's avatar
Origin  
Sylvain Meignier committed
123
124


Sylvain Meignier's avatar
stable    
Sylvain Meignier committed
125
def _split_e(smooth, diarization, split_size):
    """
    Cut the long segments of *diarization* at their points of lowest energy
    so that the resulting segments are not longer than *split_size*.

    Each segment is handed to :func:`_split_seg` with a minimum segment size
    of 250; the pieces are collected in the segment list of a new Diar.
    Sizes are expressed in number of features.

    :param smooth: sliding means of the energy (numpy.ndarray)
    :param diarization: a Diarization object
    :param split_size: maximum size of a segment
    :return: a Diar object
    """
    min_seg_size = 250
    splitted = Diar()
    for seg in diarization:
        _split_seg(smooth, seg, min_seg_size, split_size, splitted.segments)
    return splitted
Sylvain Meignier's avatar
Origin  
Sylvain Meignier committed
139
140


Sylvain Meignier's avatar
stable    
Sylvain Meignier committed
141
def _split_seg(smooth, segment, min_seg_size, split_size, lst):
    """
    *segment*, a long segment, is cut at its points of lowest energy in order
    to yield segments not longer than *split_size*. The resulting segments
    are appended into *lst*, ordered by start. Sizes are expressed in number
    of features.

    A cut point is searched at least *min_seg_size* features away from both
    ends of the piece being cut. A piece longer than *split_size* in which no
    such cut point exists (piece shorter than twice *min_seg_size*, or
    outside of *smooth*) is appended as it is.

    *segment* itself is never modified: only deep copies are altered and
    stored.

    :param smooth: sliding means of the energy (numpy.ndarray)
    :param segment: a segment, accessed through its 'start' and 'stop' keys
    :param min_seg_size: minimum size of a segment
    :param split_size: maximum size of a segment
    :param lst: the new segments are added to this list
    :return: None
    """
    # An explicit stack replaces the recursion, so that the number of cuts is
    # not bounded by the interpreter recursion limit.
    pending = [segment]
    while pending:
        current = pending.pop()
        seg_start = current['start']
        seg_stop = current['stop']
        low = seg_start + min_seg_size
        high = seg_stop - min_seg_size

        cut = None
        # `low < high` also prevents a negative upper bound from being
        # interpreted from the end of the array.
        if seg_stop - seg_start > split_size and low < high:
            zone = smooth[low:high]
            if len(zone) > 0:
                cut = low + int(np.argmin(zone))

        # Only cut strictly inside the segment, otherwise one of the two
        # pieces would be the segment itself and the loop would never end.
        if cut is None or not seg_start < cut < seg_stop:
            lst.append(copy.deepcopy(current))
            continue

        row_left = copy.deepcopy(current)
        row_left['stop'] = cut
        row_right = copy.deepcopy(current)
        row_right['start'] = cut
        # the left part is pushed last to be processed first: segments are
        # appended to lst in chronological order
        pending.append(row_right)
        pending.append(row_left)
Sylvain Meignier's avatar
Origin  
Sylvain Meignier committed
167
168


Sylvain Meignier's avatar
new    
Sylvain Meignier committed
169

Sylvain Meignier's avatar
Origin  
Sylvain Meignier committed
170
171
def div_gauss(cep, show='empty', win=250, shift=0):
    """
    Segmentation based on gaussian divergence.

    The segmentation detects the instantaneous change points corresponding to
    segment boundaries. The proposed algorithm is based on the detection of
    local maxima. It detects the change points through a gaussian divergence
    (see equation below), computed using Gaussians with diagonal covariance
    matrices. The left and right gaussians are estimated over a five-second
    window sliding along the whole signal (2.5 seconds for each gaussian,
    given *win* =250 features).
    A change point, i.e. a segment boundary, is present in the middle of the
    window when the gaussian divergence score reaches a local maximum.


        :math:`GD(s_l,s_r)=(\\mu_r-\\mu_l)^t\\Sigma_l^{-1/2}\\Sigma_r^{-1/2}(\\mu_r-\\mu_l)`

    where :math:`s_l` is the left segment modeled by the mean :math:`\\mu_l`
    and the diagonal covariance matrix :math:`\\Sigma_l`, :math:`s_r` is the
    right segment modeled by the mean :math:`\\mu_r` and the diagonal
    covariance matrix :math:`\\Sigma_r`.

    When *cep* holds fewer than ``2 * win`` features the two windows cannot
    be placed and a single segment covering all the features is returned.

    :param cep: numpy array of frames
    :param show: name of the show given to every created segment
    :param win: windows size in number of frames
    :param shift: offset added to the start and stop of every created segment
    :return: a diarization object (s4d annotation)
    """
    # `import scipy` alone does not guarantee that the signal sub-package is
    # loaded, hence the explicit import.
    from scipy.signal import argrelmax

    length = cep.shape[0]
    diarization_out = Diar()
    if length < 2 * win:
        # no position where both windows fit: nothing to detect
        diarization_out.append(show=show, start=shift, stop=shift + length,
                               cluster='S0')
        return diarization_out

    # start and stop of the rolling windows A
    start_a = win - 1  # end of NAN
    stop_a = length - win
    # start and stop of the rolling windows B
    start_b = win + win - 1  # end of nan + delay
    stop_b = length

    # put features in a Pandas DataFrame
    df = pd.DataFrame(cep)
    # compute rolling mean and std in the window of size win, get numpy array
    # the first win - 1 rows of mean and std are NAN
    r = df.rolling(window=win, center=False)
    mean = r.mean().values
    std = r.std().values

    # compute GD scores using 2 windows A and B
    # (a null standard deviation in a window gives a non finite score)
    dist = (np.square(mean[start_a:stop_a, :] - mean[start_b:stop_b, :]) / (
        std[start_a:stop_a, :] * std[start_b:stop_b, :])).sum(axis=1)

    # replace missing value to match cep size
    dist_pad = np.pad(dist, (win - 1, win), 'constant',
                      constant_values=(dist[0], dist[-1]))

    # find local maximal at + or - win size
    borders = argrelmax(dist_pad, order=win)[0].tolist()
    # append the first and last
    borders = [0] + borders + [length]

    # one segment, with its own cluster name, between two consecutive borders
    for spk, (start, stop) in enumerate(zip(borders[:-1], borders[1:])):
        diarization_out.append(show=show, start=shift + start,
                               stop=shift + stop, cluster='S' + str(spk))
    return diarization_out
Sylvain Meignier's avatar
Origin  
Sylvain Meignier committed
237
238


Sylvain Meignier's avatar
stable    
Sylvain Meignier committed
239
240
241
242
def segmentation(cep, diarization, win_size=250):
    """
    Apply the gaussian divergence segmentation (:func:`div_gauss`) inside
    every segment of *diarization*.

    A segment whose duration is greater than twice *win_size* is replaced by
    the segments found by :func:`div_gauss` on its features; a shorter
    segment is kept as it is. The clusters of the resulting segments are
    finally renamed 'S0', 'S1', ... following the iteration order.

    :param cep: numpy.ndarray containing the features
    :param diarization: a Diarization object
    :param win_size: windows size in number of frames given to div_gauss
    :return: a Diar object
    """
    diarization_out = Diar()
    min_duration = 2 * win_size
    for segment in diarization:
        if segment.duration() <= min_duration:
            # too short to hold the two windows: kept untouched
            diarization_out.append_seg(segment)
            continue
        sub_diarization = div_gauss(segment.seg_features(cep),
                                    show=segment['show'],
                                    win=win_size,
                                    shift=segment['start'])
        diarization_out.append_diar(sub_diarization)

    # give a distinct cluster name to every segment
    for index, segment in enumerate(diarization_out):
        segment['cluster'] = 'S' + str(index)

    return diarization_out
Sylvain Meignier's avatar
Origin  
Sylvain Meignier committed
257

Sylvain Meignier's avatar
Sylvain Meignier committed
258

Sylvain Meignier's avatar
stable    
Sylvain Meignier committed
259
def bic_linear(cep, diarization, alpha, sr=False):
    """
    This segmentation over the signal fuses consecutive segments of the same
    speaker from the start to the end of the record.  The measure employs
    the :math:`\\Delta BIC` based on Bayesian Information Criterion , using
    full covariance Gaussians (see :class:`gauss.GaussFull`), as defined in
    equation below.

        :math:`\\Delta BIC_{i,j} = PBIC_{i+j} - PBIC_{i} - PBIC_{j} -  P`

        :math:`PBIC_{x}  = \\frac{n_x}{2} \\log|\\Sigma_x|`

        :math:`cst  = \\frac{1}{2} \\alpha \\left(d + \\frac{d(d+1)}{2}\\right)`

        :math:`P  = cst \\times log(n_i+n_j)`

    where :math:`|\\Sigma_i|`, :math:`|\\Sigma_j|` and :math:`|\\Sigma|` are the
    determinants of gaussians associated to the left and right segments
    :math:`i`, :math:`j`
    and :math:`i+j`. :math:`\\alpha` is a parameter to set up. The penalty
    factor :math:`P` depends on :math:`d`, the dimension of the cep, as
    well as on :math:`n_i` and :math:`n_j`, referring to the total length of
    left segment :math:`i` and right segment :math:`j` respectively.

    if *sr* is True, BIC distance is replaced by the square root bic
    (see :py:func:`clustering.hac_utils.bic_square_root`)

    Two consecutive segments are fused when :math:`\\Delta BIC` is negative.
    Segments separated by a hole, or belonging to different shows, are never
    compared. The input diarization is not modified: the function works on a
    deep copy sorted by show and start.

    :param cep: numpy.ndarray
    :param diarization: a Diarization object
    :param alpha: the threshold
    :param sr: boolean
    :return: a Diar object
    """
    diarization_out = copy.deepcopy(diarization)
    diarization_out.sort(['show', 'start'])
    dim = cep.shape[1]
    cst = GaussFull.cst_bic(dim, alpha)

    if len(diarization_out) <= 1:
        return diarization_out

    def _gaussian(segment):
        """Estimate the full covariance Gaussian of the features of *segment*."""
        model = GaussFull(segment['cluster'], dim)
        model.add(segment.seg_features(cep))
        model.compute()
        return model

    segment1 = diarization_out[0]
    model1 = _gaussian(segment1)
    i = 1

    while i < len(diarization_out):
        segment2 = diarization_out[i]
        model2 = _gaussian(segment2)

        if (segment2['show'] != segment1['show']
                or segment2['start'] > segment1['stop'] + 1):
            # Hole between the segments, or change of show: no fusion.
            # The model must follow the segment, otherwise the next
            # comparison would use the Gaussian of a segment located before
            # the break.
            segment1 = segment2
            model1 = model2
            i += 1
            continue

        model12 = GaussFull.merge(model1, model2)
        if sr:
            p = bic_square_root(model1.count, model2.count, alpha, dim)
        else:
            p = cst * np.log(model1.count + model2.count)
        delta_bic = model12.partial_bic - model1.partial_bic - model2.partial_bic - p

        if delta_bic < 0.0:
            logging.debug('linear remove %s %s: %i/%i %f', model1.name, model2.name, i,
                          len(diarization_out), delta_bic)
            # fuse segment2 into segment1; index i now designates the
            # following segment, so it is not incremented
            segment1['stop'] = segment2['stop']
            model1 = model12
            del diarization_out[i]
        else:
            logging.debug('linear next %s %s: %i/%i %f', model1.name, model2.name, i,
                          len(diarization_out), delta_bic)
            segment1 = segment2
            model1 = model2
            i += 1
    return diarization_out