# -*- coding: utf-8 -*-
#
# This file is part of s4d.
#
# s4d is a python package for speaker diarization.
# Home page: http://www-lium.univ-lemans.fr/s4d/
#
# s4d is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the License,
# or (at your option) any later version.
#
# s4d is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with s4d.  If not, see <http://www.gnu.org/licenses/>.


"""
Copyright 2014-2020 Sylvain Meignier
"""

import copy
import logging
import numpy as np
import pandas as pd
import scipy
# scipy.signal must be imported explicitly: ``import scipy`` alone does not
# guarantee that the ``signal`` sub-package is loaded.
import scipy.signal

from .diar import Diar
from .clustering.hac_bic import GaussFull
from .clustering.hac_utils import bic_square_root


def sanity_check(cep, show, cluster='init'):
    """
    Remove the runs of identical consecutive MFCC frames of *cep* and return
    a diarization covering the remaining regions.

    :param cep: 2-D numpy.ndarray containing MFCC (one frame per row)
    :param show: name of the show, stored in every created segment
    :param cluster: cluster name stored in every created segment
    :return: a Diar object
    """
    table = Diar()
    # same[i] is True when frame i and frame i + 1 are strictly identical.
    # Every coefficient is tested: summing the differences would wrongly
    # report equality when positive and negative differences cancel out.
    same = ~np.any(np.diff(cep, axis=0), axis=1)
    # xor of neighbouring flags: a True at index i marks a boundary at i + 1
    bits = same[:-1] ^ same[1:]
    # boundaries as feature indexes, framed by the first and last index
    idx = [0] + (np.flatnonzero(bits) + 1).tolist() + [cep.shape[0]]
    # Regions alternate between "distinct" and "repeated" frames. When the
    # signal starts with repeated frames the first region must be skipped,
    # otherwise the repeated regions would be the ones that are kept.
    first = 1 if same.size > 0 and same[0] else 0
    for i in range(first, len(idx) - 1, 2):
        table.append(show=show, start=idx[i], stop=idx[i + 1], cluster=cluster)

    return table


def init_seg(cep, show='empty', cluster='init'):
    """
    Return an initial segmentation composed of one segment from the first to
    the last feature in *cep*.

    :param cep: numpy.ndarray containing MFCC (one frame per row)
    :param show: name of the show, stored in the segment
    :param cluster: cluster name (str) stored in the segment
    :return: a Diar object
    """
    table_out = Diar()
    table_out.append(show=show, start=0, stop=cep.shape[0], cluster=cluster)
    return table_out


def adjust(cep, diarization):
    """
    Move the borders of the segments of *diarization* into low energy regions
    and split the segments longer than 3000 frames (30 s assuming 100 frames
    per second).

    :todo: change numpy.convolve to the panda version

    :param cep: a numpy.ndarray containing MFCC; column 0 is used as the energy
    :param diarization: a Diar object
    :return: a Diar object
    """
    energy_index = 0

    # sliding mean of the energy over 100 frames
    box = np.ones(100) / 100
    smooth = np.convolve(cep[:, energy_index], box, mode='same')
    adj_table = _adjust(smooth, diarization)

    return _split_e(smooth, adj_table, 30 * 100)


def _adjust(smooth, diarization, window_size=25):
    """
    The segment boundaries of *diarization* are moved slightly: segment start
    and segment stop will be located in low energy regions.

    :param smooth: sliding means of the energy (numpy.ndarray)
    :param diarization: the diarization object to adjust (left untouched, a
        deep copy is modified and returned)
    :param window_size: the half size of the zone to find the minimum energy
        around a border
    :return: a Diar object
    """
    diarization_out = copy.deepcopy(diarization)
    diarization_out.sort(['start'])
    if len(diarization_out) == 0:
        # nothing to adjust
        return diarization_out

    prev = diarization_out[0]
    for i in range(1, len(diarization_out)):
        cur = diarization_out[i]
        start = cur['start']
        # Clamp the lower bound: a negative slice start would be interpreted
        # relative to the end of *smooth* and select the wrong (or an empty)
        # region.
        lo = max(start - window_size, 0)
        window = smooth[lo:start + window_size]
        if len(window) == 0:
            # the border lies outside of the energy curve: leave it as is
            prev = cur
            continue
        p = np.argmin(window)
        border = lo + p
        l1 = border - prev['start']
        # NOTE(review): l1 is the length of the previous segment once the
        # border is moved; l2 was presumably meant to be the length of the
        # current one (cur['stop'] - border). The historical expression is
        # kept unchanged to preserve existing results - confirm the intent.
        l2 = prev['stop'] - p + lo
        if l1 > 500 and l2 > 500:
            prev['stop'] = border
            cur['start'] = border
        prev = cur
    return diarization_out


def _split_e(smooth, diarization, split_size, min_seg_size=250):
    """
    Long segments of *diarization* are cut at their points of lowest energy
    in order to yield segments no longer than *split_size* frames.

    :param smooth: sliding means of the energy (numpy.ndarray)
    :param diarization: a Diar object
    :param split_size: maximum size of a segment
    :param min_seg_size: minimum distance kept between a cut point and the
        borders of the segment being cut
    :return: a Diar object
    """
    diarization_out = Diar()
    for segment in diarization:
        _split_seg(smooth, segment, min_seg_size, split_size, diarization_out.segments)
    return diarization_out


def _split_seg(smooth, segment, min_seg_size, split_size, lst):
    """
    *segment*, a long segment, is cut repeatedly at its points of lowest
    energy in order to yield segments no longer than *split_size*. Cut points
    are searched at least *min_seg_size* frames away from both borders. The
    resulting segments are appended into *lst* from left to right.

    A segment that cannot be cut (no room to search for a cut point, or a cut
    point that would not shorten it) is appended unchanged.

    :param smooth: sliding means of the energy (numpy.ndarray)
    :param segment: a segment
    :param min_seg_size: minimum size of a segment
    :param split_size: maximum size of a segment
    :param lst: the new segments are added to this list
    :return:
    """
    # An explicit stack replaces the recursion so that very long segments
    # cannot exhaust the interpreter recursion limit. The right part is
    # pushed first: the left part is processed first, keeping *lst* ordered.
    pending = [segment]
    while pending:
        current = pending.pop()
        seg_start = current['start']
        seg_stop = current['stop']

        cut = None
        if seg_stop - seg_start > split_size:
            lo = seg_start + min_seg_size
            window = smooth[lo:seg_stop - min_seg_size]
            if len(window) > 0:
                cut = lo + np.argmin(window)

        if cut is None or not seg_start < cut < seg_stop:
            lst.append(copy.deepcopy(current))
            continue

        row_left = copy.deepcopy(current)
        row_left['stop'] = cut
        row_right = copy.deepcopy(current)
        row_right['start'] = cut
        pending.append(row_right)
        pending.append(row_left)


def div_gauss(cep, show='empty', win=250, shift=0):
    r"""
    Segmentation based on gaussian divergence.

    The segmentation detects the instantaneous change points corresponding to
    segment boundaries. The proposed algorithm is based on the detection of
    local maxima. It detects the change points through a gaussian divergence
    (see equation below), computed using Gaussians with diagonal covariance
    matrices. The left and right gaussians are estimated over a five-second
    window sliding along the whole signal (2.5 seconds for each gaussian,
    given *win* = 250 features).
    A change point, i.e. a segment boundary, is present in the middle of the
    window when the gaussian divergence score reaches a local maximum.

        :math:`GD(s_l,s_r)=(\mu_r-\mu_l)^t\Sigma_l^{-1/2}\Sigma_r^{-1/2}(\mu_r-\mu_l)`

    where :math:`s_l` is the left segment modeled by the mean :math:`\mu_l`
    and the diagonal covariance matrix :math:`\Sigma_l`, :math:`s_r` is the
    right segment modeled by the mean :math:`\mu_r` and the diagonal
    covariance matrix :math:`\Sigma_r`.

    When *cep* holds fewer than ``2 * win`` frames the two windows do not fit
    and a single segment covering all the frames is returned.

    :param cep: numpy array of frames (one frame per row)
    :param show: name of the show, stored in every created segment
    :param win: windows size in number of frames
    :param shift: offset added to the start and stop of every created segment
    :return: a diarization object (s4d annotation)
    """
    length = cep.shape[0]
    diarization_out = Diar()

    if length < 2 * win:
        # no divergence score can be computed: no change point
        diarization_out.append(show=show, start=shift, stop=shift + length,
                               cluster='S0')
        return diarization_out

    # start and stop of the rolling windows A
    start_a = win - 1  # end of NAN
    stop_a = length - win
    # start and stop of the rolling windows B
    start_b = win + win - 1  # end of nan + delay
    stop_b = length

    # put features in a Pandas DataFrame
    df = pd.DataFrame(cep)
    # compute rolling mean and std in the window of size win, get numpy array
    # mean and std have NAN at the beginning of the output array
    r = df.rolling(window=win, center=False)
    mean = r.mean().values
    std = r.std().values

    # compute GD scores using 2 windows A and B
    dist = (np.square(mean[start_a:stop_a, :] - mean[start_b:stop_b, :]) / (
        std[start_a:stop_a, :] * std[start_b:stop_b, :])).sum(axis=1)

    # replace missing value to match cep size
    dist_pad = np.pad(dist, (win - 1, win), 'constant',
                      constant_values=(dist[0], dist[-1]))

    # find local maximal at + or - win size
    borders = scipy.signal.argrelmax(dist_pad, order=win)[0].tolist()
    # append the first and last
    borders = [0] + borders + [length]

    for spk, (start, stop) in enumerate(zip(borders[:-1], borders[1:])):
        diarization_out.append(show=show, start=shift + start,
                               stop=shift + stop, cluster='S' + str(spk))
    return diarization_out


def segmentation(cep, diarization, win_size=250):
    """
    Apply :func:`div_gauss` inside every segment of *diarization* whose
    duration exceeds ``2 * win_size``; the other segments are appended as
    they are. The clusters of the resulting segments are then renamed
    ``S0``, ``S1``, ... in iteration order.

    :param cep: numpy array of frames (one frame per row)
    :param diarization: a Diar object
    :param win_size: windows size in number of frames, see :func:`div_gauss`
    :return: a Diar object
    """
    diarization_out = Diar()
    for segment in diarization:
        if segment.duration() > 2 * win_size:
            cep_seg = segment.seg_features(cep)
            tmp = div_gauss(cep_seg, show=segment['show'], win=win_size,
                            shift=segment['start'])
            diarization_out.append_diar(tmp)
        else:
            diarization_out.append_seg(segment)

    for i, segment in enumerate(diarization_out):
        segment['cluster'] = 'S' + str(i)

    return diarization_out


def _segment_model(cep, segment, dim):
    """Return the GaussFull model computed on the features of *segment*."""
    model = GaussFull(segment['cluster'], dim)
    model.add(segment.seg_features(cep))
    model.compute()
    return model


def bic_linear(cep, diarization, alpha, sr=False):
    r"""
    This segmentation over the signal fuses consecutive segments of the same
    speaker from the start to the end of the record. The measure employs the
    :math:`\Delta BIC` based on Bayesian Information Criterion, using full
    covariance Gaussians (see :class:`gauss.GaussFull`), as defined in
    equation below.

        :math:`\Delta BIC_{i,j} = PBIC_{i+j} - PBIC_{i} - PBIC_{j} - P`

        :math:`PBIC_{x} = \frac{n_x}{2} \log|\Sigma_x|`

        :math:`cst = \frac{1}{2} \alpha \left(d + \frac{d(d+1)}{2}\right)`

        :math:`P = cst \times log(n_i+n_j)`

    where :math:`|\Sigma_i|`, :math:`|\Sigma_j|` and :math:`|\Sigma|` are the
    determinants of gaussians associated to the left and right segments
    :math:`i`, :math:`j` and :math:`i+j`. :math:`\alpha` is a parameter to
    set up. The penalty factor :math:`P` depends on :math:`d`, the dimension
    of the cep, as well as on :math:`n_i` and :math:`n_j`, refering to the
    total length of left segment :math:`i` and right segment :math:`j`
    respectively.

    Two consecutive segments are fused when :math:`\Delta BIC < 0`. Segments
    separated by a hole (more than one frame between the stop of the left
    one and the start of the right one) or belonging to different shows are
    never fused.

    if *sr* is True, BIC distance is replaced by the square root bic
    (see :py:func:`clustering.hac_utils.bic_square_root`)

    :param cep: numpy.ndarray
    :param diarization: a Diar object (left untouched, a deep copy is
        modified and returned)
    :param alpha: the threshold
    :param sr: boolean
    :return: a Diar object
    """
    diarization_out = copy.deepcopy(diarization)
    diarization_out.sort(['show', 'start'])
    dim = cep.shape[1]
    cst = GaussFull.cst_bic(dim, alpha)

    if len(diarization) <= 1:
        return diarization_out

    segment1 = diarization_out[0]
    model1 = _segment_model(cep, segment1, dim)

    i = 1
    while i < len(diarization_out):
        segment2 = diarization_out[i]
        model2 = _segment_model(cep, segment2, dim)

        if (segment2['show'] != segment1['show']
                or segment2['start'] > segment1['stop'] + 1):
            # Not contiguous: restart from segment2. The model must follow
            # the segment, otherwise the next comparison would use the
            # Gaussian of a segment that is no longer the left neighbour.
            segment1 = segment2
            model1 = model2
            i += 1
            continue

        model12 = GaussFull.merge(model1, model2)
        if sr:
            p = bic_square_root(model1.count, model2.count, alpha, dim)
        else:
            p = cst * np.log(model1.count + model2.count)
        delta_bic = model12.partial_bic - model1.partial_bic - model2.partial_bic - p

        if delta_bic < 0.0:
            logging.debug('linear remove %s %s: %i/%i %f', model1.name,
                          model2.name, i, len(diarization_out), delta_bic)
            # fuse segment2 into segment1; index i now designates the
            # segment that followed segment2
            segment1['stop'] = segment2['stop']
            model1 = model12
            del diarization_out[i]
        else:
            logging.debug('linear next %s %s: %i/%i %f', model1.name,
                          model2.name, i, len(diarization_out), delta_bic)
            segment1 = segment2
            model1 = model2
            i += 1
    return diarization_out