features_server.py 30.7 KB
Newer Older
Anthony Larcher's avatar
Anthony Larcher committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# -*- coding: utf-8 -*-
#
# This file is part of SIDEKIT.
#
# SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/
#
# SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/
#    
# SIDEKIT is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as 
# published by the Free Software Foundation, either version 3 of the License, 
# or (at your option) any later version.
#
# SIDEKIT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with SIDEKIT.  If not, see <http://www.gnu.org/licenses/>.

"""
Anthony Larcher's avatar
v1.3.7    
Anthony Larcher committed
25
Copyright 2014-2021 Sylvain Meignier and Anthony Larcher
Anthony Larcher's avatar
Anthony Larcher committed
26
27
28
29

    :mod:`features_server` provides methods to manage features

"""
Anthony Larcher's avatar
Anthony Larcher committed
30
import copy
Anthony Larcher's avatar
Anthony Larcher committed
31
import multiprocessing
32
import numpy
Anthony Larcher's avatar
Anthony Larcher committed
33
import logging
34
35
import h5py

36
from sidekit.frontend.features import pca_dct, shifted_delta_cepstral, compute_delta, framing, dct_basis
Anthony Larcher's avatar
Anthony Larcher committed
37
from sidekit.frontend.io import read_hdf5_segment
38
39
40
from sidekit.frontend.vad import label_fusion
from sidekit.frontend.normfeat import cms, cmvn, stg, cep_sliding_norm, rasta_filt
from sidekit.sv_utils import parse_mask
41
from sidekit.features_extractor import FeaturesExtractor
42

Anthony Larcher's avatar
Anthony Larcher committed
43

44
__license__ = "LGPL"
Anthony Larcher's avatar
Anthony Larcher committed
45
__author__ = "Anthony Larcher & Sylvain Meignier"
Anthony Larcher's avatar
v1.3.7    
Anthony Larcher committed
46
__copyright__ = "Copyright 2014-2021 Anthony Larcher"
47
48
49
50
__maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production"
__docformat__ = 'reStructuredText'
Anthony Larcher's avatar
Anthony Larcher committed
51
#comment
Anthony Larcher's avatar
Anthony Larcher committed
52

Anthony Larcher's avatar
Anthony Larcher committed
53
class FeaturesServer(object):
Anthony Larcher's avatar
Anthony Larcher committed
54
    """
Anthony Larcher's avatar
Anthony Larcher committed
55
56
57
    Management of features. FeaturesServer instances load datasets from a HDF5 files
    (that can be read from disk or produced by a FeaturesExtractor object)
    Datasets read from one or many files are concatenated and processed
Anthony Larcher's avatar
Anthony Larcher committed
58
59
    """

Anthony Larcher's avatar
Anthony Larcher committed
60
61
    def __init__(self,
                 features_extractor=None,
62
63
64
65
                 feature_filename_structure=None,
                 sources=None,
                 dataset_list=None,
                 mask=None,
Anthony Larcher's avatar
Anthony Larcher committed
66
                 feat_norm=None,
67
                 global_cmvn=None,
Anthony Larcher's avatar
Anthony Larcher committed
68
                 dct_pca=False,
Anthony Larcher's avatar
Anthony Larcher committed
69
                 dct_pca_config=None,
Anthony Larcher's avatar
Anthony Larcher committed
70
                 sdc=False,
Anthony Larcher's avatar
Anthony Larcher committed
71
                 sdc_config=None,
Anthony Larcher's avatar
Anthony Larcher committed
72
73
                 delta=None,
                 double_delta=None,
Anthony Larcher's avatar
Anthony Larcher committed
74
                 delta_filter=None,
75
76
                 context=None,
                 traps_dct_nb=None,
Anthony Larcher's avatar
Anthony Larcher committed
77
                 rasta=None,
Anthony Larcher's avatar
Anthony Larcher committed
78
                 keep_all_features=True):
79
        """
Anthony Larcher's avatar
Anthony Larcher committed
80
81
82
83
84
85
86
        Initialize a FeaturesServer for two cases:
        1. each call to load will load datasets from a single file. This mode requires to provide a dataset_list
        (lists of datasets to load from each file.
        2. each call to load will load datasets from several files (possibly several datasets from each file)
        and concatenate them. In this mode, you should provide a FeaturesServer for each source, thus, datasets
        read from each source can be post-processed independently before being concatenated with others. The dataset
        resulting from the concatenation from all sources is then post-processed.
Anthony Larcher's avatar
modif    
Anthony Larcher committed
87

Anthony Larcher's avatar
Anthony Larcher committed
88
        :param features_extractor: a FeaturesExtractor if required to extract features from audio file
Anthony Larcher's avatar
modif    
Anthony Larcher committed
89
        if None, data are loaded from an existing HDF5 file
90
        :param feature_filename_structure: structure of the filename to use to load HDF5 files
Anthony Larcher's avatar
Anthony Larcher committed
91
92
93
94
95
96
97
        :param sources: tuple of sources to load features different files (optional: for the case where datasets
        are loaded from several files and concatenated.
        :param dataset_list: string of the form '["cep", "fb", vad", energy", "bnf"]' (only when loading datasets
        from a single file) list of datasets to load.
        :param mask: string of the form '[1-3,10,15-20]' mask to apply on the concatenated dataset
        to select specific components. In this example, coefficients 1,2,3,10,15,16,17,18,19,20 are kept
        In this example,
Anthony Larcher's avatar
Anthony Larcher committed
98
99
100
101
102
103
104
105
106
107
108
109
110
        :param feat_norm: tpye of normalization to apply as post-processing
        :param global_cmvn: boolean, if True, use a global mean and std when normalizing the frames
        :param dct_pca: if True, add temporal context by using a PCA-DCT approach
        :param dct_pca_config: configuration of the PCA-DCT, default is (12, 12, none)
        :param sdc: if True, compute shifted delta cepstra coefficients
        :param sdc_config: configuration to compute sdc coefficients, default is (1,3,7)
        :param delta: if True, append the first order derivative
        :param double_delta: if True, append the second order derivative
        :param delta_filter: coefficients of the filter used to compute delta coefficients
        :param context: add a left and right context, default is (0,0)
        :param traps_dct_nb: number of DCT coefficients to keep when computing TRAP coefficients
        :param rasta: if True, perform RASTA filtering
        :param keep_all_features: boolean, if True, keep all features, if False, keep frames according to the vad labels
111
        :return:
Anthony Larcher's avatar
Anthony Larcher committed
112
        """
Anthony Larcher's avatar
Anthony Larcher committed
113
        self.features_extractor = None
114
115
116
117
118
        self.feature_filename_structure = '{}'
        self.sources = ()
        self.dataset_list = None

        # Post processing options
Anthony Larcher's avatar
Anthony Larcher committed
119
120
        self.mask = None
        self.feat_norm = None
121
        self.global_cmvn = None
122
        self.dct_pca = False
Anthony Larcher's avatar
Anthony Larcher committed
123
        self.dct_pca_config = (12, 12, None)
Anthony Larcher's avatar
Anthony Larcher committed
124
        self.sdc = False
125
        self.sdc_config = (1, 3, 7)
Anthony Larcher's avatar
Anthony Larcher committed
126
127
        self.delta = False
        self.double_delta = False
128
        self.delta_filter = numpy.array([.25, .5, .25, 0, -.25, -.5, -.25])
Anthony Larcher's avatar
Anthony Larcher committed
129
        self.context = (0, 0)
130
        self.traps_dct_nb = 0
Anthony Larcher's avatar
Anthony Larcher committed
131
        self.rasta = False
Anthony Larcher's avatar
Anthony Larcher committed
132
        self.keep_all_features = True
133

Anthony Larcher's avatar
Anthony Larcher committed
134
135
        if features_extractor is not None:
            self.features_extractor = features_extractor
136
137
138
139
140
141
142
143
        if feature_filename_structure is not None:
            self.feature_filename_structure = feature_filename_structure
        if sources is not None:
            self.sources = sources
        if dataset_list is not None:
            self.dataset_list = dataset_list
        if mask is not None:
            self.mask = parse_mask(mask)
Anthony Larcher's avatar
Anthony Larcher committed
144
145
        if feat_norm is not None:
            self.feat_norm = feat_norm
146
147
        if global_cmvn is not None:
            self.global_cmvn = global_cmvn
Anthony Larcher's avatar
Anthony Larcher committed
148
149
150
151
152
153
154
155
        if dct_pca is not None:
            self.dct_pca = dct_pca
        if dct_pca_config is not None:
            self.dct_pca_config = dct_pca_config
        if sdc is not None:
            self.sdc = sdc
        if sdc_config is not None:
            self.sdc_config = sdc_config
Anthony Larcher's avatar
Anthony Larcher committed
156
157
158
159
160
        if delta is not None:
            self.delta = delta
        if double_delta is not None:
            self.double_delta = double_delta
        if delta_filter is not None:
Anthony Larcher's avatar
Anthony Larcher committed
161
            self.delta_filter = delta_filter
162
163
164
165
        if context is not None:
            self.context = context
        if traps_dct_nb is not None:
            self.traps_dct_nb = traps_dct_nb
Anthony Larcher's avatar
Anthony Larcher committed
166
167
        if rasta is not None:
            self.rasta = rasta
Anthony Larcher's avatar
Anthony Larcher committed
168
169
        if keep_all_features is not None:
            self.keep_all_features = keep_all_features
170

Anthony Larcher's avatar
Anthony Larcher committed
171
        self.show = 'empty'
Anthony Larcher's avatar
Anthony Larcher committed
172
        self.input_feature_filename = 'empty'
Anthony Larcher's avatar
Anthony Larcher committed
173
        self.start_stop = (None, None)
Sylvain Meignier's avatar
Sylvain Meignier committed
174
        self.previous_load = None
Anthony Larcher's avatar
Anthony Larcher committed
175
176

    def __repr__(self):
177
178
179
180
181
        """

        :return: a string to display the object
        """
        ch = '\t show: {} \n\n'.format(self.show)
Anthony Larcher's avatar
Anthony Larcher committed
182
        ch += '\t input_feature_filename: {} \n\n'.format(self.input_feature_filename)
183
184
185
186
187
188
189
190
191
192
193
194
195
196
        ch += '\t feature_filename_structure: {} \n'.format(self.feature_filename_structure)
        ch += '\t  \n'
        ch += '\t  \n\n'
        ch += '\t Post processing options: \n'
        ch += '\t\t mask: {}  \n'.format(self.mask)
        ch += '\t\t feat_norm: {} \n'.format(self.feat_norm)
        ch += '\t\t dct_pca: {}, dct_pca_config: {} \n'.format(self.dct_pca,
                                                               self.dct_pca_config)
        ch += '\t\t sdc: {}, sdc_config: {} \n'.format(self.sdc,
                                                       self.sdc_config)
        ch += '\t\t delta: {}, double_delta: {}, delta_filter: {} \n'.format(self.delta,
                                                                             self.double_delta,
                                                                             self.delta_filter)
        ch += '\t\t rasta: {} \n'.format(self.rasta)
Anthony Larcher's avatar
Anthony Larcher committed
197
        ch += '\t\t keep_all_features: {} \n'.format(self.keep_all_features)
198

Anthony Larcher's avatar
Anthony Larcher committed
199
        return ch
Anthony Larcher's avatar
Anthony Larcher committed
200

201
    def post_processing(self, feat, label, global_mean=None, global_std=None):
        """
        Run the configured post-processing chain on cepstral coefficients, filter banks or
        bottleneck parameters once they have been computed or read from file.

        Steps, in order: optional RASTA filtering, one temporal-context transform
        (deltas, otherwise PCA-DCT, otherwise shifted delta cepstra), optional masking of
        the coefficients, fusion of the labels, optional normalization and, when
        keep_all_features is False, selection of the frames indexed by the labels.

        :param feat: the matrix of acoustic parameters to post-process
        :param label: the VAD labels for the acoustic parameters
        :param global_mean: vector of mean to use for normalization
        :param global_std: vector of standard deviation to use for normalization

        :return: the matrix of acoustic parameters and their VAD labels after post-processing
        """
        # RASTA filtering comes first, when requested
        if self.rasta:
            feat, label = self._rasta(feat, label)

        # Temporal context: only one transform is applied, deltas take precedence
        if self.delta or self.double_delta:
            feat = self._delta_and_2delta(feat)
        elif self.dct_pca:
            pca_dct_args = (self.dct_pca_config[0], self.dct_pca_config[1], self.dct_pca_config[2])
            feat = pca_dct(feat, *pca_dct_args)
        elif self.sdc:
            sdc_kwargs = dict(d=self.sdc_config[0], p=self.sdc_config[1], k=self.sdc_config[2])
            feat = shifted_delta_cepstral(feat, **sdc_kwargs)

        # Keep only the coefficients selected by the mask
        if self.mask is not None:
            feat = self._mask(feat)

        # Smooth the labels and fuse the channels if more than one.
        logging.debug('Smooth the labels and fuse the channels if more than one')
        label = label_fusion(label)

        # Normalization works in place on feat, nothing is returned by _normalize
        if self.feat_norm is not None:
            self._normalize(label, feat, global_mean, global_std)
        else:
            logging.debug('no norm')

        if self.keep_all_features:
            return feat, label

        # Otherwise only the frames (and labels) indexed by the labels are returned
        logging.debug('no keep all')
        return feat[label], label[label]
Anthony Larcher's avatar
Anthony Larcher committed
246

247
    def _mask(self, cep):
Anthony Larcher's avatar
Anthony Larcher committed
248
        """
Anthony Larcher's avatar
Anthony Larcher committed
249
250
251
        Keep only the MFCC index present in the filter list
        :param cep: acoustic parameters to filter

252
        :return: return the list of MFCC given by filter list
Anthony Larcher's avatar
Anthony Larcher committed
253
        """
254
255
256
257
        if len(self.mask) == 0:
            raise Exception('filter list is empty')
        logging.debug('applied mask')
        return cep[:, self.mask]
Anthony Larcher's avatar
Anthony Larcher committed
258

259
    def _normalize(self, label, cep, global_mean=None, global_std=None):
Anthony Larcher's avatar
Anthony Larcher committed
260
        """
Anthony Larcher's avatar
Anthony Larcher committed
261
        Normalize acoustic parameters in place
Anthony Larcher's avatar
Anthony Larcher committed
262

Anthony Larcher's avatar
Anthony Larcher committed
263
264
265
266
        :param label: vad labels to use for normalization
        :param cep: acoustic parameters to normalize
        :param global_mean: mean vector to use if provided
        :param global_std: standard deviation vector to use if provided
Anthony Larcher's avatar
Anthony Larcher committed
267
268
269
        """
        # Perform feature normalization on the entire session.
        if self.feat_norm is None:
270
            logging.debug('no norm')
Anthony Larcher's avatar
Anthony Larcher committed
271
            pass
Anthony Larcher's avatar
Anthony Larcher committed
272
        elif self.feat_norm == 'cms':
273
            logging.debug('cms norm')
274
            cms(cep, label, global_mean)
Anthony Larcher's avatar
Anthony Larcher committed
275
        elif self.feat_norm == 'cmvn':
276
            logging.debug('cmvn norm')
277
            cmvn(cep, label, global_mean, global_std)
Anthony Larcher's avatar
Anthony Larcher committed
278
        elif self.feat_norm == 'stg':
279
            logging.debug('stg norm')
280
            stg(cep, label=label)
Sylvain Meignier's avatar
Sylvain Meignier committed
281
        elif self.feat_norm == 'cmvn_sliding':
282
            logging.debug('sliding cmvn norm')
Sylvain Meignier's avatar
Sylvain Meignier committed
283
            cep_sliding_norm(cep, label=label, win=301, center=True, reduce=True)
Sylvain Meignier's avatar
Sylvain Meignier committed
284
        elif self.feat_norm == 'cms_sliding':
285
            logging.debug('sliding cms norm')
Sylvain Meignier's avatar
Sylvain Meignier committed
286
            cep_sliding_norm(cep, label=label, win=301, center=True, reduce=False)
Anthony Larcher's avatar
Anthony Larcher committed
287
        else:
Anthony Larcher's avatar
Anthony Larcher committed
288
            logging.warning('Wrong feature normalisation type')
Anthony Larcher's avatar
Anthony Larcher committed
289

290
    def _delta_and_2delta(self, cep):
Anthony Larcher's avatar
Anthony Larcher committed
291
        """
292
293
294
295
        Add deltas and double deltas.
        :param cep: a matrix of cepstral cefficients

        :return: the cepstral coefficient stacked with deltas and double deltas
Anthony Larcher's avatar
Anthony Larcher committed
296
        """
297
        if self.delta:
298
            logging.debug('add delta')
299
300
301
            delta = compute_delta(cep, filt=self.delta_filter)
            cep = numpy.column_stack((cep, delta))
            if self.double_delta:
302
                logging.debug('add delta delta')
303
304
305
                double_delta = compute_delta(delta, filt=self.delta_filter)
                cep = numpy.column_stack((cep, double_delta))
        return cep
Anthony Larcher's avatar
Anthony Larcher committed
306

307
308
309
310
311
312
    def _rasta(self, cep, label):
        """
        Performs RASTA filtering if required.
        The two first frames are copied from the third to keep
        the length consistent
        !!! if vad is None: label[] is empty
Anthony Larcher's avatar
Anthony Larcher committed
313

Anthony Larcher's avatar
Anthony Larcher committed
314
315
        :param cep: the acoustic features to filter
        :param label: the VAD label
316
317
318
        :return:
        """
        if self.rasta:
319
            logging.debug('perform RASTA %s', self.rasta)
320
321
322
323
            cep = rasta_filt(cep)
            cep[:2, :] = cep[2, :]
            label[:2] = label[2]
        return cep, label
Anthony Larcher's avatar
Anthony Larcher committed
324

325
    def get_context(self, feat, start=None, stop=None, label=None):
        """
        Add a left and right context to each frame.
        First and last frames are duplicated to provide context at the beginning and at the end.

        :param feat: sequence of feature frames (one frame per line)
        :param start: index of the first frame of the selected segment, default is 0
        :param stop: index of the last frame of the selected segment, default is the number of frames
        :param label: vad label if available

        :return: a sequence of frames with their left and right context, and the labels of the
            selected segment (None if no label is given)
        """
        if start is None:
            start = 0
        if stop is None:
            stop = feat.shape[0]

        left_ctx = self.context[0]
        right_ctx = self.context[1]
        win_size = 1 + sum(self.context)

        # Number of frames to duplicate on each side so that every selected frame has a full context
        pad_before = max(left_ctx - start, 0)
        pad_after = max(stop - feat.shape[0] + right_ctx + 1, 0)
        padded = numpy.pad(feat, ((pad_before, pad_after), (0, 0)), mode='edge')

        # Segment boundaries, shifted by the padding inserted in front of the features
        first = start - left_ctx + pad_before
        last = stop + right_ctx + pad_before
        context_feat = framing(padded[first:last, :], win_size=win_size).reshape(-1, win_size * feat.shape[1])

        context_label = label[start:stop] if label is not None else None

        return context_feat, context_label

    def get_traps(self, feat, start=None, stop=None, label=None):
        """
        Compute TRAP parameters. The input frames are concatenated to add their left and right context,
        a Hamming window is applied and a DCT reduces the dimensionality of the resulting vector.

        :param feat: input acoustic parameters to process
        :param start: index of the first frame of the selected segment, default is 0
        :param stop: index of the last frame of the selected segment, default is the number of frames
        :param label: vad label if available

        :return: a sequence of TRAP parameters, and the labels of the selected segment
            (None if no label is given)
        """
        if start is None:
            start = 0
        if stop is None:
            stop = feat.shape[0]

        left_ctx = self.context[0]
        right_ctx = self.context[1]
        win_size = 1 + sum(self.context)

        # Pad widths are clamped to zero, as in get_context: numpy.pad rejects negative widths,
        # which occur as soon as the segment starts after the left context or stops before the
        # end of the features.
        pad_before = max(left_ctx - start, 0)
        pad_after = max(stop - feat.shape[0] + right_ctx + 1, 0)
        padded = numpy.pad(feat, ((pad_before, pad_after), (0, 0)), mode='edge')

        # Segment boundaries, shifted by the padding inserted in front of the features
        first = start - left_ctx + pad_before
        last = stop + right_ctx + pad_before
        context_feat = framing(padded[first:last, :], win_size=win_size).transpose(0, 2, 1)

        # DCT basis weighted by a Hamming window spanning the whole context
        hamming_dct = (dct_basis(self.traps_dct_nb, win_size) *
                       numpy.hamming(win_size)).T

        if label is not None:
            context_label = label[start:stop]
        else:
            context_label = None

        # Project every context window on the weighted DCT basis, then regroup the results per frame
        traps = numpy.dot(
            context_feat.reshape(-1, hamming_dct.shape[0]),
            hamming_dct
        ).reshape(context_feat.shape[0], -1)

        return traps, context_label

    def load(self, show, channel=0, input_feature_filename=None, label=None, start=None, stop=None):
397
        """
Anthony Larcher's avatar
Anthony Larcher committed
398
        Depending of the setting of the FeaturesServer, can either:
399

Anthony Larcher's avatar
Anthony Larcher committed
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
        1. Get the datasets from a single HDF5 file
            The HDF5 file is loaded from disk or processed on the fly
            via the FeaturesExtractor of the current FeaturesServer

        2. Load datasets from multiple input HDF5 files. The datasets are post-processed separately, then concatenated
            and post-process

        :param show: ID of the show to load (should be the same for each HDF5 file to read from)
        :param channel: audio channel index in case the parameters are extracted from an audio file
        :param input_feature_filename: name of the input feature file in case it is independent from the ID of the show
        :param label: vad labels
        :param start: index of the first frame of the selected segment
        :param stop: index of the last frame of the selected segment

        :return: acoustic parameters and their vad labels
415
        """
Anthony Larcher's avatar
Anthony Larcher committed
416
417
418
        # In case the name of the input file does not include the ID of the show
        # (i.e., feature_filename_structure does not include {})
        # self.audio_filename_structure is updated to use the input_feature_filename
Anthony Larcher's avatar
Anthony Larcher committed
419
420
421
422
        if self.show == show \
                and self.input_feature_filename == input_feature_filename\
                and self.start_stop == (start, stop)  \
                and self.previous_load is not None:
Sylvain Meignier's avatar
Sylvain Meignier committed
423
424
425
            logging.debug('return previous load')
            return self.previous_load
        self.show = show
Anthony Larcher's avatar
Anthony Larcher committed
426
        self.input_feature_filename = input_feature_filename
Anthony Larcher's avatar
Anthony Larcher committed
427
        self.start_stop = (start, stop)
Anthony Larcher's avatar
Anthony Larcher committed
428
        
429
430
431
432
        feature_filename = None
        if input_feature_filename is not None:
            self.feature_filename_structure = input_feature_filename
            feature_filename = self.feature_filename_structure.format(show)
Anthony Larcher's avatar
Anthony Larcher committed
433

434
        if self.dataset_list is not None:
Sylvain Meignier's avatar
Sylvain Meignier committed
435
            self.previous_load = self.get_features(show,
Anthony Larcher's avatar
Anthony Larcher committed
436
437
438
439
                                                   channel=channel,
                                                   input_feature_filename=feature_filename,
                                                   label=label,
                                                   start=start, stop=stop)
Anthony Larcher's avatar
Anthony Larcher committed
440
        else:
441
            logging.info('Extract tandem features from multiple sources')
442
            self.previous_load = self.get_tandem_features(show,
Anthony Larcher's avatar
Anthony Larcher committed
443
444
                                                          channel=channel,
                                                          label=label,
Anthony Larcher's avatar
Anthony Larcher committed
445
                                                          start=start, stop=stop)
Sylvain Meignier's avatar
Sylvain Meignier committed
446
        return self.previous_load
447

448
    def get_features(self, show, channel=0, input_feature_filename=None, label=None, start=None, stop=None):
        """
        Get the datasets from a single HDF5 file.
        The HDF5 file is loaded from disk or processed on the fly
        via the FeaturesExtractor of the current FeaturesServer

        :param show: ID of the show
        :param channel: index of the channel to read
        :param input_feature_filename: name of the input file in case it does not include the ID of the show;
            when given, it replaces self.feature_filename_structure
        :param label: vad labels
        :param start: index of the first frame of the selected segment
        :param stop: index of the last frame of the selected segment

        :return: acoustic parameters and their vad labels
        """
        # If the name of the input file is completely independent from the show
        # (i.e., feature_filename_structure does not contain "{}"),
        # self.feature_filename_structure is updated to hold the name of the feature file directly
        if input_feature_filename is not None:
            self.feature_filename_structure = input_feature_filename

        # If no extractor for this source, open the hdf5 file; the handle then belongs to this method
        opened_here = self.features_extractor is None
        if opened_here:
            h5f = h5py.File(self.feature_filename_structure.format(show), "r")

        # If an extractor is provided for this source, extract features and get an hdf5 handler
        else:
            h5f = self.features_extractor.extract(show, channel, input_audio_filename=input_feature_filename)

        try:
            feat, label, global_mean, global_std, global_cmvn = read_hdf5_segment(h5f,
                                                                                  show,
                                                                                  dataset_list=self.dataset_list,
                                                                                  label=label,
                                                                                  start=start, stop=stop,
                                                                                  global_cmvn=self.global_cmvn)
        finally:
            # Release the file opened above, even if reading fails. The handle returned by the
            # extractor is left untouched.
            # NOTE(review): confirm whether the extractor expects its handle to be closed by the caller.
            if opened_here:
                h5f.close()

        # Post-process the features and return the features and vad label;
        # global statistics are only forwarded when read_hdf5_segment reports global cmvn
        if global_cmvn:
            feat, label = self.post_processing(feat, label, global_mean, global_std)
        else:
            feat, label = self.post_processing(feat, label)

        return feat, label
493

Anthony Larcher's avatar
Anthony Larcher committed
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
    def get_features_per_speaker(self, show, idmap, channel=0, input_feature_filename=None, label=None):
        """
        Load a single file and return a dictionary with spk_ids as keys and (feature, label) as data.

        The whole show is read once. For each speaker found in ``idmap`` the frames of
        all its segments are gathered, a speaker-specific VAD threshold is computed on
        the first feature column of those frames, and the selected frames are
        post-processed.

        :param show: ID of the show to load
        :param idmap: object exposing the parallel sequences ``leftids``, ``start`` and ``stop``
            (speaker ID and frame boundaries of each segment)
        :param channel: index of the channel, forwarded to the features extractor when one is set
        :param input_feature_filename: if not None, replaces ``self.feature_filename_structure``
            (and is forwarded to the features extractor as the input file name)
        :param label: vad labels forwarded to ``read_hdf5_segment``
        :return: a dictionary mapping each speaker ID to a tuple (features, vad labels)
        """
        if input_feature_filename is not None:
            self.feature_filename_structure = input_feature_filename

        # If no extractor for this source, open hdf5 file and return handler
        if self.features_extractor is None:
            h5f = h5py.File(self.feature_filename_structure.format(show), "r")
        # If an extractor is provided for this source, extract features and return an hdf5 handler
        else:
            h5f = self.features_extractor.extract(show, channel, input_audio_filename=input_feature_filename)

        # The handle is only needed while reading: release it even if the read fails.
        # NOTE(review): relies on read_hdf5_segment returning in-memory arrays (they are
        # indexed and modified below) - confirm for global_mean / global_std.
        try:
            feat, lbl, global_mean, global_std, global_cmvn = read_hdf5_segment(h5f,
                                                                                show,
                                                                                dataset_list=self.dataset_list,
                                                                                label=label,
                                                                                start=None, stop=None,
                                                                                global_cmvn=self.global_cmvn)
        finally:
            h5f.close()

        # Gather the frame indices of every segment of each speaker, then merge them once.
        # NOTE(review): arange(start, stop - 1) ends at frame stop - 2; kept as in the
        # original code - confirm the intended meaning of `stop`.
        segments_per_spk = dict()
        for spk_id, start, stop in zip(idmap.leftids, idmap.start, idmap.stop):
            segments_per_spk.setdefault(spk_id, []).append(numpy.arange(start, stop - 1))
        frames_per_spk = {spk_id: numpy.concatenate(segments, axis=0)
                          for spk_id, segments in segments_per_spk.items()}

        # Extractor instantiated only to call its VAD routine on the loaded features
        fe = FeaturesExtractor(audio_filename_structure="",
                               feature_filename_structure=None,
                               sampling_frequency=16000,
                               lower_frequency=133.3333,
                               higher_frequency=6855.4976,
                               filter_bank="log",
                               filter_bank_size=40,
                               window_size=0.025,
                               shift=0.01,
                               ceps_number=13,
                               pre_emphasis=0.97,
                               keep_all_features=True,
                               vad='percentil',
                               save_param=["energy", "cep", "vad"]
                               )

        feat_per_spk = dict()
        for spk_id, frames in frames_per_spk.items():
            # Indexing with an index array returns copies, so the arrays of the whole
            # show are never modified and do not need to be deep-copied per speaker.
            spk_feat = feat[frames, :]
            spk_lbl = lbl[frames]

            # Speaker-specific threshold computed on the first feature column
            # (presumably the log-energy - TODO confirm), then applied to the same frames.
            _, threshold_id = fe._vad(None, feat[frames, 0], None, None)
            spk_lbl[:] = spk_feat[:, 0] > threshold_id

            # Post-process the features and return the features and vad label
            if global_cmvn:
                tmp_feat, tmp_lbl = self.post_processing(spk_feat, spk_lbl, global_mean, global_std)
            else:
                tmp_feat, tmp_lbl = self.post_processing(spk_feat, spk_lbl)

            feat_per_spk[spk_id] = (tmp_feat, tmp_lbl)

        return feat_per_spk

564
    def get_tandem_features(self, show, channel=0, label=None, start=None, stop=None):
        """
        Read acoustic parameters from multiple HDF5 files (from disk or extracted by FeaturesExtractor objects).

        ``self.sources`` is iterated as ``(features_server, get_vad)`` pairs. The features
        of every source are stacked horizontally. When ``get_vad`` is true for a source,
        the labels it returns replace the current labels, which are also the ones handed
        to the sources that follow.

        :param show: Id of the show
        :param channel: index of the channel
        :param label: vad labels
        :param start: index of the first frame of the selected segment
        :param stop: index of the last frame of the selected segment

        :return: acoustic parameters and their vad labels
        """
        # Each source has its own sources (including subserver) that provides features and label
        features = []
        for features_server, get_vad in self.sources:
            # Get features from this source
            feat, lbl = features_server.get_features(show, channel=channel, label=label, start=start, stop=stop)
            if get_vad:
                label = lbl
            features.append(feat)

        features = numpy.hstack(features)

        # If the VAD is not required, return all labels at True
        # (one label per row of the stacked matrix rather than of the last source read)
        if label is None:
            label = numpy.ones(features.shape[0], dtype='bool')

        # Apply the final post-processing on the concatenated features
        return self.post_processing(features, label)
593

594
    def mean_std(self, show, channel=0, start=None, stop=None):
        """
        Compute the statistics from which the mean and standard deviation vectors of a
        segment of acoustic features can be derived.

        The mean and the standard deviation themselves are NOT computed here: the
        method returns the frame count, the sum of the frames and the sum of their
        squares, each sum being taken over the frames (axis 0).

        :param show: the ID of the show
        :param channel: the index of the channel
        :param start: index of the first frame of the selected segment
        :param stop: index of the last frame of the selected segment

        :return: the number of frames, the sum of the frames and the sum of the squared frames
        """
        feat, _ = self.load(show, channel=channel, start=start, stop=stop)
        return feat.shape[0], feat.sum(axis=0), numpy.sum(feat**2, axis=0)
Anthony Larcher's avatar
Anthony Larcher committed
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637

    def stack_features(self,
                       show_list,
                       channel_list=None,
                       feature_filename_list=None,
                       label_list=None,
                       start_list=None,
                       stop_list=None):
        """
        Load acoustic features from a list of files and return them stacked in a 2D-array,
        one line per frame.

        The i-th element of every list is passed positionally to ``self.load`` in the
        order (show, channel, feature filename, label, start, stop).

        :param show_list: list of IDs of the shows to load
        :param channel_list: list of channel indices, one per show (default: channel 0 for each show)
        :param feature_filename_list: list of feature file names, one per show (default: None for each show)
        :param label_list: list of vad labels, one per show (default: None for each show)
        :param start_list: list of indices of the first frame to load, one per show (default: None for each show)
        :param stop_list: list of indices of the last frame to load, one per show (default: None for each show)
        :return: the features of all shows, stacked vertically in the order of ``show_list``
        """
        nb_shows = len(show_list)

        if channel_list is None:
            # Channels are indices: default to integer 0 (numpy.zeros would give float 0.0)
            channel_list = [0] * nb_shows
        if feature_filename_list is None:
            feature_filename_list = [None] * nb_shows
        if label_list is None:
            label_list = [None] * nb_shows
        if start_list is None:
            start_list = [None] * nb_shows
        if stop_list is None:
            stop_list = [None] * nb_shows

        features_list = []
        load_args = zip(show_list, channel_list, feature_filename_list, label_list, start_list, stop_list)
        for idx, load_arg in enumerate(load_args, start=1):
            logging.info("load file {} / {}".format(idx, nb_shows))
            # load returns (features, label): only the features are stacked
            features_list.append(self.load(*load_arg)[0])

        return numpy.vstack(features_list)
Anthony Larcher's avatar
Anthony Larcher committed
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662


    def _stack_features_worker(self,
                               input_queue,
                               output_queue):
        """Load the feature files described by the tasks of a queue.

        Each task is a tuple of positional arguments for ``self.load``; the loaded
        features (first element returned by ``load``) are put in ``output_queue``.
        A ``None`` task is the poison pill: ``None`` is forwarded to ``output_queue``
        to mark the end of the results and the worker stops.

        A task that fails does not stop the worker: the error is logged, a
        ``RuntimeError`` describing it is put in ``output_queue`` and the task is
        still marked as done, so that whoever joins ``input_queue`` is never blocked.

        :param input_queue: a joinable queue of tasks, terminated by None
        :param output_queue: the queue to fill with the loaded features
        """
        while True:
            next_task = input_queue.get()
            if next_task is None:
                # Poison pill means shutdown
                output_queue.put(None)
                input_queue.task_done()
                break
            try:
                output_queue.put(self.load(*next_task)[0])
            except Exception as exc:
                # Keep the worker alive so that it still consumes its poison pill,
                # and let the consumer of output_queue decide what to do.
                logging.exception("unable to load features for show %r", next_task[0])
                output_queue.put(RuntimeError(
                    "unable to load features for show {!r}: {!r}".format(next_task[0], exc)))
            finally:
                input_queue.task_done()

    def stack_features_parallel(self,
                                show_list,
                                channel_list=None,
                                feature_filename_list=None,
                                label_list=None,
                                start_list=None,
                                stop_list=None,
                                num_thread=1):
        """Load a list of feature files and stack them in a unique ndarray.

        One task per show is put in a shared queue consumed by ``num_thread`` worker
        processes. The results are collected worker after worker, hence the stacked
        features do not necessarily follow the order of ``show_list``.

        :param show_list: list of IDs of the shows to load
        :param channel_list: list of channel indices, one per show (default: channel 0 for each show)
        :param feature_filename_list: list of feature file names, one per show (default: None for each show)
        :param label_list: list of vad labels, one per show (default: None for each show)
        :param start_list: list of indices of the first frame to load, one per show (default: None for each show)
        :param stop_list: list of indices of the last frame to load, one per show (default: None for each show)
        :param num_thread: number of worker processes (optional, default is 1)

        :return: the features of all shows concatenated along the first axis
        :raises ValueError: if ``num_thread`` is lower than 1
        :raises RuntimeError: if at least one show could not be loaded
        """
        if num_thread < 1:
            # Without any worker the tasks would never be consumed and join() would block
            raise ValueError("num_thread must be at least 1, got {}".format(num_thread))

        nb_shows = len(show_list)
        if channel_list is None:
            # Channels are indices: default to integer 0 (numpy.zeros would give float 0.0)
            channel_list = [0] * nb_shows
        if feature_filename_list is None:
            feature_filename_list = [None] * nb_shows
        if label_list is None:
            label_list = [None] * nb_shows
        if start_list is None:
            start_list = [None] * nb_shows
        if stop_list is None:
            stop_list = [None] * nb_shows

        # Large enough to hold every task plus one poison pill per worker
        queue_in = multiprocessing.JoinableQueue(maxsize=nb_shows + num_thread)
        queue_out = []

        # Start worker processes, each one with its own output queue
        jobs = []
        for _ in range(num_thread):
            worker_queue = multiprocessing.Queue()
            p = multiprocessing.Process(target=self._stack_features_worker,
                                        args=(queue_in, worker_queue))
            queue_out.append(worker_queue)
            jobs.append(p)
            p.start()

        # Submit tasks
        for task in zip(show_list, channel_list, feature_filename_list,
                        label_list, start_list, stop_list):
            queue_in.put(task)

        # Add None to the queue to kill the workers
        for _ in range(num_thread):
            queue_in.put(None)

        # Wait for all the tasks to finish
        queue_in.join()

        # Empty the output queues BEFORE joining the processes: a process that has
        # put items in a queue waits for them to be consumed before terminating.
        output = []
        failures = []
        for q in queue_out:
            while True:
                data = q.get()
                if data is None:
                    break
                if isinstance(data, Exception):
                    failures.append(data)
                else:
                    output.append(data)

        for p in jobs:
            p.join()

        if failures:
            raise RuntimeError("{} of {} feature file(s) could not be loaded; first error: {}".format(
                len(failures), nb_shows, failures[0]))
        return numpy.concatenate(output, axis=0)