features_server.py 29.1 KB
Newer Older
Anthony Larcher's avatar
Anthony Larcher committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# -*- coding: utf-8 -*-
#
# This file is part of SIDEKIT.
#
# SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/
#
# SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/
#    
# SIDEKIT is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the License, 
# or (at your option) any later version.
#
# SIDEKIT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with SIDEKIT.  If not, see <http://www.gnu.org/licenses/>.

"""
Anthony Larcher's avatar
Anthony Larcher committed
25
Copyright 2014-2017 Sylvain Meignier and Anthony Larcher
Anthony Larcher's avatar
Anthony Larcher committed
26
27
28
29

    :mod:`features_server` provides methods to manage features

"""
Anthony Larcher's avatar
Anthony Larcher committed
30
import multiprocessing
31
import numpy
Anthony Larcher's avatar
Anthony Larcher committed
32
import logging
33
34
import h5py

35
from sidekit.frontend.features import pca_dct, shifted_delta_cepstral, compute_delta, framing, dct_basis
36
37
38
39
from sidekit.frontend.vad import label_fusion
from sidekit.frontend.normfeat import cms, cmvn, stg, cep_sliding_norm, rasta_filt
from sidekit.sv_utils import parse_mask

Anthony Larcher's avatar
Anthony Larcher committed
40

41
__license__ = "LGPL"
Anthony Larcher's avatar
Anthony Larcher committed
42
__author__ = "Anthony Larcher & Sylvain Meignier"
Anthony Larcher's avatar
Anthony Larcher committed
43
__copyright__ = "Copyright 2014-2016 Anthony Larcher"
44
45
46
47
__maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production"
__docformat__ = 'reStructuredText'
Anthony Larcher's avatar
Anthony Larcher committed
48

Anthony Larcher's avatar
Anthony Larcher committed
49

Anthony Larcher's avatar
Anthony Larcher committed
50
class FeaturesServer(object):
Anthony Larcher's avatar
Anthony Larcher committed
51
    """
Anthony Larcher's avatar
Anthony Larcher committed
52
53
54
    Management of features. FeaturesServer instances load datasets from HDF5 files
    (that can be read from disk or produced by a FeaturesExtractor object).
    Datasets read from one or many files are concatenated and processed.
Anthony Larcher's avatar
Anthony Larcher committed
55
56
    """

Anthony Larcher's avatar
Anthony Larcher committed
57
58
    def __init__(self,
                 features_extractor=None,
59
60
61
62
                 feature_filename_structure=None,
                 sources=None,
                 dataset_list=None,
                 mask=None,
Anthony Larcher's avatar
Anthony Larcher committed
63
                 feat_norm=None,
64
                 global_cmvn=None,
Anthony Larcher's avatar
Anthony Larcher committed
65
                 dct_pca=False,
Anthony Larcher's avatar
Anthony Larcher committed
66
                 dct_pca_config=None,
Anthony Larcher's avatar
Anthony Larcher committed
67
                 sdc=False,
Anthony Larcher's avatar
Anthony Larcher committed
68
                 sdc_config=None,
Anthony Larcher's avatar
Anthony Larcher committed
69
70
                 delta=None,
                 double_delta=None,
Anthony Larcher's avatar
Anthony Larcher committed
71
                 delta_filter=None,
72
73
                 context=None,
                 traps_dct_nb=None,
Anthony Larcher's avatar
Anthony Larcher committed
74
                 rasta=None,
Anthony Larcher's avatar
Anthony Larcher committed
75
                 keep_all_features=True):
76
        """
Anthony Larcher's avatar
Anthony Larcher committed
77
78
79
80
81
82
83
        Initialize a FeaturesServer for two cases:
        1. each call to load will load datasets from a single file. This mode requires to provide a dataset_list
        (lists of datasets to load from each file.
        2. each call to load will load datasets from several files (possibly several datasets from each file)
        and concatenate them. In this mode, you should provide a FeaturesServer for each source, thus, datasets
        read from each source can be post-processed independently before being concatenated with others. The dataset
        resulting from the concatenation from all sources is then post-processed.
Anthony Larcher's avatar
modif    
Anthony Larcher committed
84

Anthony Larcher's avatar
Anthony Larcher committed
85
        :param features_extractor: a FeaturesExtractor if required to extract features from audio file
Anthony Larcher's avatar
modif    
Anthony Larcher committed
86
        if None, data are loaded from an existing HDF5 file
87
        :param feature_filename_structure: structure of the filename to use to load HDF5 files
Anthony Larcher's avatar
Anthony Larcher committed
88
89
90
91
92
93
94
        :param sources: tuple of sources to load features different files (optional: for the case where datasets
        are loaded from several files and concatenated.
        :param dataset_list: string of the form '["cep", "fb", vad", energy", "bnf"]' (only when loading datasets
        from a single file) list of datasets to load.
        :param mask: string of the form '[1-3,10,15-20]' mask to apply on the concatenated dataset
        to select specific components. In this example, coefficients 1,2,3,10,15,16,17,18,19,20 are kept
        In this example,
Anthony Larcher's avatar
Anthony Larcher committed
95
96
97
98
99
100
101
102
103
104
105
106
107
        :param feat_norm: tpye of normalization to apply as post-processing
        :param global_cmvn: boolean, if True, use a global mean and std when normalizing the frames
        :param dct_pca: if True, add temporal context by using a PCA-DCT approach
        :param dct_pca_config: configuration of the PCA-DCT, default is (12, 12, none)
        :param sdc: if True, compute shifted delta cepstra coefficients
        :param sdc_config: configuration to compute sdc coefficients, default is (1,3,7)
        :param delta: if True, append the first order derivative
        :param double_delta: if True, append the second order derivative
        :param delta_filter: coefficients of the filter used to compute delta coefficients
        :param context: add a left and right context, default is (0,0)
        :param traps_dct_nb: number of DCT coefficients to keep when computing TRAP coefficients
        :param rasta: if True, perform RASTA filtering
        :param keep_all_features: boolean, if True, keep all features, if False, keep frames according to the vad labels
108
        :return:
Anthony Larcher's avatar
Anthony Larcher committed
109
        """
Anthony Larcher's avatar
Anthony Larcher committed
110
        self.features_extractor = None
111
112
113
114
115
        self.feature_filename_structure = '{}'
        self.sources = ()
        self.dataset_list = None

        # Post processing options
Anthony Larcher's avatar
Anthony Larcher committed
116
117
        self.mask = None
        self.feat_norm = None
118
        self.global_cmvn = None
119
        self.dct_pca = False
Anthony Larcher's avatar
Anthony Larcher committed
120
        self.dct_pca_config = (12, 12, None)
Anthony Larcher's avatar
Anthony Larcher committed
121
        self.sdc = False
122
        self.sdc_config = (1, 3, 7)
Anthony Larcher's avatar
Anthony Larcher committed
123
124
        self.delta = False
        self.double_delta = False
125
        self.delta_filter = numpy.array([.25, .5, .25, 0, -.25, -.5, -.25])
Anthony Larcher's avatar
Anthony Larcher committed
126
        self.context = (0, 0)
127
        self.traps_dct_nb = 0
Anthony Larcher's avatar
Anthony Larcher committed
128
        self.rasta = False
Anthony Larcher's avatar
Anthony Larcher committed
129
        self.keep_all_features = True
130

Anthony Larcher's avatar
Anthony Larcher committed
131
132
        if features_extractor is not None:
            self.features_extractor = features_extractor
133
134
135
136
137
138
139
140
        if feature_filename_structure is not None:
            self.feature_filename_structure = feature_filename_structure
        if sources is not None:
            self.sources = sources
        if dataset_list is not None:
            self.dataset_list = dataset_list
        if mask is not None:
            self.mask = parse_mask(mask)
Anthony Larcher's avatar
Anthony Larcher committed
141
142
        if feat_norm is not None:
            self.feat_norm = feat_norm
143
144
        if global_cmvn is not None:
            self.global_cmvn = global_cmvn
Anthony Larcher's avatar
Anthony Larcher committed
145
146
147
148
149
150
151
152
        if dct_pca is not None:
            self.dct_pca = dct_pca
        if dct_pca_config is not None:
            self.dct_pca_config = dct_pca_config
        if sdc is not None:
            self.sdc = sdc
        if sdc_config is not None:
            self.sdc_config = sdc_config
Anthony Larcher's avatar
Anthony Larcher committed
153
154
155
156
157
        if delta is not None:
            self.delta = delta
        if double_delta is not None:
            self.double_delta = double_delta
        if delta_filter is not None:
Anthony Larcher's avatar
Anthony Larcher committed
158
            self.delta_filter = delta_filter
159
160
161
162
        if context is not None:
            self.context = context
        if traps_dct_nb is not None:
            self.traps_dct_nb = traps_dct_nb
Anthony Larcher's avatar
Anthony Larcher committed
163
164
        if rasta is not None:
            self.rasta = rasta
Anthony Larcher's avatar
Anthony Larcher committed
165
166
        if keep_all_features is not None:
            self.keep_all_features = keep_all_features
167

Anthony Larcher's avatar
Anthony Larcher committed
168
        self.show = 'empty'
Anthony Larcher's avatar
Anthony Larcher committed
169
        self.input_feature_filename = 'empty'
Anthony Larcher's avatar
Anthony Larcher committed
170
        self.start_stop = (None, None)
Sylvain Meignier's avatar
Sylvain Meignier committed
171
        self.previous_load = None
Anthony Larcher's avatar
Anthony Larcher committed
172
173

    def __repr__(self):
174
175
176
177
178
        """

        :return: a string to display the object
        """
        ch = '\t show: {} \n\n'.format(self.show)
Anthony Larcher's avatar
Anthony Larcher committed
179
        ch += '\t input_feature_filename: {} \n\n'.format(self.input_feature_filename)
180
181
182
183
184
185
186
187
188
189
190
191
192
193
        ch += '\t feature_filename_structure: {} \n'.format(self.feature_filename_structure)
        ch += '\t  \n'
        ch += '\t  \n\n'
        ch += '\t Post processing options: \n'
        ch += '\t\t mask: {}  \n'.format(self.mask)
        ch += '\t\t feat_norm: {} \n'.format(self.feat_norm)
        ch += '\t\t dct_pca: {}, dct_pca_config: {} \n'.format(self.dct_pca,
                                                               self.dct_pca_config)
        ch += '\t\t sdc: {}, sdc_config: {} \n'.format(self.sdc,
                                                       self.sdc_config)
        ch += '\t\t delta: {}, double_delta: {}, delta_filter: {} \n'.format(self.delta,
                                                                             self.double_delta,
                                                                             self.delta_filter)
        ch += '\t\t rasta: {} \n'.format(self.rasta)
Anthony Larcher's avatar
Anthony Larcher committed
194
        ch += '\t\t keep_all_features: {} \n'.format(self.keep_all_features)
195

Anthony Larcher's avatar
Anthony Larcher committed
196
        return ch
Anthony Larcher's avatar
Anthony Larcher committed
197

198
    def post_processing(self, feat, label, global_mean=None, global_std=None):
        """
        Apply the configured post-processing chain to acoustic parameters
        (cepstral coefficients, filter banks or bottleneck parameters) once they
        have been computed or read from file.

        The steps are applied in this order: mask, RASTA filtering, temporal
        context (delta/double-delta, else PCA-DCT, else SDC), label fusion,
        normalization and, if keep_all_features is False, frame selection.

        :param feat: the matrix of acoustic parameters to post-process
        :param label: the VAD labels for the acoustic parameters
        :param global_mean: vector of mean to use for normalization
        :param global_std: vector of standard deviation to use for normalization

        :return: the matrix of acoustic parameters and their VAD labels after post-processing
        """
        # Select the requested components
        if self.mask is not None:
            feat = self._mask(feat)

        # RASTA filtering, only when requested
        if self.rasta:
            feat, label = self._rasta(feat, label)

        # Temporal context: the three approaches are mutually exclusive,
        # deltas take precedence over PCA-DCT which takes precedence over SDC
        if self.delta or self.double_delta:
            feat = self._delta_and_2delta(feat)
        elif self.dct_pca:
            pca_dct_cfg = self.dct_pca_config
            feat = pca_dct(feat, pca_dct_cfg[0], pca_dct_cfg[1], pca_dct_cfg[2])
        elif self.sdc:
            sdc_cfg = self.sdc_config
            feat = shifted_delta_cepstral(feat, d=sdc_cfg[0], p=sdc_cfg[1], k=sdc_cfg[2])

        # Smooth the labels and fuse the channels if more than one.
        logging.debug('Smooth the labels and fuse the channels if more than one')
        label = label_fusion(label)

        # Normalization; the return value of _normalize is not used,
        # feat is expected to be modified in place
        if self.feat_norm is not None:
            self._normalize(label, feat, global_mean, global_std)
        else:
            logging.debug('no norm')

        if self.keep_all_features:
            return feat, label

        # Only the frames selected by the labels are kept
        logging.debug('no keep all')
        feat = feat[label]
        label = label[label]
        return feat, label
Anthony Larcher's avatar
Anthony Larcher committed
243

244
    def _mask(self, cep):
Anthony Larcher's avatar
Anthony Larcher committed
245
        """
Anthony Larcher's avatar
Anthony Larcher committed
246
247
248
        Keep only the MFCC index present in the filter list
        :param cep: acoustic parameters to filter

249
        :return: return the list of MFCC given by filter list
Anthony Larcher's avatar
Anthony Larcher committed
250
        """
251
252
253
254
        if len(self.mask) == 0:
            raise Exception('filter list is empty')
        logging.debug('applied mask')
        return cep[:, self.mask]
Anthony Larcher's avatar
Anthony Larcher committed
255

256
    def _normalize(self, label, cep, global_mean=None, global_std=None):
Anthony Larcher's avatar
Anthony Larcher committed
257
        """
Anthony Larcher's avatar
Anthony Larcher committed
258
        Normalize acoustic parameters in place
Anthony Larcher's avatar
Anthony Larcher committed
259

Anthony Larcher's avatar
Anthony Larcher committed
260
261
262
263
        :param label: vad labels to use for normalization
        :param cep: acoustic parameters to normalize
        :param global_mean: mean vector to use if provided
        :param global_std: standard deviation vector to use if provided
Anthony Larcher's avatar
Anthony Larcher committed
264
265
266
        """
        # Perform feature normalization on the entire session.
        if self.feat_norm is None:
267
            logging.debug('no norm')
Anthony Larcher's avatar
Anthony Larcher committed
268
            pass
Anthony Larcher's avatar
Anthony Larcher committed
269
        elif self.feat_norm == 'cms':
270
            logging.debug('cms norm')
271
            cms(cep, label, global_mean)
Anthony Larcher's avatar
Anthony Larcher committed
272
        elif self.feat_norm == 'cmvn':
273
            logging.debug('cmvn norm')
274
            cmvn(cep, label, global_mean, global_std)
Anthony Larcher's avatar
Anthony Larcher committed
275
        elif self.feat_norm == 'stg':
276
            logging.debug('stg norm')
277
            stg(cep, label=label)
Sylvain Meignier's avatar
Sylvain Meignier committed
278
        elif self.feat_norm == 'cmvn_sliding':
279
            logging.debug('sliding cmvn norm')
Sylvain Meignier's avatar
Sylvain Meignier committed
280
            cep_sliding_norm(cep, label=label, win=301, center=True, reduce=True)
Sylvain Meignier's avatar
Sylvain Meignier committed
281
        elif self.feat_norm == 'cms_sliding':
282
            logging.debug('sliding cms norm')
Sylvain Meignier's avatar
Sylvain Meignier committed
283
            cep_sliding_norm(cep, label=label, win=301, center=True, reduce=False)
Anthony Larcher's avatar
Anthony Larcher committed
284
        else:
Anthony Larcher's avatar
Anthony Larcher committed
285
            logging.warning('Wrong feature normalisation type')
Anthony Larcher's avatar
Anthony Larcher committed
286

287
    def _delta_and_2delta(self, cep):
Anthony Larcher's avatar
Anthony Larcher committed
288
        """
289
290
291
292
        Add deltas and double deltas.
        :param cep: a matrix of cepstral cefficients

        :return: the cepstral coefficient stacked with deltas and double deltas
Anthony Larcher's avatar
Anthony Larcher committed
293
        """
294
        if self.delta:
295
            logging.debug('add delta')
296
297
298
            delta = compute_delta(cep, filt=self.delta_filter)
            cep = numpy.column_stack((cep, delta))
            if self.double_delta:
299
                logging.debug('add delta delta')
300
301
302
                double_delta = compute_delta(delta, filt=self.delta_filter)
                cep = numpy.column_stack((cep, double_delta))
        return cep
Anthony Larcher's avatar
Anthony Larcher committed
303

304
305
306
307
308
309
    def _rasta(self, cep, label):
        """
        Performs RASTA filtering if required.
        The two first frames are copied from the third to keep
        the length consistent
        !!! if vad is None: label[] is empty
Anthony Larcher's avatar
Anthony Larcher committed
310

Anthony Larcher's avatar
Anthony Larcher committed
311
312
        :param cep: the acoustic features to filter
        :param label: the VAD label
313
314
315
        :return:
        """
        if self.rasta:
316
            logging.debug('perform RASTA %s', self.rasta)
317
318
319
320
            cep = rasta_filt(cep)
            cep[:2, :] = cep[2, :]
            label[:2] = label[2]
        return cep, label
Anthony Larcher's avatar
Anthony Larcher committed
321

322
    def get_context(self, feat, start=None, stop=None, label=None):
        """
        Add a left and right context to each frame of the selected segment.
        Where the context reaches outside of feat, the first and last frames are
        duplicated (edge padding) to provide the missing context.

        :param feat: sequence of feature frames (one frame per line)
        :param start: index of the first frame of the selected segment, default is 0
        :param stop: index of the last frame of the selected segment, default is the number of frames
        :param label: vad label if available

        :return: a sequence of frames with their left and right context, and the
            labels of the selected segment (None if no label was given)
        """
        if start is None:
            start = 0
        if stop is None:
            stop = feat.shape[0]

        left_ctx = self.context[0]
        right_ctx = self.context[1]
        win_size = 1 + sum(self.context)

        # Number of frames to duplicate on each side, never negative
        pad_before = max(left_ctx - start, 0)
        pad_after = max(stop - feat.shape[0] + right_ctx + 1, 0)
        padded = numpy.pad(feat, ((pad_before, pad_after), (0, 0)), mode='edge')

        # Indices are shifted by pad_before since frames were inserted before the first one
        first = start - left_ctx + pad_before
        last = stop + right_ctx + pad_before
        windows = framing(padded[first:last, :], win_size=win_size)
        context_feat = windows.reshape(-1, win_size * feat.shape[1])

        context_label = None if label is None else label[start:stop]

        return context_feat, context_label

    def get_traps(self, feat, start=None, stop=None, label=None):
        """
        Compute TRAP parameters. The input frames are concatenated to add their left and right context,
        a Hamming window is applied and a DCT reduces the dimensionality of the resulting vector.

        Where the context reaches outside of feat, the first and last frames are
        duplicated (edge padding). The padding widths are clamped to zero, as in
        get_context, so that a segment located inside feat does not produce
        negative pad widths (which numpy.pad rejects).

        :param feat: input acoustic parameters to process (one frame per line)
        :param start: index of the first frame of the selected segment, default is 0
        :param stop: index of the last frame of the selected segment, default is the number of frames
        :param label: vad label if available

        :return: a sequence of TRAP parameters and the labels of the selected
            segment (None if no label was given)
        """
        if start is None:
            start = 0
        if stop is None:
            stop = feat.shape[0]

        left_ctx = self.context[0]
        right_ctx = self.context[1]
        win_size = 1 + sum(self.context)

        # Number of frames to duplicate on each side; numpy.pad only accepts
        # non-negative widths, hence the clamping
        pad_before = max(left_ctx - start, 0)
        pad_after = max(stop - feat.shape[0] + right_ctx + 1, 0)
        padded = numpy.pad(feat, ((pad_before, pad_after), (0, 0)), mode='edge')

        # Indices are shifted by pad_before since frames were inserted before the first one
        first = start - left_ctx + pad_before
        last = stop + right_ctx + pad_before
        context_feat = framing(padded[first:last, :], win_size=win_size).transpose(0, 2, 1)

        # DCT basis weighted by a Hamming window spanning the whole context
        hamming_dct = (dct_basis(self.traps_dct_nb, win_size) * numpy.hamming(win_size)).T

        context_label = None if label is None else label[start:stop]

        traps = numpy.dot(
            context_feat.reshape(-1, hamming_dct.shape[0]),
            hamming_dct
        ).reshape(context_feat.shape[0], -1)
        return traps, context_label

    def load(self, show, channel=0, input_feature_filename=None, label=None, start=None, stop=None):
394
        """
Anthony Larcher's avatar
Anthony Larcher committed
395
        Depending of the setting of the FeaturesServer, can either:
396

Anthony Larcher's avatar
Anthony Larcher committed
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
        1. Get the datasets from a single HDF5 file
            The HDF5 file is loaded from disk or processed on the fly
            via the FeaturesExtractor of the current FeaturesServer

        2. Load datasets from multiple input HDF5 files. The datasets are post-processed separately, then concatenated
            and post-process

        :param show: ID of the show to load (should be the same for each HDF5 file to read from)
        :param channel: audio channel index in case the parameters are extracted from an audio file
        :param input_feature_filename: name of the input feature file in case it is independent from the ID of the show
        :param label: vad labels
        :param start: index of the first frame of the selected segment
        :param stop: index of the last frame of the selected segment

        :return: acoustic parameters and their vad labels
412
        """
Anthony Larcher's avatar
Anthony Larcher committed
413
414
415
416

        # In case the name of the input file does not include the ID of the show
        # (i.e., feature_filename_structure does not include {})
        # self.audio_filename_structure is updated to use the input_feature_filename
Anthony Larcher's avatar
Anthony Larcher committed
417
418
419
420
        if self.show == show \
                and self.input_feature_filename == input_feature_filename\
                and self.start_stop == (start, stop)  \
                and self.previous_load is not None:
Sylvain Meignier's avatar
Sylvain Meignier committed
421
422
423
424
            logging.debug('return previous load')
            return self.previous_load

        self.show = show
Anthony Larcher's avatar
Anthony Larcher committed
425
        self.input_feature_filename = input_feature_filename
Anthony Larcher's avatar
Anthony Larcher committed
426
        self.start_stop = (start, stop)
Anthony Larcher's avatar
Anthony Larcher committed
427
        
428
429
430
431
        feature_filename = None
        if input_feature_filename is not None:
            self.feature_filename_structure = input_feature_filename
            feature_filename = self.feature_filename_structure.format(show)
Anthony Larcher's avatar
Anthony Larcher committed
432

433
        if self.dataset_list is not None:
Sylvain Meignier's avatar
Sylvain Meignier committed
434
            self.previous_load = self.get_features(show,
Anthony Larcher's avatar
Anthony Larcher committed
435
436
437
438
                                                   channel=channel,
                                                   input_feature_filename=feature_filename,
                                                   label=label,
                                                   start=start, stop=stop)
Anthony Larcher's avatar
Anthony Larcher committed
439
        else:
440
            logging.info('Extract tandem features from multiple sources')
441
            self.previous_load = self.get_tandem_features(show,
Anthony Larcher's avatar
Anthony Larcher committed
442
443
                                                          channel=channel,
                                                          label=label,
Anthony Larcher's avatar
Anthony Larcher committed
444
                                                          start=start, stop=stop)
Sylvain Meignier's avatar
Sylvain Meignier committed
445
        return self.previous_load
446

447
    def get_features(self, show, channel=0, input_feature_filename=None, label=None, start=None, stop=None):
        """
        Get the datasets from a single HDF5 file.
        The HDF5 file is loaded from disk or processed on the fly
        via the FeaturesExtractor of the current FeaturesServer.

        The datasets listed in self.dataset_list are read between start and stop
        and stacked in the order energy, cep, fb, bnf. When start is negative or
        stop exceeds the length of the data, the segment is extended by
        duplicating the first or last frame. The result is then post-processed.

        :param show: ID of the show
        :param channel: index of the channel to read
        :param input_feature_filename: name of the input file in case it does not include the ID of the show
        :param label: vad labels; if None, they are read from the "vad" dataset of the file when it
            exists, otherwise all frames are labelled True
        :param start: index of the first frame of the selected segment
        :param stop: index of the last frame of the selected segment

        :return: acoustic parameters and their vad labels
        """
        # If the name of the input file is totally independent from the show
        # (i.e. feature_filename_structure does not contain "{}"),
        # feature_filename_structure is updated to directly hold the name of the feature file
        if input_feature_filename is not None:
            self.feature_filename_structure = input_feature_filename

        if self.features_extractor is None:
            # No extractor for this source: open the HDF5 file
            h5f = h5py.File(self.feature_filename_structure.format(show), "r")
        else:
            # An extractor is provided for this source: extract the features and get an HDF5 handler
            h5f = self.features_extractor.extract(show, channel, input_audio_filename=input_feature_filename)

        # The handler is closed whatever happens while reading
        try:
            # Length of the data, taken from the first dataset found for this show
            dataset_length = h5f[show + "/" + next(iter(h5f[show]))].shape[0]

            # Deal with the case where start < 0 or stop > dataset_length:
            # read what exists and remember how many frames have to be padded
            if start is None:
                start = 0
            pad_begin = -start if start < 0 else 0
            start = max(start, 0)

            if stop is None:
                stop = dataset_length
            pad_end = stop - dataset_length if stop > dataset_length else 0
            stop = min(stop, dataset_length)

            # NOTE(review): the original code also tested "start is None or stop is None" here,
            # but both have already been replaced by their default at this point, so only
            # self.global_cmvn decides. Confirm whether the test was meant to happen earlier.
            global_cmvn = bool(self.global_cmvn)

            # Read the data between start and stop and concatenate all required datasets.
            # Mean and std datasets are only read when they are going to be used.
            feat = []
            global_mean = []
            global_std = []
            for name in ("energy", "cep", "fb", "bnf"):
                if name not in self.dataset_list:
                    continue
                dataset = h5f["/".join((show, name))]
                if name == "energy":
                    # energy gets an extra axis so that it can be stacked as a column
                    feat.append(dataset[start:stop][:, numpy.newaxis])
                else:
                    feat.append(dataset[start:stop, :])
                if global_cmvn:
                    # dataset[()] reads the whole dataset (Dataset.value no longer exists in h5py 3)
                    global_mean.append(h5f["/".join((show, name + "_mean"))][()])
                    global_std.append(h5f["/".join((show, name + "_std"))][()])
            feat = numpy.hstack(feat)
            if global_cmvn:
                global_mean = numpy.hstack(global_mean)
                global_std = numpy.hstack(global_std)

            if label is None:
                vad_key = "/".join((show, "vad"))
                if vad_key in h5f:
                    label = h5f[vad_key][()].astype('bool').squeeze()[start:stop]
                else:
                    label = numpy.ones(feat.shape[0], dtype='bool')
        finally:
            h5f.close()

        # Pad the segment if needed
        feat = numpy.pad(feat, ((pad_begin, pad_end), (0, 0)), mode='edge')
        label = numpy.pad(label, (pad_begin, pad_end), mode='edge')

        # Post-process the features and return the features and vad label
        if global_cmvn:
            feat, label = self.post_processing(feat, label, global_mean, global_std)
        else:
            feat, label = self.post_processing(feat, label)

        return feat, label
536

537
    def get_tandem_features(self, show, channel=0, label=None, start=None, stop=None):
        """
        Read acoustic parameters from multiple HDF5 files (from disk or extracted by FeaturesExtractor objects).

        Every element of ``self.sources`` is a pair ``(features_server, get_vad)``.
        The features returned by each source are stacked horizontally (one block
        of columns per source). When ``get_vad`` is true for a source, the label
        returned by that source replaces the current label, and is therefore also
        the label handed to the sources that come after it.

        :param show: Id of the show
        :param channel: index of the channel
        :param label: vad labels
        :param start: index of the first frame of the selected segment
        :param stop: index of the last frame of the selected segment

        :return: acoustic parameters and their vad labels, as returned by ``self.post_processing``
        """
        # Each source has its own sources (including subserver) that provides features and label
        features = []
        for features_server, get_vad in self.sources:
            # Get features from this source
            feat, lbl = features_server.get_features(show, channel=channel, label=label, start=start, stop=stop)
            if get_vad:
                label = lbl
            features.append(feat)

        features = numpy.hstack(features)

        # If the VAD is not required, return all labels at True.
        # The label is sized from the stacked array (not from the loop variable
        # of the last source) so that it always matches what is post-processed.
        if label is None:
            label = numpy.ones(features.shape[0], dtype='bool')

        # Apply the final post-processing on the concatenated features
        return self.post_processing(features, label)
566

567
    def mean_std(self, show, channel=0, start=None, stop=None):
        """
        Compute the statistics needed to derive the mean and standard deviation
        vectors for a segment of acoustic features.

        Despite its name, this method does not return the mean and standard
        deviation themselves: it returns the raw accumulators (frame count, sum
        and sum of squares along the frame axis).

        :param show: the ID of the show
        :param channel: the index of the channel
        :param start: index of the first frame of the selected segment
        :param stop: index of the last frame of the selected segment

        :return: the number of frames, the sum of the frames and the sum of
            their squares (both computed along axis 0)
        """
        feat, _ = self.load(show, channel=channel, start=start, stop=stop)
        return feat.shape[0], feat.sum(axis=0), numpy.sum(feat**2, axis=0)
Anthony Larcher's avatar
Anthony Larcher committed
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610

    def stack_features(self,
                       show_list,
                       channel_list=None,
                       feature_filename_list=None,
                       label_list=None,
                       start_list=None,
                       stop_list=None):
        """
        Load acoustic features from a list of files and return them stacked in a 2D-array
        one line per frame.

        The i-th elements of all lists are passed positionally to ``self.load``
        in the order (show, channel, feature filename, label, start, stop).

        :param show_list: list of show IDs to load
        :param channel_list: one channel per show; defaults to zeros
        :param feature_filename_list: one feature file name per show; defaults to None for each show
        :param label_list: one label per show; defaults to None for each show
        :param start_list: one start index per show; defaults to None for each show
        :param stop_list: one stop index per show; defaults to None for each show
        :return: the features of all shows stacked vertically, in the order of ``show_list``
        """
        nb_shows = len(show_list)

        # NOTE(review): numpy.zeros yields float channels (0.0) — confirm that
        # self.load accepts a float channel before changing the dtype.
        if channel_list is None:
            channel_list = numpy.zeros(nb_shows)
        # numpy.empty with an object dtype yields arrays filled with None
        if feature_filename_list is None:
            feature_filename_list = numpy.empty(nb_shows, dtype='|O')
        if label_list is None:
            label_list = numpy.empty(nb_shows, dtype='|O')
        if start_list is None:
            start_list = numpy.empty(nb_shows, dtype='|O')
        if stop_list is None:
            stop_list = numpy.empty(nb_shows, dtype='|O')

        features_list = []
        for idx, load_arg in enumerate(zip(show_list,
                                           channel_list,
                                           feature_filename_list,
                                           label_list,
                                           start_list,
                                           stop_list)):
            # NOTE(review): progress is reported at CRITICAL level, presumably so
            # that it shows whatever the logging configuration is — kept as is.
            logging.critical("load file {} / {}".format(idx + 1, nb_shows))
            # self.load returns a (features, label) pair; only features are stacked
            features_list.append(self.load(*load_arg)[0])

        return numpy.vstack(features_list)
Anthony Larcher's avatar
Anthony Larcher committed
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702


    def _stack_features_worker(self,
                               input_queue,
                               output_queue):
        """Load a list of feature files into a Queue object
        
        :param input: a Queue object
        :param output: a list of Queue objects to fill
        """
        while True:
            next_task = input_queue.get()

            if next_task is None:
                # Poison pill means shutdown
                output_queue.put(None)
                input_queue.task_done()
                break
            
            output_queue.put(self.load(*next_task)[0])
            
            input_queue.task_done()

    #@profile
    def stack_features_parallel(self,
                                show_list,
                                channel_list=None,
                                feature_filename_list=None,
                                label_list=None,
                                start_list=None,
                                stop_list=None,
                                num_thread=1):
        """Load the features of a list of shows in parallel and stack them in a single ndarray.

        One task per show is pushed on a shared queue that is consumed by
        ``num_thread`` worker processes running ``self._stack_features_worker``.
        Results are collected worker by worker, so the order of the rows in
        the returned array is not guaranteed to follow ``show_list``.

        :param show_list: list of show IDs to load
        :param channel_list: one channel per show; defaults to zeros
        :param feature_filename_list: one feature file name per show; defaults to None for each show
        :param label_list: one label per show; defaults to None for each show
        :param start_list: one start index per show; defaults to None for each show
        :param stop_list: one stop index per show; defaults to None for each show
        :param num_thread: number of worker processes (optional, default is 1)
        :return: all loaded features concatenated along the first axis
        """
        nb_shows = len(show_list)

        # Missing lists are replaced by per-show defaults
        # (an object-dtype numpy.empty array is filled with None)
        channel_list = numpy.zeros(nb_shows) if channel_list is None else channel_list
        feature_filename_list = (numpy.empty(nb_shows, dtype='|O')
                                 if feature_filename_list is None else feature_filename_list)
        label_list = numpy.empty(nb_shows, dtype='|O') if label_list is None else label_list
        start_list = numpy.empty(nb_shows, dtype='|O') if start_list is None else start_list
        stop_list = numpy.empty(nb_shows, dtype='|O') if stop_list is None else stop_list

        # Room for every task plus one poison pill per worker
        queue_in = multiprocessing.JoinableQueue(maxsize=nb_shows + num_thread)
        queue_out = []

        # Start the worker processes, each one with its own output queue
        jobs = []
        for _ in range(num_thread):
            result_queue = multiprocessing.Queue()
            queue_out.append(result_queue)
            worker = multiprocessing.Process(target=self._stack_features_worker,
                                             args=(queue_in, result_queue))
            jobs.append(worker)
            worker.start()

        # Submit one task per show
        all_tasks = zip(show_list, channel_list, feature_filename_list, label_list, start_list, stop_list)
        for task in all_tasks:
            queue_in.put(task)

        # One poison pill per worker so that every worker terminates
        for _ in range(num_thread):
            queue_in.put(None)

        # Wait until every task (poison pills included) is marked as done
        queue_in.join()

        # Drain each output queue up to the None marker left by its worker.
        # The test is done with "is", as the items are arrays.
        output = []
        for result_queue in queue_out:
            chunk = result_queue.get()
            while chunk is not None:
                output.append(chunk)
                chunk = result_queue.get()

        # The processes are only joined once their queues have been emptied
        for worker in jobs:
            worker.join()
        return numpy.concatenate(output, axis=0)