Commit 92f95258 authored by Anthony Larcher's avatar Anthony Larcher
Browse files

mostly doc

parent 88f4d2b9
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
# #
# SIDEKIT is a python package for speaker verification. # SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/ # Home page: http://www-lium.univ-lemans.fr/sidekit/
# #PARALLEL_MODULE
# SIDEKIT is a python package for speaker verification. # SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/ # Home page: http://www-lium.univ-lemans.fr/sidekit/
# #
...@@ -50,8 +50,8 @@ if 'SIDEKIT' in os.environ: ...@@ -50,8 +50,8 @@ if 'SIDEKIT' in os.environ:
if val == "true": if val == "true":
SIDEKIT_CONFIG["mpi"] = True SIDEKIT_CONFIG["mpi"] = True
if k == "cuda": if k == "cuda":
if val == "true": if val == "false":
SIDEKIT_CONFIG["cuda"] = True SIDEKIT_CONFIG["cuda"] = False
PARALLEL_MODULE = 'multiprocessing' # can be , threading, multiprocessing MPI is planned in the future PARALLEL_MODULE = 'multiprocessing' # can be , threading, multiprocessing MPI is planned in the future
...@@ -187,5 +187,5 @@ __maintainer__ = "Anthony Larcher" ...@@ -187,5 +187,5 @@ __maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr" __email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production" __status__ = "Production"
__docformat__ = 'reStructuredText' __docformat__ = 'reStructuredText'
__version__="1.4" __version__="1.9"
...@@ -259,6 +259,14 @@ class IdMap: ...@@ -259,6 +259,14 @@ class IdMap:
return ok return ok
def set(self, left, right, start=None, stop=None): def set(self, left, right, start=None, stop=None):
"""
Fill the IdMap object with numpy array of leftids, rightids, and optionally starts and stops
:param left: a numpy array for leftids
:param right: a numpy array for rightids
:param start: a numpy array for start time (optional)
:param stop: a numpy array for stop time (optional)
"""
self.leftids = copy.deepcopy(left) self.leftids = copy.deepcopy(left)
self.rightids = copy.deepcopy(right) self.rightids = copy.deepcopy(right)
......
...@@ -108,6 +108,14 @@ class Key: ...@@ -108,6 +108,14 @@ class Key:
@classmethod @classmethod
def create(cls, modelset, segset, tar, non): def create(cls, modelset, segset, tar, non):
"""
Class method that creates a Key object
:param modelset: a numpy array with model IDs
:param segset: a numpy array with segment IDs
:param tar: a matrix of boolean, True if the trial is target, dimensions must be: number of models X number of segments
:param non: a matrix of boolean, True if the trial is impostor, dimensions must be: number of models X number of segments
:return: a new key object
"""
key = Key() key = Key()
key.modelset = modelset key.modelset = modelset
key.segset = segset key.segset = segset
......
...@@ -48,7 +48,7 @@ __maintainer__ = "Anthony Larcher" ...@@ -48,7 +48,7 @@ __maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr" __email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production" __status__ = "Production"
__docformat__ = 'reStructuredText' __docformat__ = 'reStructuredText'
#comment
class FeaturesServer(object): class FeaturesServer(object):
""" """
...@@ -494,12 +494,12 @@ class FeaturesServer(object): ...@@ -494,12 +494,12 @@ class FeaturesServer(object):
def get_features_per_speaker(self, show, idmap, channel=0, input_feature_filename=None, label=None): def get_features_per_speaker(self, show, idmap, channel=0, input_feature_filename=None, label=None):
""" """
Load a single file and return a dictionary with spk_ids as keys and (feature, label) as data Load a single file and return a dictionary with spk_ids as keys and (feature, label) as data
:param show: :param show: name of the show
:param channel: :param channel: number of the audio channel
:param input_feature_filename: :param input_feature_filename: name of the input file to read from
:param label: :param label: voice activity detection labels (optional)
:param idmap: :param idmap: idmap to select the features
:return: :return: a numpy array of acoustic features
""" """
if input_feature_filename is not None: if input_feature_filename is not None:
self.feature_filename_structure = input_feature_filename self.feature_filename_structure = input_feature_filename
...@@ -641,7 +641,6 @@ class FeaturesServer(object): ...@@ -641,7 +641,6 @@ class FeaturesServer(object):
return numpy.vstack(features_list) return numpy.vstack(features_list)
def _stack_features_worker(self, def _stack_features_worker(self,
input_queue, input_queue,
output_queue): output_queue):
...@@ -660,8 +659,6 @@ class FeaturesServer(object): ...@@ -660,8 +659,6 @@ class FeaturesServer(object):
output_queue.put(self.load(*next_task)[0]) output_queue.put(self.load(*next_task)[0])
input_queue.task_done() input_queue.task_done()
#@profile
def stack_features_parallel(self, # fileList, numThread=1): def stack_features_parallel(self, # fileList, numThread=1):
show_list, show_list,
channel_list=None, channel_list=None,
......
...@@ -99,6 +99,12 @@ def write_pcm(data, output_file_name): ...@@ -99,6 +99,12 @@ def write_pcm(data, output_file_name):
@check_path_existance @check_path_existance
def write_wav(data, output_file_name, fs): def write_wav(data, output_file_name, fs):
"""Write signal to single channel WAV 16 bits
:param data: audio signal to write
:param output_file_name: name of the file to write
:param fs: sample rate in Hz
"""
if data.dtype != numpy.int16: if data.dtype != numpy.int16:
if data.dtype == numpy.float32: if data.dtype == numpy.float32:
data /= numpy.abs(data).max() data /= numpy.abs(data).max()
...@@ -980,6 +986,7 @@ def read_htk_segment(input_file_name, ...@@ -980,6 +986,7 @@ def read_htk_segment(input_file_name,
m = numpy.r_[numpy.repeat(m[[0]], s-start, axis=0), m, numpy.repeat(m[[-1]], stop-e, axis=0)] m = numpy.r_[numpy.repeat(m[[0]], s-start, axis=0), m, numpy.repeat(m[[-1]], stop-e, axis=0)]
return m.astype(numpy.float32) return m.astype(numpy.float32)
def _add_dataset_header(fh, def _add_dataset_header(fh,
dataset_id, dataset_id,
_min_val, _min_val,
...@@ -988,6 +995,12 @@ def _add_dataset_header(fh, ...@@ -988,6 +995,12 @@ def _add_dataset_header(fh,
""" """
Create a dataset in the HDF5 file and write the data Create a dataset in the HDF5 file and write the data
after compressing float to int after compressing float to int
:param fh: file handler in HDF5 format
:param dataset_id: name of the new dataset to create
:param _min_val: minimum value in the dataset (used for compression)
:param _range: range of the values in the dataset (used for compression)
:param _header: header of the dataset
""" """
_c_header = (_header - _min_val) / _range _c_header = (_header - _min_val) / _range
numpy.clip(_c_header, 0., 1.) numpy.clip(_c_header, 0., 1.)
...@@ -1004,6 +1017,7 @@ def _add_dataset_header(fh, ...@@ -1004,6 +1017,7 @@ def _add_dataset_header(fh,
compression="gzip", compression="gzip",
fletcher32=True) fletcher32=True)
def _add_percentile_dataset(fh, def _add_percentile_dataset(fh,
dataset_id, dataset_id,
data): data):
...@@ -1011,6 +1025,10 @@ def _add_percentile_dataset(fh, ...@@ -1011,6 +1025,10 @@ def _add_percentile_dataset(fh,
Create the dataset in the HDF5 file, write the data Create the dataset in the HDF5 file, write the data
compressed in int8 format and the header compressed in compressed in int8 format and the header compressed in
int format int format
:param fh: file handler in HDF5 format
:param dataset_id: name of the new dataset to create
:param data: data to fill the dataset
""" """
_min_val = data.min() _min_val = data.min()
_range = data.ptp() _range = data.ptp()
...@@ -1044,28 +1062,71 @@ def _add_percentile_dataset(fh, ...@@ -1044,28 +1062,71 @@ def _add_percentile_dataset(fh,
fletcher32=True) fletcher32=True)
def _read_dataset(h5f, dataset_id): def _read_dataset(h5f, dataset_id):
"""
Read a dataset from HDF5 file
:param h5f: file handler in HDF5 format
:param dataset_id: name of the dataset to read
:return: the data stored in the dataset
"""
data = h5f[dataset_id][()] data = h5f[dataset_id][()]
if data.ndim == 1: if data.ndim == 1:
data = data[:, numpy.newaxis] data = data[:, numpy.newaxis]
return data return data
def _read_segment(h5f, dataset_id, s, e): def _read_segment(h5f, dataset_id, s, e):
"""
Read a sequence of features stored in an HDF5 dataset.
:param h5f: file handler in HDF5 format
:param dataset_id: name of the dataset to read from
:param s: start index of the sequence to read
:param e: end index of the sequence to read
:return: the sequence of features in a numpy array format
"""
data = h5f[dataset_id][s:e] data = h5f[dataset_id][s:e]
return data return data
def _read_dataset_htk(h5f, dataset_id): def _read_dataset_htk(h5f, dataset_id):
"""
Read a dataset from HDF5 file
:param h5f: file handler in HDF5 format
:param dataset_id: name of the dataset to read
:return: the sequence of features in a numpy array format
"""
(A, B) = h5f[dataset_id + "comp"][()] (A, B) = h5f[dataset_id + "comp"][()]
data = (h5f[dataset_id][()] + B) / A data = (h5f[dataset_id][()] + B) / A
if data.ndim == 1: if data.ndim == 1:
data = data[:, numpy.newaxis] data = data[:, numpy.newaxis]
return data return data
def _read_segment_htk(h5f, dataset_id, e, s): def _read_segment_htk(h5f, dataset_id, e, s):
"""
Read a sequence of features stored in an HDF5 dataset written in HTK format
:param h5f: file handler in HDF5 format
:param dataset_id: name of the dataset to read from
:param s: start index of the sequence to read
:param e: end index of the sequence to read
:return: the sequence of features in a numpy array format
"""
(A, B) = h5f[dataset_id + "comp"][()] (A, B) = h5f[dataset_id + "comp"][()]
data = (h5f[dataset_id][s:e, :] + B) / A data = (h5f[dataset_id][s:e, :] + B) / A
return data return data
def read_dataset_percentile(h5f, dataset_id): def read_dataset_percentile(h5f, dataset_id):
"""
Read a compressed dataset from HDF5 file
:param h5f: file handler in HDF5 format
:param dataset_id: name of the dataset to read
:return: the sequence of features in a numpy array format
"""
# read the header # read the header
(_min_val, _range) = h5f[dataset_id + "_min_range"][()] (_min_val, _range) = h5f[dataset_id + "_min_range"][()]
c_header = h5f[dataset_id + "_header"][()] c_header = h5f[dataset_id + "_header"][()]
...@@ -1079,7 +1140,17 @@ def read_dataset_percentile(h5f, dataset_id): ...@@ -1079,7 +1140,17 @@ def read_dataset_percentile(h5f, dataset_id):
mat3 = (_header[:,[2]] + (_header[:,[3]] - _header[:,[2]]) * (c_data.T - 192) * (1/63)) * (c_data.T > 192) mat3 = (_header[:,[2]] + (_header[:,[3]] - _header[:,[2]]) * (c_data.T - 192) * (1/63)) * (c_data.T > 192)
return (mat1+mat2+mat3).T return (mat1+mat2+mat3).T
def _read_segment_percentile(h5f, dataset_id, s, e): def _read_segment_percentile(h5f, dataset_id, s, e):
"""
Read a sequence of features stored in a compressed HDF5 dataset
:param h5f: file handler in HDF5 format
:param dataset_id: name of the dataset to read from
:param s: start index of the sequence to read
:param e: end index of the sequence to read
:return: the sequence of features in a numpy array format
"""
# read the header # read the header
(_min_val, _range) = h5f[dataset_id + "_min_range"][()] (_min_val, _range) = h5f[dataset_id + "_min_range"][()]
c_header = h5f[dataset_id + "_header"][()] c_header = h5f[dataset_id + "_header"][()]
...@@ -1100,6 +1171,25 @@ def _write_show(show, ...@@ -1100,6 +1171,25 @@ def _write_show(show,
fb, fb_mean, fb_std, fb, fb_mean, fb_std,
bnf, bnf_mean, bnf_std, bnf, bnf_mean, bnf_std,
label): label):
"""
Write features for a given show in HDF5 format
:param show: name of the show to write
:param fh: file handler in HDF5 format
:param cep: cepstral coefficients
:param cep_mean: mean vector of the cepstral coefficients
:param cep_std: standard deviation vector of the cepstral coefficients
:param energy: energy value per frame
:param energy_mean: mean of the energy
:param energy_std: standard deviation of the energy
:param fb: filterbank coefficients
:param fb_mean: mean vector of the filterbank coefficients
:param fb_std: standard deviation vector of filterbank coefficients
:param bnf: bottleneck features
:param bnf_mean: mean vector of the bottleneck features
:param bnf_std: standard deviation vector of bottleneck features
:param label: voice activity detection labels per frame
"""
if cep is not None: if cep is not None:
fh.create_dataset(show + '/cep', data=cep.astype('float32'), fh.create_dataset(show + '/cep', data=cep.astype('float32'),
maxshape=(None, None), maxshape=(None, None),
...@@ -1161,6 +1251,7 @@ def _write_show(show, ...@@ -1161,6 +1251,7 @@ def _write_show(show,
compression="gzip", compression="gzip",
fletcher32=True) fletcher32=True)
def _write_show_htk(show, def _write_show_htk(show,
fh, fh,
cep, cep_mean, cep_std, cep, cep_mean, cep_std,
...@@ -1168,6 +1259,24 @@ def _write_show_htk(show, ...@@ -1168,6 +1259,24 @@ def _write_show_htk(show,
fb, fb_mean, fb_std, fb, fb_mean, fb_std,
bnf, bnf_mean, bnf_std, bnf, bnf_mean, bnf_std,
label): label):
"""
Write features for a given show in HDF5 and HTK format
:param fh: file handler in HDF5 format
:param cep: cepstral coefficients
:param cep_mean: mean vector of the cepstral coefficients
:param cep_std: standard deviation vector of the cepstral coefficients
:param energy: energy value per frame
:param energy_mean: mean of the energy
:param energy_std: standard deviation of the energy
:param fb: filterbank coefficients
:param fb_mean: mean vector of the filterbank coefficients
:param fb_std: standard deviation vector of filterbank coefficients
:param bnf: bottleneck features
:param bnf_mean: mean vector of the bottleneck features
:param bnf_std: standard deviation vector of bottleneck features
:param label: voice activity detection labels per frame
"""
if cep is not None: if cep is not None:
A_cep = 2 * 32767. / (cep.max() - cep.min()) A_cep = 2 * 32767. / (cep.max() - cep.min())
B_cep = (cep.max() + cep.min()) * 32767. / (cep.max() - cep.min()) B_cep = (cep.max() + cep.min()) * 32767. / (cep.max() - cep.min())
...@@ -1253,6 +1362,7 @@ def _write_show_htk(show, ...@@ -1253,6 +1362,7 @@ def _write_show_htk(show,
compression="gzip", compression="gzip",
fletcher32=True) fletcher32=True)
def _write_show_percentile(show, def _write_show_percentile(show,
fh, fh,
cep, cep_mean, cep_std, cep, cep_mean, cep_std,
...@@ -1260,6 +1370,25 @@ def _write_show_percentile(show, ...@@ -1260,6 +1370,25 @@ def _write_show_percentile(show,
fb, fb_mean, fb_std, fb, fb_mean, fb_std,
bnf, bnf_mean, bnf_std, bnf, bnf_mean, bnf_std,
label): label):
"""
Write features for a given show in HDF5 format using percentile compression
:param show: name of the show to write
:param fh: file handler in HDF5 format
:param cep: cepstral coefficients
:param cep_mean: mean vector of the cepstral coefficients
:param cep_std: standard deviation vector of the cepstral coefficients
:param energy: energy value per frame
:param energy_mean: mean of the energy
:param energy_std: standard deviation of the energy
:param fb: filterbank coefficients
:param fb_mean: mean vector of the filterbank coefficients
:param fb_std: standard deviation vector of filterbank coefficients
:param bnf: bottleneck features
:param bnf_mean: mean vector of the bottleneck features
:param bnf_std: standard deviation vector of bottleneck features
:param label: voice activity detection labels per frame
"""
if cep is not None: if cep is not None:
_add_percentile_dataset(fh, show + '/cep', cep) _add_percentile_dataset(fh, show + '/cep', cep)
...@@ -1318,7 +1447,6 @@ def _write_show_percentile(show, ...@@ -1318,7 +1447,6 @@ def _write_show_percentile(show,
fletcher32=True) fletcher32=True)
def write_hdf5(show, def write_hdf5(show,
fh, fh,
cep, cep_mean, cep_std, cep, cep_mean, cep_std,
...@@ -1343,7 +1471,7 @@ def write_hdf5(show, ...@@ -1343,7 +1471,7 @@ def write_hdf5(show,
:param bnf_mean: pre-computed mean of the bottleneck features :param bnf_mean: pre-computed mean of the bottleneck features
:param bnf_std: pre-computed standard deviation of the bottleneck features :param bnf_std: pre-computed standard deviation of the bottleneck features
:param label: vad labels to store :param label: vad labels to store
:param compressed: boolean, default is False :param compression: boolean, default is False
:return: :return:
""" """
#write the type of compression: could be: #write the type of compression: could be:
...@@ -1382,13 +1510,14 @@ def write_hdf5(show, ...@@ -1382,13 +1510,14 @@ def write_hdf5(show,
bnf, bnf_mean, bnf_std, bnf, bnf_mean, bnf_std,
label) label)
def read_hdf5(h5f, show, dataset_list=("cep", "fb", "energy", "vad", "bnf")): def read_hdf5(h5f, show, dataset_list=("cep", "fb", "energy", "vad", "bnf")):
""" """
:param h5f: HDF5 file handler to read from :param h5f: HDF5 file handler to read from
:param show: identifier of the show to read :param show: identifier of the show to read
:param dataset_list: list of datasets to read and concatenate :param dataset_list: list of datasets to read and concatenate
:return: :return: a numpy array with acoustic features and one with VAD labels
""" """
compression_type = {0:'none', 1:'htk', 2:'percentile'} compression_type = {0:'none', 1:'htk', 2:'percentile'}
if "compression" not in h5f: if "compression" not in h5f:
...@@ -1464,17 +1593,18 @@ def read_hdf5(h5f, show, dataset_list=("cep", "fb", "energy", "vad", "bnf")): ...@@ -1464,17 +1593,18 @@ def read_hdf5(h5f, show, dataset_list=("cep", "fb", "energy", "vad", "bnf")):
return feat.astype(numpy.float32), label return feat.astype(numpy.float32), label
def _rms_energy(x): def _rms_energy(x):
return 10*numpy.log10((1e-12 + x.dot(x))/len(x)) return 10*numpy.log10((1e-12 + x.dot(x))/len(x))
def _add_noise(signal, noise_file_name, snr, sample_rate): def _add_noise(signal, noise_file_name, snr, sample_rate):
""" """
Add noise to a speech signal
:param signal: :param signal: the original signal to augment
:param noise_file_name: :param noise_file_name: the name of the noise file to use
:param snr: :param snr: signal to noise ratio
:return: :return: the signal augmented with noise
""" """
# Open noise file # Open noise file
if isinstance(noise_file_name, numpy.ndarray): if isinstance(noise_file_name, numpy.ndarray):
...@@ -1502,6 +1632,7 @@ def _add_noise(signal, noise_file_name, snr, sample_rate): ...@@ -1502,6 +1632,7 @@ def _add_noise(signal, noise_file_name, snr, sample_rate):
return (noisy - noisy.mean()) / noisy.std() return (noisy - noisy.mean()) / noisy.std()
def bin_interp(upcount, lwcount, upthr, lwthr, margin, tol=0.1): def bin_interp(upcount, lwcount, upthr, lwthr, margin, tol=0.1):
n_iter = 1 n_iter = 1
if abs(upcount - upthr - margin) < tol: if abs(upcount - upthr - margin) < tol:
...@@ -1525,6 +1656,7 @@ def bin_interp(upcount, lwcount, upthr, lwthr, margin, tol=0.1): ...@@ -1525,6 +1656,7 @@ def bin_interp(upcount, lwcount, upthr, lwthr, margin, tol=0.1):
diff = midcount - midthr - margin diff = midcount - midthr - margin
return midcount return midcount
def asl_meter(x, fs, nbits=16): def asl_meter(x, fs, nbits=16):
'''Measure the Active Speech Level (ASL) of x following ITU-T P.56. '''Measure the Active Speech Level (ASL) of x following ITU-T P.56.
If x is integer, it will be scaled to (-1, 1) according to nbits. If x is integer, it will be scaled to (-1, 1) according to nbits.
...@@ -1587,6 +1719,7 @@ def asl_meter(x, fs, nbits=16): ...@@ -1587,6 +1719,7 @@ def asl_meter(x, fs, nbits=16):
return asl return asl
def _add_reverb(signal, reverb_file_name, sample_rate, reverb_level=-26.0, ): def _add_reverb(signal, reverb_file_name, sample_rate, reverb_level=-26.0, ):
'''Adds reverb (convolutive noise) to a speech signal. '''Adds reverb (convolutive noise) to a speech signal.
The output speech level is normalized to asl_level. The output speech level is normalized to asl_level.
...@@ -1600,107 +1733,3 @@ def _add_reverb(signal, reverb_file_name, sample_rate, reverb_level=-26.0, ): ...@@ -1600,107 +1733,3 @@ def _add_reverb(signal, reverb_file_name, sample_rate, reverb_level=-26.0, ):
return (y - y.mean()) / y.std() return (y - y.mean()) / y.std()
def degrade_audio(input_path,
input_extension,
output_path,
output_extension,
input_filename,
output_filename,
sampling_frequency=16000,
noise_file_name=None,
snr=-10,
reverb_file_name=None,
reverb_level=-26.):
"""
:param input_filename:
:param output_filename:
:return:
"""
# Open audio file, get the signal and possibly the sampling frequency
signal, sample_rate = read_audio(input_filename, sampling_frequency)
if signal.ndim == 1:
signal = signal[:, numpy.newaxis]
for channel in range(signal.shape[1]):
if noise_file_name is not None:
signal[:, channel] = _add_noise(signal[:, channel], noise_file_name, snr, sampling_frequency)
if reverb_file_name is not None:
signal[:, channel] = _add_reverb(signal[:, channel], reverb_file_name, sampling_frequency, reverb_level)
write_wav(signal, output_filename, sample_rate)
@process_parallel_lists
def augment_list(input_path,
input_extension,
output_path,
output_extension,
sampling_frequency,
show_list,
channel_list,
audio_file_list=None,
feature_file_list=None,
noise_file_list=None,
snr_list=None,
reverb_file_list=None,
reverb_levels=None,
num_thread=1):
"""
Compute the acoustic parameters (filter banks, cepstral coefficients, log-energy and bottleneck features
for a list of audio files and save them to disk in a HDF5 format
The process is parallelized if num_thread is higher than 1
:param show_list: list of IDs of the show to process
:param channel_list: list of channel indices corresponding to each show