Commit 78944e2d authored by Anthony Larcher
Browse files

Modifs in new FeaturesServer

parent defc52cd
Pipeline #270 skipped
......@@ -28,10 +28,10 @@ Copyright 2014-2016 Anthony Larcher and Sylvain Meignier
useful parameters for speaker verification.
"""
import numpy
PARALLEL_MODULE = 'multiprocessing' # can be threading or multiprocessing; MPI is planned in the future
import numpy as np
param_type = np.float32
stat_type = np.float64
PARAM_TYPE = numpy.float32
STAT_TYPE = numpy.float64
# Import bosaris-like classes
from sidekit.bosaris import IdMap
......@@ -91,6 +91,7 @@ from sidekit.theano_utils import FForwardNetwork
from sidekit.sv_utils import clean_stat_server
__author__ = "Anthony Larcher and Sylvain Meignier"
__copyright__ = "Copyright 2014-2016 Anthony Larcher and Sylvain Meignier"
__license__ = "LGPL"
......
import numpy
import h5py
import logging
from sidekit import param_type
from sidekit import PARAM_TYPE
from sidekit.frontend.features import mfcc
from sidekit.frontend.io import read_audio, read_label, write_hdf5
from sidekit.frontend.vad import vad_snr, vad_energy
......@@ -171,9 +171,9 @@ class FeaturesExtractor():
# If the size of the signal is not enough for one frame, return zero features
if length < self.window_sample:
cep = numpy.empty((0, self.ceps_number), dtype=param_type)
energy = numpy.empty((0, 1), dtype=param_type)
fb = numpy.empty((0, self.filter_bank_size), dtype=param_type)
cep = numpy.empty((0, self.ceps_number), dtype=PARAM_TYPE)
energy = numpy.empty((0, 1), dtype=PARAM_TYPE)
fb = numpy.empty((0, self.filter_bank_size), dtype=PARAM_TYPE)
label = numpy.empty((0, 1), dtype='int8')
else:
......@@ -231,7 +231,6 @@ class FeaturesExtractor():
return h5f
#def save(self, show, channel, audio_file_dir, label_filename=None):
def save(self, show, channel, input_audio_filename=None, output_feature_filename=None):
"""
TO DO: BNF are not yet managed here
......
......@@ -53,7 +53,8 @@ class FeaturesServer():
- returns a concatenation of the whole set
"""
def __init__(self, extractor=None,
def __init__(self,
features_extractor=None,
feature_filename_structure=None,
sources=None,
dataset_list=None,
......@@ -73,19 +74,18 @@ class FeaturesServer():
double_channel_extension=None,
keep_all_features=None):
"""
:param extractor: a FeaturesExtractor if required to extract features from audio file
:param features_extractor: a FeaturesExtractor if required to extract features from audio file
if None, data are loaded from an existing HDF5 file
:param feature_filename_structure: structure of the filename to use to load HDF5 files
:param subservers: tuple of subservers (FeaturesServers_beta) to load features from each source
:return:
"""
self.extractor = None
self.features_extractor = None
self.feature_filename_structure = '{}'
self.sources = ()
self.dataset_list = None
# Post processing options
self.vad=None
self.snr = 40
......@@ -103,8 +103,8 @@ class FeaturesServer():
self.double_channel_extension = ('_a', '_b')
self.keep_all_features=True
if extractor is not None:
self.extractor = extractor
if features_extractor is not None:
self.features_extractor = features_extractor
if feature_filename_structure is not None:
self.feature_filename_structure = feature_filename_structure
if sources is not None:
......@@ -112,7 +112,6 @@ class FeaturesServer():
if dataset_list is not None:
self.dataset_list = dataset_list
if vad is not None:
self.vad = vad
if snr is not None:
......@@ -290,7 +289,6 @@ class FeaturesServer():
cep = rasta_filt(cep)
cep[:2, :] = cep[2, :]
label[:2] = label[2]
return cep, label
def load(self, show, channel=0, input_feature_filename=None):
......@@ -342,12 +340,12 @@ class FeaturesServer():
self.feature_filename_structure = input_feature_filename
# If no extractor for this source, open hdf5 file and return handler
if self.extractor is None:
if self.features_extractor is None:
h5f = h5py.File(self.feature_filename_structure.format(show))
# If an extractor is provided for this source, extract features and return an hdf5 handler
else:
h5f = self.extractor.extract(show, channel, input_audio_filename=input_feature_filename)
h5f = self.features_extractor.extract(show, channel, input_audio_filename=input_feature_filename)
# Concatenate all required datasets
feat = []
......@@ -381,10 +379,10 @@ class FeaturesServer():
# Each source has its own sources (including subservers) that provide features and labels
features = []
label = numpy.empty(0)
for fs, get_vad in self.sources:
for features_server, get_vad in self.sources:
# Get features from this source
feat, lbl = fs.get_features(show, channel=channel)
feat, lbl = features_server.get_features(show, channel=channel)
if get_vad:
label = lbl
......
......@@ -36,7 +36,7 @@ from sidekit.frontend.io import *
from sidekit.frontend.normfeat import *
from sidekit.frontend.features import *
from sidekit import param_type
from sidekit import PARAM_TYPE
__author__ = "Anthony Larcher and Sylvain Meignier"
__copyright__ = "Copyright 2014-2016 Anthony Larcher and Sylvain Meignier"
......@@ -83,15 +83,15 @@ def compute_delta(features, win=3, method='filter',
"""
# First and last features are appended to the beginning and the end of the
# stream to avoid border effects
x = np.zeros((features.shape[0] + 2 * win, features.shape[1]), dtype=param_type)
x = np.zeros((features.shape[0] + 2 * win, features.shape[1]), dtype=PARAM_TYPE)
x[:win, :] = features[0, :]
x[win:-win, :] = features
x[-win:, :] = features[-1, :]
delta = np.zeros(x.shape, dtype=param_type)
delta = np.zeros(x.shape, dtype=PARAM_TYPE)
if method == 'diff':
filt = np.zeros(2 * win + 1, dtype=param_type)
filt = np.zeros(2 * win + 1, dtype=PARAM_TYPE)
filt[0] = -1
filt[-1] = 1
......@@ -124,7 +124,7 @@ def pca_dct(cep, left_ctx=12, right_ctx=12, P=None):
ceps = framing(y, win_size=left_ctx + 1 + right_ctx).transpose(0, 2, 1)
dct_temp = (dct_basis(left_ctx + 1 + right_ctx, left_ctx + 1 + right_ctx)).T
if P is None:
P = np.eye(dct_temp.shape[0] * cep.shape[1], dtype=param_type)
P = np.eye(dct_temp.shape[0] * cep.shape[1], dtype=PARAM_TYPE)
return (np.dot(ceps.reshape(-1, dct_temp.shape[0]),
dct_temp).reshape(ceps.shape[0], -1)).dot(P)
......@@ -180,7 +180,7 @@ def trfbank(fs, nfft, lowfreq, maxfreq, nlinfilt, nlogfilt, midfreq=1000):
# ------------------------
# Compute start/middle/end points of the triangular filters in spectral
# domain
freqs = np.zeros(nfilt + 2, dtype=param_type)
freqs = np.zeros(nfilt + 2, dtype=PARAM_TYPE)
if nlogfilt == 0:
linsc = (maxfreq - lowfreq) / (nlinfilt + 1)
freqs[:nlinfilt + 2] = lowfreq + np.arange(nlinfilt + 2) * linsc
......@@ -200,7 +200,7 @@ def trfbank(fs, nfft, lowfreq, maxfreq, nlinfilt, nlogfilt, midfreq=1000):
# Compute log-linear filters on [1000;maxfreq]
lowMel = hz2mel(min([1000, maxfreq]))
maxMel = hz2mel(maxfreq)
mels = np.zeros(nlogfilt + 2, dtype=param_type)
mels = np.zeros(nlogfilt + 2, dtype=PARAM_TYPE)
melsc = (maxMel - lowMel) / (nlogfilt + 1)
# Verify that mel2hz(melsc)>linsc
......@@ -211,7 +211,7 @@ def trfbank(fs, nfft, lowfreq, maxfreq, nlinfilt, nlogfilt, midfreq=1000):
freqs[:nlinfilt] = lowfreq + np.arange(nlinfilt) * linsc
lowMel = hz2mel(freqs[nlinfilt - 1] + 2 * linsc)
maxMel = hz2mel(maxfreq)
mels = np.zeros(nlogfilt + 2, dtype=param_type)
mels = np.zeros(nlogfilt + 2, dtype=PARAM_TYPE)
melsc = (maxMel - lowMel) / (nlogfilt + 1)
mels[:nlogfilt + 2] = lowMel + np.arange(nlogfilt + 2) * melsc
......@@ -221,7 +221,7 @@ def trfbank(fs, nfft, lowfreq, maxfreq, nlinfilt, nlogfilt, midfreq=1000):
heights = 2. / (freqs[2:] - freqs[0:-2])
# Compute filterbank coeff (in fft domain, in bins)
fbank = np.zeros((nfilt, int(np.floor(nfft / 2)) + 1), dtype=param_type)
fbank = np.zeros((nfilt, int(np.floor(nfft / 2)) + 1), dtype=PARAM_TYPE)
# FFT bins (in Hz)
nfreqs = np.arange(nfft) / (1. * nfft) * fs
......@@ -261,7 +261,7 @@ def mel_filter_bank(fs, nfft, lowfreq, maxfreq, widest_nlogfilt, widest_lowfreq,
#------------------------
# Compute start/middle/end points of the triangular filters in spectral
# domain
widest_freqs = np.zeros(widest_nlogfilt + 2, dtype=param_type)
widest_freqs = np.zeros(widest_nlogfilt + 2, dtype=PARAM_TYPE)
lowMel = hz2mel(widest_lowfreq)
maxMel = hz2mel(widest_maxfreq)
......@@ -272,13 +272,13 @@ def mel_filter_bank(fs, nfft, lowfreq, maxfreq, widest_nlogfilt, widest_lowfreq,
widest_freqs = mel2hz(mels)
# Select filters in the narrow band
sub_band_freqs = np.array([fr for fr in widest_freqs if lowfreq <= fr <= maxfreq], dtype=param_type)
sub_band_freqs = np.array([fr for fr in widest_freqs if lowfreq <= fr <= maxfreq], dtype=PARAM_TYPE)
heights = 2./(sub_band_freqs[2:] - sub_band_freqs[0:-2])
nfilt = sub_band_freqs.shape[0] - 2
# Compute filterbank coeff (in fft domain, in bins)
fbank = np.zeros((nfilt, np.floor(nfft/2)+1), dtype=param_type)
fbank = np.zeros((nfilt, np.floor(nfft/2)+1), dtype=PARAM_TYPE)
# FFT bins (in Hz)
nfreqs = np.arange(nfft) / (1. * nfft) * fs
......@@ -357,7 +357,7 @@ def mfcc(input_sig,
l = framed.shape[0]
nfft = 2 ** int(np.ceil(np.log2(window_length)))
ham = np.hamming(window_length)
spec = np.ones((l, int(nfft / 2) + 1), dtype=param_type)
spec = np.ones((l, int(nfft / 2) + 1), dtype=PARAM_TYPE)
logEnergy = np.log((framed**2).sum(axis=1))
dec = 500000
start = 0
......
......@@ -32,20 +32,13 @@ import struct
import math
import os
import decimal
import wave
import logging
import audioop
from scipy.io import wavfile
from scipy.signal import decimate
from sidekit.sidekit_io import *
from sidekit import PARAM_TYPE
from sidekit import param_type
try:
import h5py
h5py_loaded = True
except ImportError:
h5py_loaded = False
__author__ = "Anthony Larcher"
__copyright__ = "Copyright 2014-2016 Anthony Larcher"
......@@ -82,7 +75,7 @@ def read_pcm(inputFileName):
sampleCount = int(f.tell() / 2)
f.seek(0, 0) # go to the beginning of the file
data = np.asarray(struct.unpack('<' + 'h' * sampleCount, f.read()))
return (data/32768.0).astype(param_type)
return (data/32768.0).astype(PARAM_TYPE)
def read_wav(inputFileName):
......@@ -93,7 +86,7 @@ def read_wav(inputFileName):
:return: the audio signal read from the file in a ndarray.
"""
framerate, sig = wavfile.read(inputFileName)
return (sig/32768.).astype(param_type), framerate
return (sig/32768.).astype(PARAM_TYPE), framerate
def pcmu2lin(p, s=4004.189931):
......@@ -374,7 +367,7 @@ def read_sph(inputFileName, mode='p'):
info[0] = -1
if not ffx[4] == '':
pass # VERIFY SCRIPT, WHICH CASE IS HANDLED HERE
return y.astype(param_type), int(info[8])
return y.astype(PARAM_TYPE), int(info[8])
def read_audio(inputFileName, fs=None):
......@@ -407,7 +400,7 @@ def read_audio(inputFileName, fs=None):
print("Warning in read_audio, up-sampling function is not implemented yet!")
elif read_fs % float(fs) == 0 and not fs == read_fs:
sig = decimate(sig, int(read_fs / float(fs)), n=None, ftype='iir', axis=0)
return sig.astype(param_type), fs
return sig.astype(PARAM_TYPE), fs
@check_path_existance
......@@ -513,7 +506,7 @@ def read_spro4(inputFileName,
lbl = read_label(labelFileName, selectedLabel, framePerSecond)
features = features[lbl, :]
return features.astype(param_type)
return features.astype(PARAM_TYPE)
def read_hdf5_segment(filename, feature_id, mask, start, end):
......@@ -587,7 +580,7 @@ def read_spro4_segment(inputFileName, start=0, end=None):
if start != s or end != e: # repeat first or/and last frame as required
features = np.r_[np.repeat(features[[0]], s-start, axis=0), features, np.repeat(features[[-1]], end-e, axis=0)]
return features.astype(param_type)
return features.astype(PARAM_TYPE)
@check_path_existance
......@@ -610,7 +603,7 @@ def write_cep_hdf5(features, fh, show):
fh.create_dataset(show, data=features, compression="gzip", fletcher32=True)
def read_cep_hdf5(fh, show):
return (fh.get(show).value).astype(param_type)
return (fh.get(show).value).astype(PARAM_TYPE)
@check_path_existance
def write_htk(features,
......@@ -731,7 +724,7 @@ def read_hdf5(fh, show, feature_id="ceps", label=True):
vad = fh.get(show + '/' + "vad").value.astype('bool').squeeze()
else:
vad = None
return feat.astype(param_type), vad
return feat.astype(PARAM_TYPE), vad
else:
print("Cannot find {} in current file".format(show + '/' + feature_id))
......@@ -856,7 +849,7 @@ def read_htk(inputFileName,
d = d[lbl, :]
return d.astype(param_type), fp, dt, tc, t
return d.astype(PARAM_TYPE), fp, dt, tc, t
def read_htk_segment(inputFileName,
......@@ -902,7 +895,7 @@ def read_htk_segment(inputFileName,
fh.close()
if start != s or stop != e: # repeat first or/and last frame as required
m = np.r_[np.repeat(m[[0]], s-start, axis=0), m, np.repeat(m[[-1]], stop-e, axis=0)]
return m.astype(param_type)
return m.astype(PARAM_TYPE)
def read_feature_segment(inputFileName,
......
This diff is collapsed.
......@@ -29,8 +29,8 @@ import os
import numpy as np
import copy
import sys
from sidekit import PARALLEL_MODULE
import logging
from sidekit import PARALLEL_MODULE
__license__ = "LGPL"
__author__ = "Anthony Larcher"
......
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment