Commit e67d0913 authored by Anthony Larcher's avatar Anthony Larcher
Browse files

different minor modifications

parent 405ab784
......@@ -22,7 +22,7 @@
# along with SIDEKIT. If not, see <>.
Copyright 2014-2017 Anthony Larcher and Sylvain Meignier
Copyright 2014-2018 Anthony Larcher and Sylvain Meignier
from ctypes import *
......@@ -35,21 +35,14 @@ import importlib
# Read environment variable if it exists
"theano_config":'cuda', # Can be 'cpu' or 'cuda'
if 'SIDEKIT' in os.environ:
for cfg in os.environ['SIDEKIT'].split(","):
k, val = cfg.split("=")
if k == "theano":
if val == "false":
SIDEKIT_CONFIG["theano"] = False
elif k == "theano_config":
SIDEKIT_CONFIG["theano_config"] = val
elif k == "libsvm":
if k == "libsvm":
if val == "false":
SIDEKIT_CONFIG["libsvm"] = False
elif k == "mpi":
......@@ -120,23 +113,6 @@ from sidekit.gmm_scoring import gmm_scoring
from sidekit.jfa_scoring import jfa_scoring
# Import NNET classes and functions if the FLAG is True
theano_imported = False
if SIDEKIT_CONFIG["theano"]:
if SIDEKIT_CONFIG["theano_config"] == "cuda":
os.environ['THEANO_FLAGS'] = 'mode=FAST_RUN,device=cuda,floatX=float32'
os.environ['THEANO_FLAGS'] = 'mode=FAST_RUN,device=cpu,floatX=float32'
theano_imported = True
except ImportError:
print("Cannot import Theano")
if theano_imported:
print("Import theano")
from sidekit.nnet.feed_forward import FForwardNetwork
from sidekit.sv_utils import clean_stat_server
libsvm_loaded = False
......@@ -182,7 +158,7 @@ __maintainer__ = "Anthony Larcher"
__email__ = ""
__status__ = "Production"
__docformat__ = 'reStructuredText'
# __all__ = ["io",
# "vad",
......@@ -272,8 +272,7 @@ class FeaturesExtractor(object):
# Perform feature selection
label, threshold = self._vad(cep, energy, fb, signal[start:end, channel])
print("type(label) = {}\n".format(type(label)))
print("label.dtype = {}\n".format(label.dtype))
if len(label) < len(energy):
label = numpy.hstack((label, numpy.zeros(len(energy)-len(label), dtype='bool')))
......@@ -320,7 +319,7 @@ class FeaturesExtractor(object):
if "vad" not in self.save_param:
label = None
write_hdf5(show, h5f,
cep, cep_mean, cep_std,
energy, energy_mean, energy_std,
......@@ -37,11 +37,10 @@ import os
import struct
import warnings
import wave
import scipy.signal
from scipy.signal import decimate
from sidekit.sidekit_wrappers import check_path_existance
from sidekit.sidekit_io import *
from sidekit.sidekit_wrappers import check_path_existance
__author__ = "Anthony Larcher"
......@@ -433,7 +432,7 @@ def read_audio(input_file_name, framerate=None):
print("Warning in read_audio, up-sampling function is not implemented yet!")
elif read_framerate % float(framerate) == 0 and not framerate == read_framerate:
sig = decimate(sig, int(read_framerate / float(framerate)), n=None, ftype='iir', axis=0)
sig = scipy.signal.decimate(sig, int(read_framerate / float(framerate)), n=None, ftype='iir', axis=0)
return sig.astype(numpy.float32), framerate
......@@ -561,13 +560,13 @@ def read_hdf5_segment(file_name, dataset, mask, start, end):
n_frames, feat_size = fh[dataset].shape
compressed = False
if dataset.split('/')[0] + "/comp" in h5f:
if dataset.split('/')[0] + "/comp" in fh:
compressed = True
# Check that the segment is within the range of the file
s, e = max(0, start), min(n_frames, end)
if compressed:
(A, B) = h5f["/".join((dataset + "_comp"))].value
(A, B) = fh["/".join((dataset + "_comp"))].value
features = (fh[dataset][s:e, mask]-B)/A
features = fh[dataset][s:e, mask]
......@@ -779,7 +778,7 @@ def write_hdf5(show,
fh.create_dataset(show + '/energy', data=(A_energy * energy - B_energy).astype("short"),
maxshape=(None, None),
......@@ -24,7 +24,7 @@
Copyright 2014-2017 Anthony Larcher and Sylvain Meignier
:mod:`nnet` provides methods to manage Neural Networks using Theano
:mod:`nnet` provides methods to manage Neural Networks using PyTorch
from sidekit.nnet.feed_forward import FForwardNetwork
......@@ -32,7 +32,7 @@ from sidekit.nnet.feed_forward import kaldi_to_hdf5
__author__ = "Anthony Larcher and Sylvain Meignier"
__copyright__ = "Copyright 2014-2017 Anthony Larcher and Sylvain Meignier"
__copyright__ = "Copyright 2014-2018 Anthony Larcher and Sylvain Meignier"
__license__ = "LGPL"
__maintainer__ = "Anthony Larcher"
__email__ = ""
......@@ -39,22 +39,13 @@ from multiprocessing import Pool
import numpy
import os
import time
import torch
import warnings
import sidekit.frontend
from sidekit.sidekit_io import init_logging
# from sidekit import THEANO_CONFIG
from sidekit.sidekit_wrappers import check_path_existance
# if THEANO_CONFIG == "gpu":
# os.environ['THEANO_FLAGS'] = 'mode=FAST_RUN,device=gpu,floatX=float32'
# else:
# os.environ['THEANO_FLAGS'] = 'mode=FAST_RUN,device=cpu,floatX=float32'
import theano
import theano.tensor as T
__license__ = "LGPL"
__author__ = "Anthony Larcher"
__copyright__ = "Copyright 2015-2017 Anthony Larcher"
......@@ -66,7 +57,7 @@ __docformat__ = 'reStructuredText'
def kaldi_to_hdf5(input_file_name, output_file_name):
Convert a text file containing frame alinment from Kaldi into an
Convert a text file containing frame alignment from Kaldi into an
HDF5 file with the following structure:
......@@ -146,36 +137,270 @@ def mean_std_many(features_server, feature_size, seg_list, traps=False, num_thre
return total_n, total_f / total_n, total_s / total_n
def get_params(params):
Return the parameters in a Python dictionary format
:param params: a list of Theano shared variables
:return: the same variables in Numpy format in a dictionary
return { p.get_value() for p in params}
def set_params(params, param_dict):
Set the parameters in a list of Theano variables from a dictionary
A FAIRE: en vérifiant le transfert sur les GPUs et voir ou se situe le bottleneck
tester avec des données bidon et la même architecture
tester avec des données bidon et une file d'attente remplie avec du multiprocessing
modifier la création des batchs pour utiliser la file d'attente
self.params = {"input_mean": input_mean.astype(T.config.floatX),
"input_std": input_std.astype(T.config.floatX),
"activation_functions": layers_activations,
"b{}".format(len(sizes) - 1): numpy.zeros(sizes[-1]).astype(T.config.floatX),
"hidden_layer_sizes": hidden_layer_sizes
for ii in range(1, len(sizes)):
self.params["W{}".format(ii)] = numpy.random.randn(
sizes[ii - 1],
sizes[ii]).astype(T.config.floatX) * 0.1
self.params["b{}".format(ii)] = numpy.random.random(sizes[ii]).astype(T.config.floatX) / 5.0 - 4.1
def init_weights(module):
if type(module) == torch.nn.Linear:, 0.1)
if module.bias is not None:, -3.9)
class FForwardNetwork(torch.nn.Module):
def __init__(self,
filename = None,
D_in = 0,
hidden_layer_sizes = (),
hidden_layer_activations = (),
input_mean = numpy.empty(0),
input_std = numpy.empty(0)
In the constructor we instantiate two nn.Linear modules and assign them as
member variables.
self.d_in = D_in
self.d_out = D_out
self.model = torch.nn.Sequential()
if filename is not None:
# Load model parameters
pass # TO DO
elif len(hidden_layer_activations) != len(hidden_layer_sizes) + 1:
pass # TO DO
else: # initialize a NN with given sizes of layers and activation functions
modules = []
layer_sizes = (D_in,) + hidden_layer_sizes + (D_out,)
for ii in numpy.arange(len(layer_sizes) - 1):
modules.append(torch.nn.Linear(layer_sizes[ii], layer_sizes[ii + 1]))
if hidden_layer_activations[ii] is not None:
self.model = torch.nn.Sequential(*modules)
def random_init(self):
Randomly initialize the model parameters (weights and bias)
def forward(self, x):
In the forward function we accept a Tensor of input data and we must return
a Tensor of output data. We can use Modules defined in the constructor as
well as arbitrary operators on Tensors.
x_data = torch.autograd.Variable(torch.from_numpy(x).float())
return self.model.forward(x_data)
def predict(self, x):
x_data = torch.autograd.Variable(torch.from_numpy(x).float(), requires_grad=False)
output = self.model.forward(x_data)
def train(self):
def _train_acoustic(self,
train the network and return the parameters
Exit at the end of the training process or as soon as the output_accuracy_limit is reached on
the training data
Return a dictionary of the network parameters
:param training_seg_list: list of segments to use for training
It is a list of 4-element tuples, in which the
first argument is the absolute file name
second argument is the index of the first frame of the segment
third argument is the index of the last frame of the segment
and fourth argument is a numpy array of integer,
labels corresponding to each frame of the segment
:param cross_validation_seg_list: is a list of segments to use for
cross validation. Same format as train_seg_list
:param features_server: FeaturesServer used to load data
:param optimizer: a string that can be "adam", "sgd",
:param lr: initial learning rate
:param segment_buffer_size: number of segments loaded at once
:param batch_size: size of the minibatches as number of frames
:param max_iters: maximum number of epochs
:param tolerance:
:param output_file_name: root name of the files to save Neural Network parameters
:param save_tmp_nnet: boolean, if True, save the parameters after each epoch
:param traps: boolean, if True, compute TRAPS on the input data, if False just use concatenated frames
# Define the optimizer and the loss to train the network
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(self.model.parameters())
# split the list of files to process
training_segment_sets = [training_seg_list[i:i + segment_buffer_size]
for i in range(0, len(training_seg_list), segment_buffer_size)]
for t in range(nb_epoch):
# Training phase
for ii, training_segment_set in enumerate(training_segment_sets):
start_time = time.time()
l = []
f = []
for idx, val in enumerate(training_segment_set):
show, s, _, label = val
e = s + len(label)
# Load the segment of frames plus left and right context
feat, _ = features_server.load(show,
start=s - features_server.context[0],
stop=e + features_server.context[1])
if traps:
# Get features in context
# Get features in context
lab = numpy.hstack(l).astype(numpy.int16)
fea = numpy.vstack(f).astype(numpy.float32)
assert numpy.all(lab != -1) and len(lab) == len(fea) # make sure that all frames have defined label
shuffle = numpy.random.permutation(len(lab))
lab = lab.take(shuffle, axis=0)
fea = fea.take(shuffle, axis=0)
for jj, (X, t) in enumerate(zip(numpy.array_split(fea, nsplits), numpy.array_split(lab, nsplits))):
lab_pred = self.forward(X)
# Compute and print loss
loss = criterion(lab_pred, lab)
print(t, loss.item())
# Zero gradients, perform the backward pass and update the weights
# Cross-validation
validation_correct = 0
validation_total = 0
sample_nb = 0
for ii, cv_segment in enumerate(cross_validation_seg_list):
show, s, e, label = cv_segment
e = s + len(label)
l = label.astype(numpy.int16)
# Load the segment of frames plus left and right context
feat, _ = features_server.load(show,
start=s - features_server.context[0],
stop=e + features_server.context[1])
if traps:
# Get features in context
X = features_server.get_traps(feat=feat,
stop=feat.shape[0] - features_server.context[1])[0].astype(
X = features_server.get_context(feat=feat,
stop=feat.shape[0] - features_server.context[1])[0].astype(
lab_pred = self.forward(X)
sample_nb += len(X)
loss = criterion(lab_pred, l)
cost +=[0]
acc += torch.sum(preds ==
print("Epoch: {}/{}\tCost={}\tAccuracy={}".format(t+1, nb_epoch, cost/ii, 100.*acc/sample_nb))
:param params: dictionary to read from
:param param_dict: list of variables in Theano format
for p_ in params:
def export_params(params, param_dict):
Export network parameters into Numpy format
:param params: dictionary of variables in Theano format
:param param_dict: dictionary of variables in Numpy format
for k in param_dict:
params[] = k.get_value()
class FForwardNetwork(object):
......@@ -183,6 +408,8 @@ class FForwardNetwork(object):
Class FForwardNetwork that implements a feed-forward neural network for multiple purposes
def __init__(self, filename=None,
......@@ -425,13 +425,11 @@ def init_logging(level=logging.INFO, filename=None):
numpy.set_printoptions(linewidth=250, precision=4)
frm = '%(asctime)s - %(levelname)s - %(message)s'
root = logging.getLogger()
if root.handlers:
for handler in root.handlers:
logging.basicConfig(format=frm, level=level)
if filename is not None:
fh = logging.FileHandler(filename)
......@@ -28,7 +28,6 @@ in an efficient manner
import os
import numpy
import copy
import sys
import logging
from sidekit import PARALLEL_MODULE
......@@ -204,7 +204,7 @@ class StatServer:
#def __init__(self, statserver_file_name=None, ubm=None, index=None):$
def __init__(self, statserver_file_name=None, distrib_nb=0, feature_size=0, index=None):
def __init__(self, statserver_file_name=None, distrib_nb=0, feature_size=0, index=None, ubm=None):
"""Initialize an empty StatServer or load a StatServer from an existing
......@@ -220,6 +220,10 @@ class StatServer:
self.stat0 = numpy.array([], dtype=STAT_TYPE)
self.stat1 = numpy.array([], dtype=STAT_TYPE)
if ubm is not None:
distrib_nb = ubm.w.shape[0]
feature_size =[1]
if statserver_file_name is None:
# initialize
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment