Commit 145cdecc authored by Anthony Larcher's avatar Anthony Larcher
Browse files

sad

parent fdd13f3b
......@@ -51,7 +51,7 @@ if 'SIDEKIT' in os.environ:
SIDEKIT_CONFIG["mpi"] = True
if k == "cuda":
if val == "true":
SIDEKIT_CONFIG["libsvm"] = True
SIDEKIT_CONFIG["cuda"] = True
PARALLEL_MODULE = 'multiprocessing' # can be , threading, multiprocessing MPI is planned in the future
......@@ -167,7 +167,9 @@ if CUDA:
from sidekit.nnet import xtrain
from sidekit.nnet import extract_idmap
from sidekit.nnet import extract_parallel
from sidekit.nnet import SAD_RNN
else:
print("Don't import Torch")
if SIDEKIT_CONFIG["mpi"]:
found_mpi4py = importlib.find_loader('mpi4py') is not None
......
......@@ -27,6 +27,7 @@ Copyright 2014-2019 Anthony Larcher and Sylvain Meignier
:mod:`nnet` provides methods to manage Neural Networks using PyTorch
"""
from sidekit.nnet.sad_rnn import SAD_RNN
from sidekit.nnet.feed_forward import FForwardNetwork
from sidekit.nnet.feed_forward import kaldi_to_hdf5
from sidekit.nnet.xsets import XvectorMultiDataset, XvectorDataset, StatDataset
......
......@@ -23,7 +23,8 @@ class SAD_Dataset(Dataset):
train_list = {}
with open(mdtm_file, 'r') as f:
for line in f:
lines = [l for l in f]
for line in lines[:500]:
show, _, start, dur, _, _, _, _ = line.rstrip().split()
if show not in train_list:
train_list[show] = []
......@@ -48,25 +49,56 @@ class SAD_Dataset(Dataset):
self.vad = {}
self.segments = []
#speech_only_segments = []
#speech_nonspeech_segments = []
for show in sorted(train_list.keys()):
features, _ = features_server.load(show)
labels = numpy.zeros((len(features), 1), dtype=numpy.int)
for seg in train_list[show]:
labels[seg['start']:seg['stop']] = 1
self.vad[show] = labels
for seg in uem_list[show]:
if seg['start'] is not None:
start, stop = seg['start'], seg['stop']
else:
start, stop = 0, len(features)
for i in range(start, min(stop, len(features)) - self.duration, self.step):
self.segments.append((show, i, i + self.duration))
speech_only_segments = []
speech_nonspeech_segments = []
if show in train_list and show in uem_list:
for seg in train_list[show]:
labels[seg['start']:seg['stop']] = 1
self.vad[show] = labels
for seg in uem_list[show]:
if seg['start'] is not None:
start, stop = seg['start'], seg['stop']
else:
start, stop = 0, len(features)
# create the segments containing ONLY speech (without overlap)
for i in range(start, min(stop, len(features)) - self.duration, self.duration):
if labels[i:i+self.duration].sum() == self.duration:
speech_only_segments.append((show, i, i + self.duration))
# create the segments containing SPEECH AND SILENCE (with overlap, to balance the classes)
for i in range(start, min(stop, len(features)) - self.duration, self.step):
if labels[i:i+self.duration].sum() < self.duration - 1:
speech_nonspeech_segments.append((show, i, i + self.duration))
#for i in range(start, min(stop, len(features)) - self.duration, self.step):
# self.segments.append((show, i, i + self.duration))
tmp = speech_only_segments + speech_nonspeech_segments
random.shuffle(tmp)
self.segments += tmp
print("Show {}, ratio S/NS = {}".format(show, len(speech_only_segments)/(len(speech_nonspeech_segments) + len(speech_only_segments))))
#tmp = speech_only_segments + speech_nonspeech_segments
#if shuffle:
# print("taille de tmp: {}".format(len(tmp)))
# random.shuffle(tmp)
# print("taille de tmp: {}".format(len(tmp)))
# print(tmp[0])
# for t in tmp:
# self.segments.append(t)
#self.segments = tmp.copy()
self.input_size = features.shape[1]
if shuffle:
random.shuffle(self.segments)
print("Final ratio S/NS = {}".format(len(speech_only_segments)/(len(speech_nonspeech_segments) + len(speech_only_segments))))
self.len = len(self.segments) // self.batch_size
......@@ -196,7 +228,6 @@ class SAD_RNN():
:param features_server: a sidekit FeaturesServer object
:param model_file_format: file format to save the model. The format uses the current epoch
"""
self.model.to(device)
criterion = nn.BCELoss()
optimizer = optim.RMSprop(self.model.parameters())
......@@ -209,9 +240,11 @@ class SAD_RNN():
for batch_idx, (X, Y) in enumerate(training_set):
batch_loss = self._fit_batch(optimizer, criterion, X, Y)
losses[epoch].append(batch_loss)
sys.stdout.write("\rEpoch {}/{}, loss {:.5f}".format(
print("Epoch {}/{}, loss {:.5f}\n".format(
epoch + 1, nb_epochs, numpy.mean(losses[epoch])))
sys.stdout.flush()
#sys.stdout.write("\rEpoch {}/{}, loss {:.5f}".format(
# epoch + 1, nb_epochs, numpy.mean(losses[epoch])))
#sys.stdout.flush()
it += 1
torch.save(self.model.state_dict(), model_file_format.format(epoch+1))
......
......@@ -230,7 +230,9 @@ def xtrain(args):
# Decrease learning rate after every epoch
#args.lr = args.lr * 0.9
#args.lr = args.lr * 0.9
args.lr = args.lr * 0.9
print(" Decrease learning rate: {}".format(args.lr))
def train_epoch(epoch, args, initial_model_file_name):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment