Commit 39e464a5 authored by Anthony Larcher

refactoring

parent c73b1d5b
@@ -28,6 +28,7 @@ Copyright 2014-2021 Anthony Larcher
 import collections
 import math
 import numpy
+from scipy import signal
 import pandas
 import random
 import soundfile
@@ -459,10 +460,7 @@ def data_augmentation(speech,
     """

     # Select the data augmentation randomly
-    if len(transform_dict.keys()) >= transform_number:
-        aug_idx = numpy.arange(len(transform_dict.keys()))
-    else:
-        aug_idx = random.choice(numpy.arange(len(transform_dict.keys())), k=transform_number)
+    aug_idx = random.sample(range(len(transform_dict.keys())), k=transform_number)
     augmentations = numpy.array(list(transform_dict.keys()))[aug_idx]

     if "phone_filtering" in augmentations:
@@ -481,12 +479,10 @@ def data_augmentation(speech,
         speech = strech(speech, rate)

     if "add_reverb" in augmentations:
-        rir_nfo = random.randrange(len(rir_df))
-        rir_fn = transform_dict["add_noise"]["data_path"] + "/" + rir_nfo + ".wav"
+        rir_nfo = rir_df.iloc[random.randrange(rir_df.shape[0])].file_id
+        rir_fn = transform_dict["add_reverb"]["data_path"] + "/" + rir_nfo + ".wav"
         rir, rir_fs = torchaudio.load(rir_fn)
-        rir = rir[rir_nfo[1], :]  #keep selected channel
-        speech_ = torch.nn.functional.pad(speech, (rir.shape[1]-1, 0))
-        speech = torch.nn.functional.conv1d(speech_[None, ...], rir[None, ...])[0]
+        speech = torch.tensor(signal.convolve(speech, rir, mode='full')[:, :speech.shape[1]])

     if "add_noise" in augmentations:
         # Pick a noise type
@@ -499,7 +495,7 @@ def data_augmentation(speech,
             # TODO make SNRs configurable by noise type
             snr_db = random.randint(13, 20)
             pick_count = random.randint(3, 7)
-            index_list = random.choices(range(noise_df.loc['speech'].shape[0]), k=pick_count)
+            index_list = random.sample(range(noise_df.loc['speech'].shape[0]), k=pick_count)
             for idx in index_list:
                 noise_row = noise_df.loc['speech'].iloc[idx]
                 noise += load_noise_seg(noise_row, speech.shape, sample_rate, transform_dict["add_noise"]["data_path"])
...
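For readers scanning the augmentation changes above: the rewritten add_reverb branch drops the per-channel selection and the manual pad/conv1d in favour of a full scipy convolution that is then truncated back to the original length. A minimal, standalone sketch of that idea, assuming a mono [1, T] speech tensor and an illustrative RIR path (not the toolkit's exact code):

import torch
import torchaudio
from scipy import signal

def reverberate(speech: torch.Tensor, rir_path: str) -> torch.Tensor:
    """Convolve [1, T] speech with a room impulse response and keep the original length."""
    rir, _ = torchaudio.load(rir_path)                                 # [channels, L]
    rir = rir[0:1, :]                                                  # assumption: keep the first channel
    wet = signal.convolve(speech.numpy(), rir.numpy(), mode='full')    # [1, T + L - 1]
    return torch.tensor(wet[:, :speech.shape[1]])                      # truncate back to [1, T]

# Illustrative call (the file name is hypothetical):
# speech = reverberate(speech, "rir_db/room_001.wav")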
# coding: utf-8 -*-
#
# This file is part of SIDEKIT.
#
# SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/
#
# SIDEKIT is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the License,
# or (at your option) any later version.
#
# SIDEKIT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with SIDEKIT. If not, see <http://www.gnu.org/licenses/>.
"""
Copyright 2014-2021 Anthony Larcher, Yevhenii Prokopalo
"""
import os
import torch
os.environ['MKL_THREADING_LAYER'] = 'GNU'
__license__ = "LGPL"
__author__ = "Anthony Larcher"
__copyright__ = "Copyright 2015-2021 Anthony Larcher"
__maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production"
__docformat__ = 'reS'
class MeanStdPooling(torch.nn.Module):
    """
    Mean and Standard deviation pooling
    """
    def __init__(self):
        """
        """
        super(MeanStdPooling, self).__init__()
        pass

    def forward(self, x):
        """
        :param x: [batch, feature_size, time] input tensor
        :return: [batch, 2 * feature_size] concatenation of the mean and standard deviation over time
        """
        mean = torch.mean(x, dim=2)
        std = torch.std(x, dim=2)
        return torch.cat([mean, std], dim=1)
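A quick shape check for the module above (values are illustrative): pooling over the time axis turns a [batch, channels, frames] tensor into a [batch, 2 * channels] vector.

import torch

pool = MeanStdPooling()
x = torch.randn(8, 256, 150)   # [batch, channels, frames]
out = pool(x)                  # mean and std over the frame axis, concatenated
print(out.shape)               # torch.Size([8, 512])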
class AttentivePooling(torch.nn.Module):
    """
    Mean and Standard deviation attentive pooling
    """
    def __init__(self, num_channels, n_mels, reduction=2, global_context=False):
        """
        :param num_channels: number of channels of the feature maps to pool (the attention input size is num_channels * (n_mels // 8))
        :param n_mels: number of mel bands of the front-end
        :param reduction: bottleneck reduction factor of the attention network
        :param global_context: if True, the mean and std of the whole sequence are concatenated to the attention input
        """
        # TODO Make global_context configurable (True/False)
        # TODO Make convolution parameters configurable
        super(AttentivePooling, self).__init__()
        in_factor = 3 if global_context else 1
        self.attention = torch.nn.Sequential(
            torch.nn.Conv1d(num_channels * (n_mels // 8) * in_factor, num_channels // reduction, kernel_size=1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(num_channels // reduction),
            torch.nn.Tanh(),
            torch.nn.Conv1d(num_channels // reduction, num_channels * (n_mels // 8), kernel_size=1),
            torch.nn.Softmax(dim=2),
        )
        self.global_context = global_context
        self.gc = MeanStdPooling()

    def new_parameter(self, *size):
        out = torch.nn.Parameter(torch.FloatTensor(*size))
        torch.nn.init.xavier_normal_(out)
        return out

    def forward(self, x):
        """
        :param x: [batch, feature_size, time] input tensor
        :return: [batch, 2 * feature_size] attention-weighted mean and standard deviation
        """
        if self.global_context:
            w = self.attention(torch.cat([x, self.gc(x).unsqueeze(2).repeat(1, 1, x.shape[-1])], dim=1))
        else:
            w = self.attention(x)
        mu = torch.sum(x * w, dim=2)
        rh = torch.sqrt((torch.sum((x ** 2) * w, dim=2) - mu ** 2).clamp(min=1e-5))
        x = torch.cat((mu, rh), 1)
        x = x.view(x.size()[0], -1)
        return x
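Since the attention network ends with a Softmax over the time axis, the weights sum to one per feature, so mu and rh above are the attention-weighted mean and standard deviation. A small sketch of that computation on plain tensors, using a random stand-in for the learned weights and made-up shapes:

import torch

x = torch.randn(8, 1536, 150)                          # [batch, features, frames]
w = torch.softmax(torch.randn(8, 1536, 150), dim=2)    # stand-in for self.attention(x)

mu = torch.sum(x * w, dim=2)                                                  # weighted mean, [8, 1536]
rh = torch.sqrt((torch.sum((x ** 2) * w, dim=2) - mu ** 2).clamp(min=1e-5))   # weighted std, [8, 1536]
stats = torch.cat((mu, rh), dim=1)                                            # [8, 3072]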
class GruPooling(torch.nn.Module):
    """
    GRU-based pooling: the output of the last time step of a GRU run over the sequence is kept as the fixed-size representation.
    """
    def __init__(self, input_size, gru_node, nb_gru_layer):
        """
        :param input_size: feature dimension of the input sequence
        :param gru_node: number of units in each GRU layer
        :param nb_gru_layer: number of stacked GRU layers
        """
        super(GruPooling, self).__init__()
        self.lrelu_keras = torch.nn.LeakyReLU(negative_slope=0.3)
        self.bn_before_gru = torch.nn.BatchNorm1d(num_features=input_size)
        self.gru = torch.nn.GRU(input_size=input_size,
                                hidden_size=gru_node,
                                num_layers=nb_gru_layer,
                                batch_first=True)

    def forward(self, x):
        """
        :param x: [batch, feature_size, time] input tensor
        :return: [batch, gru_node] output of the last GRU time step
        """
        x = self.bn_before_gru(x)
        x = self.lrelu_keras(x)
        x = x.permute(0, 2, 1)  # (batch, filt, time) >> (batch, time, filt)
        self.gru.flatten_parameters()
        x, _ = self.gru(x)
        x = x[:, -1, :]
        return x
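GruPooling keeps only the last time step of the GRU output, so a [batch, features, frames] input collapses to a [batch, gru_node] embedding. Illustrative usage (parameter values are made up):

import torch

pool = GruPooling(input_size=128, gru_node=1024, nb_gru_layer=1)
x = torch.randn(8, 128, 300)   # [batch, filters, frames]
emb = pool(x)                  # output of the last GRU time step
print(emb.shape)               # torch.Size([8, 1024])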
@@ -242,8 +242,7 @@ class SideSet(Dataset):
         self.rir_df = None
         if "add_reverb" in self.transform:
             # load the RIR database
-            tmp_rir_df = pandas.read_csv(self.transformation["add_reverb"]["rir_db_csv"])
-            self.rir_df = zip(tmp_rir_df['file_id'].tolist(), tmp_rir_df['channel'].tolist())
+            self.rir_df = pandas.read_csv(self.transformation["add_reverb"]["rir_db_csv"])

     def __getitem__(self, index):
         """
@@ -314,7 +313,7 @@ class IdMapSet(Dataset):
                  window_len=24000,
                  window_shift=8000,
                  sample_rate=16000,
-                 min_duration=0.150
+                 min_duration=0.165
                  ):
         """
...
@@ -43,6 +43,9 @@ import yaml
 from collections import OrderedDict
 from torch.utils.data import DataLoader
 from sklearn.model_selection import train_test_split
+from .pooling import MeanStdPooling
+from .pooling import AttentivePooling
+from .pooling import GruPooling
 from .preprocessor import MfccFrontEnd
 from .preprocessor import MelSpecFrontEnd
 from .preprocessor import RawPreprocessor
@@ -67,6 +70,8 @@ from .loss import ArcMarginProduct
 from ..sidekit_io import init_logging

+torch.backends.cudnn.benchmark = True
+
 os.environ['MKL_THREADING_LAYER'] = 'GNU'

 __license__ = "LGPL"
@@ -78,17 +83,6 @@ __status__ = "Production"
 __docformat__ = 'reS'

-#logging.basicConfig(format='%(asctime)s %(message)s')
-
-# Make PyTorch Deterministic
-torch.manual_seed(0)
-torch.backends.cudnn.deterministic = False
-torch.backends.cudnn.benchmark = True
-
-numpy.random.seed(0)
-
 def eer(negatives, positives):
     """Logarithmic complexity EER computation
@@ -424,63 +418,6 @@ class TrainingMonitor():
             self.current_patience -= 1


-class MeanStdPooling(torch.nn.Module):
-    """
-    Mean and Standard deviation pooling
-    """
-    def __init__(self):
-        """
-        """
-        super(MeanStdPooling, self).__init__()
-        pass
-
-    def forward(self, x):
-        """
-        :param x:
-        :return:
-        """
-        mean = torch.mean(x, dim=2)
-        std = torch.std(x, dim=2)
-        return torch.cat([mean, std], dim=1)
-
-
-class GruPooling(torch.nn.Module):
-    """
-    """
-    def __init__(self, input_size, gru_node, nb_gru_layer):
-        """
-        :param input_size:
-        :param gru_node:
-        :param nb_gru_layer:
-        """
-        super(GruPooling, self).__init__()
-        self.lrelu_keras = torch.nn.LeakyReLU(negative_slope = 0.3)
-        self.bn_before_gru = torch.nn.BatchNorm1d(num_features = input_size)
-        self.gru = torch.nn.GRU(input_size = input_size,
-                                hidden_size = gru_node,
-                                num_layers = nb_gru_layer,
-                                batch_first = True)
-
-    def forward(self, x):
-        """
-        :param x:
-        :return:
-        """
-        x = self.bn_before_gru(x)
-        x = self.lrelu_keras(x)
-        x = x.permute(0, 2, 1)  # (batch, filt, time) >> (batch, time, filt)
-        self.gru.flatten_parameters()
-        x, _ = self.gru(x)
-        x = x[:, -1, :]
-        return x
-
-
 class Xtractor(torch.nn.Module):
     """
     Class that defines an x-vector extractor based on 5 convolutional layers and a mean standard deviation pooling
@@ -614,11 +551,11 @@ class Xtractor(torch.nn.Module):
             elif self.loss == 'aps':
                 self.after_speaker_embedding = SoftmaxAngularProto(int(self.speaker_number))

-            self.preprocessor_weight_decay = 0.000
-            self.sequence_network_weight_decay = 0.000
-            self.stat_pooling_weight_decay = 0.000
-            self.before_speaker_embedding_weight_decay = 0.00
-            self.after_speaker_embedding_weight_decay = 0.00
+            self.preprocessor_weight_decay = 0.00002
+            self.sequence_network_weight_decay = 0.00002
+            self.stat_pooling_weight_decay = 0.00002
+            self.before_speaker_embedding_weight_decay = 0.00002
+            self.after_speaker_embedding_weight_decay = 0.0002

         elif model_archi == "rawnet2":
@@ -855,18 +792,8 @@ class Xtractor(torch.nn.Module):
                                                                m=0.2,
                                                                easy_margin=True)

-                #self.after_speaker_embedding = ArcLinear(input_size,
-                #                                          self.speaker_number,
-                #                                          margin=self.aam_margin,
-                #                                          s=self.aam_s)
-                #self.after_speaker_embedding = ArcFace(embedding_size=input_size,
-                #                                       classnum=self.speaker_number,
-                #                                       s=64.,
-                #                                       m=0.5)
-
             self.after_speaker_embedding_weight_decay = cfg["after_embedding"]["weight_decay"]

     def forward(self, x, is_eval=False, target=None, extract_after_pooling=False):
         """
@@ -888,9 +815,6 @@ class Xtractor(torch.nn.Module):
         x = self.before_speaker_embedding(x)

         if self.norm_embedding:
-            #x_norm = x.norm(p=2,dim=1, keepdim=True) / 10. # Why 10. ?
-            #x_norm = torch.linalg.norm(x, ord=2, dim=1, keepdim=True, out=None, dtype=None)
-            #x = torch.div(x, x_norm)
             x = l2_norm(x)

         if self.loss == "cce":
@@ -1058,6 +982,7 @@ def update_training_dictionary(dataset_description,
     training_opts["compute_test_eer"] = False
     training_opts["log_interval"] = 10
+    training_opts["validation_frequency"] = 1

     training_opts["tmp_model_name"] = "tmp_model.pt"
     training_opts["best_model_name"] = "best_model.pt"
@@ -1139,15 +1064,11 @@ def get_loaders(dataset_opts, training_opts, speaker_number):
     First we load the dataframe from CSV file in order to split it for training and validation purpose
     Then we provide those two
     """
-    #with open(dataset_yaml, "r") as fh:
-    #    dataset_params = yaml.load(fh, Loader=yaml.FullLoader)
-    #    df = pandas.read_csv(dataset_params["dataset_description"])
     df = pandas.read_csv(dataset_opts["dataset_csv"])

-    training_df, validation_df = train_test_split(df, test_size=dataset_opts["validation_ratio"] , stratify=df["speaker_idx"])
-
-    torch.manual_seed(training_opts['seed'])
+    training_df, validation_df = train_test_split(df,
+                                                  test_size=dataset_opts["validation_ratio"],
+                                                  stratify=df["speaker_idx"])

     training_set = SideSet(dataset_opts,
                            set_type="train",
@@ -1164,8 +1085,8 @@ def get_loaders(dataset_opts, training_opts, speaker_number):
     side_sampler = SideSampler(training_set.sessions['speaker_idx'],
                                speaker_number,
-                               1,
-                               100,
+                               dataset_opts["train"]["sampler"]["examples_per_speaker"],
+                               dataset_opts["train"]["sampler"]["samples_per_speaker"],
                                dataset_opts["batch_size"])

     training_loader = DataLoader(training_set,
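The two hard-coded sampler arguments (1 and 100) are now read from the dataset options. The exact YAML layout is not shown in this diff, but the accessed keys imply a nested structure along these lines (all values are only illustrative):

dataset_opts = {
    "batch_size": 256,                       # illustrative value
    "train": {
        "sampler": {
            "examples_per_speaker": 1,       # previously hard-coded to 1
            "samples_per_speaker": 100,      # previously hard-coded to 100
        },
    },
}

examples_per_speaker = dataset_opts["train"]["sampler"]["examples_per_speaker"]
samples_per_speaker = dataset_opts["train"]["sampler"]["samples_per_speaker"]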
@@ -1192,9 +1113,6 @@ def get_loaders(dataset_opts, training_opts, speaker_number):
     # Select a subset of non-target trials to reduce the number of tests
     tar_non_ratio = numpy.sum(tar_indices)/numpy.sum(non_indices)
-    #non_indices *= numpy.random.choice([False, True],
-    #                                   size=non_indices.shape,
-    #                                   p=[1-tar_non_ratio, tar_non_ratio])
     non_indices *= (numpy.random.rand(*non_indices.shape) < tar_non_ratio)

     return training_loader, validation_loader, tar_indices, non_indices
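The non-target subsampling above keeps each non-target trial with probability tar_non_ratio, so on average the retained non-target trials roughly match the number of target trials. A standalone illustration with made-up counts:

import numpy

tar_indices = numpy.zeros(2000, dtype=bool)
tar_indices[:100] = True                      # 100 target trials
non_indices = ~tar_indices                    # 1900 non-target trials

tar_non_ratio = numpy.sum(tar_indices) / numpy.sum(non_indices)        # ~0.053
non_indices *= (numpy.random.rand(*non_indices.shape) < tar_non_ratio)
print(numpy.sum(non_indices))                 # on average about 100 non-target trials kept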
@@ -1317,8 +1235,7 @@ def new_xtrain(dataset_description,
                **kwargs):
     """
     REFACTORING
-    - test the logging
-    - a function that takes the model and returns an optimizer and a scheduler
+    - refine the logging
     """

     dataset_opts, model_opts, training_opts = update_training_dictionary(dataset_description,
                                                                          model_description,
@@ -1333,23 +1250,33 @@ def new_xtrain(dataset_description,
                                best_eer=100,
                                compute_test_eer=training_opts["compute_test_eer"])

-    # Display the entire configurations as YAML dictionnaries
-    monitor.logger.info(yaml.dump(dataset_opts, default_flow_style=False))
-    monitor.logger.info(yaml.dump(model_opts, default_flow_style=False))
-    monitor.logger.info(yaml.dump(training_opts, default_flow_style=False))
+    # Make PyTorch Deterministic
+    torch.backends.cudnn.deterministic = False
+    if training_opts["deterministic"]:
+        torch.backends.cudnn.deterministic = True

     # Set all the seeds
     numpy.random.seed(training_opts["seed"])  # Set the random seed of numpy for the data split.
     torch.manual_seed(training_opts["seed"])
     torch.cuda.manual_seed(training_opts["seed"])

+    # Display the entire configurations as YAML dictionnaries
+    monitor.logger.info("\n*********************************\nDataset options\n*********************************\n")
+    monitor.logger.info(yaml.dump(dataset_opts, default_flow_style=False))
+    monitor.logger.info("\n*********************************\nModel options\n*********************************\n")
+    monitor.logger.info(yaml.dump(model_opts, default_flow_style=False))
+    monitor.logger.info("\n*********************************\nTraining options\n*********************************\n")
+    monitor.logger.info(yaml.dump(training_opts, default_flow_style=False))
+
     # Test to optimize
     torch.autograd.profiler.emit_nvtx(enabled=False)

     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

+    # Initialize the model
     model = get_network(model_opts)
     speaker_number = model.speaker_number
+    embedding_size = model.embedding_size

     if torch.cuda.device_count() > 1 and training_opts["multi_gpu"]:
         model = torch.nn.DataParallel(model)
@@ -1399,7 +1326,7 @@ def new_xtrain(dataset_description,
             val_acc, val_loss, val_eer = cross_validation(model,
                                                           validation_loader,
                                                           device,
-                                                          [validation_loader.dataset.__len__(), model_opts["embedding_size"]],
+                                                          [validation_loader.dataset.__len__(), embedding_size],
                                                           validation_tar_indices,
                                                           validation_non_indices,
                                                           training_opts["mixed_precision"])
@@ -2014,7 +1941,6 @@ def cross_validation(model, validation_loader, device, validation_shape, tar_ind
             #classes[cursor:cursor + batch_size] = target.detach().cpu()
             cursor += batch_size

-    #print(classes.shape[0])
     local_device = "cpu" if embeddings.shape[0] > 3e4 else device
     embeddings = embeddings.to(local_device)
     scores = torch.einsum('ij,kj', embeddings, embeddings).cpu().numpy()
@@ -2028,7 +1954,6 @@ def cross_validation(model, validation_loader, device, validation_shape, tar_ind
     equal_error_rate = rocch2eer(pmiss, pfa)

     return (100. * accuracy.cpu().numpy() / validation_shape[0],
-            loss.cpu().numpy() / ((batch_idx + 1) * batch_size),
             loss.cpu().numpy() / ((batch_idx + 1) * batch_size),
             equal_error_rate)
...