Anthony Larcher / sidekit / Commits

Commit 39e464a5 authored Apr 08, 2021 by Anthony Larcher

    refactoring

parent c73b1d5b

Changes: 4 files
nnet/augmentation.py  (view file @ 39e464a5)
...
...
@@ -28,6 +28,7 @@ Copyright 2014-2021 Anthony Larcher
 import collections
 import math
 import numpy
+from scipy import signal
 import pandas
 import random
 import soundfile
...
...
@@ -459,10 +460,7 @@ def data_augmentation(speech,
     """
     # Select the data augmentation randomly
-    if len(transform_dict.keys()) >= transform_number:
-        aug_idx = numpy.arange(len(transform_dict.keys()))
-    else:
-        aug_idx = random.choice(numpy.arange(len(transform_dict.keys())), k=transform_number)
+    aug_idx = random.sample(range(len(transform_dict.keys())), k=transform_number)

     augmentations = numpy.array(list(transform_dict.keys()))[aug_idx]

     if "phone_filtering" in augmentations:
...
...
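For context on this hunk (not part of the diff): the removed branch passed a k argument to random.choice, which raises a TypeError since random.choice has no k parameter. The replacement, random.sample, draws k distinct indices, so no augmentation is picked twice. A minimal sketch of the difference:

    import random

    transforms = ["add_noise", "add_reverb", "phone_filtering", "stretch"]

    # random.choices draws WITH replacement: the same index can come back twice
    print(random.choices(range(len(transforms)), k=2))

    # random.sample draws WITHOUT replacement: k distinct indices,
    # so each augmentation is applied at most once
    print(random.sample(range(len(transforms)), k=2))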
@@ -481,12 +479,10 @@ def data_augmentation(speech,
         speech = strech(speech, rate)

     if "add_reverb" in augmentations:
-        rir_nfo = random.randrange(len(rir_df))
-        rir_fn = transform_dict["add_noise"]["data_path"] + "/" + rir_nfo + ".wav"
+        rir_nfo = rir_df.iloc[random.randrange(rir_df.shape[0])].file_id
+        rir_fn = transform_dict["add_reverb"]["data_path"] + "/" + rir_nfo + ".wav"
         rir, rir_fs = torchaudio.load(rir_fn)
-        rir = rir[rir_nfo[1], :]  # keep selected channel
-        speech_ = torch.nn.functional.pad(speech, (rir.shape[1] - 1, 0))
-        speech = torch.nn.functional.conv1d(speech_[None, ...], rir[None, ...])[0]
+        speech = torch.tensor(signal.convolve(speech, rir, mode='full')[:, :speech.shape[1]])

     if "add_noise" in augmentations:
         # Pick a noise type
...
...
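The hunk above also fixes two bugs in the removed lines: the RIR path was looked up under the "add_noise" key instead of "add_reverb", and rir_nfo was indexed as a tuple although it was an integer. A minimal sketch of what the new scipy path does, with hypothetical shapes (speech as a (1, T) tensor, single-channel RIR):

    import numpy
    import torch
    from scipy import signal

    speech = torch.randn(1, 16000)        # (channels, samples): 1 s at 16 kHz
    rir = numpy.random.randn(1, 4000)     # stand-in room impulse response

    # 'full' convolution lengthens the signal by len(rir) - 1 samples,
    # so the result is truncated back to the input length to stay aligned
    reverberated = signal.convolve(speech.numpy(), rir, mode='full')[:, :speech.shape[1]]
    reverberated = torch.tensor(reverberated)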
@@ -499,7 +495,7 @@ def data_augmentation(speech,
         # TODO make SNRs configurable by noise type
         snr_db = random.randint(13, 20)
         pick_count = random.randint(3, 7)
-        index_list = random.choices(range(noise_df.loc['speech'].shape[0]), k=pick_count)
+        index_list = random.sample(range(noise_df.loc['speech'].shape[0]), k=pick_count)
         for idx in index_list:
             noise_row = noise_df.loc['speech'].iloc[idx]
             noise += load_noise_seg(noise_row, speech.shape, sample_rate, transform_dict["add_noise"]["data_path"])
...
...
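The add_noise branch mixes the accumulated noise into the speech at an SNR drawn between 13 and 20 dB. load_noise_seg and the exact scaling are SIDEKIT internals; the following is only an illustrative sketch of SNR-controlled mixing:

    import random
    import torch

    def mix_at_snr(speech: torch.Tensor, noise: torch.Tensor, snr_db: float) -> torch.Tensor:
        """Scale `noise` so the speech-to-noise power ratio equals snr_db, then add it."""
        speech_power = speech.pow(2).mean()
        noise_power = noise.pow(2).mean().clamp(min=1e-10)
        # SNR(dB) = 10 * log10(P_speech / (g^2 * P_noise))  =>  solve for the gain g
        gain = torch.sqrt(speech_power / (noise_power * 10 ** (snr_db / 10.0)))
        return speech + gain * noise

    snr_db = random.randint(13, 20)   # same range as in the diff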
nnet/pooling.py  (new file, 0 → 100644, view file @ 39e464a5)
# -*- coding: utf-8 -*-
#
# This file is part of SIDEKIT.
#
# SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/
#
# SIDEKIT is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the License,
# or (at your option) any later version.
#
# SIDEKIT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with SIDEKIT.  If not, see <http://www.gnu.org/licenses/>.
"""
Copyright 2014-2021 Anthony Larcher, Yevhenii Prokopalo
"""
import os
import torch

os.environ['MKL_THREADING_LAYER'] = 'GNU'

__license__ = "LGPL"
__author__ = "Anthony Larcher"
__copyright__ = "Copyright 2015-2021 Anthony Larcher"
__maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production"
__docformat__ = 'reS'
class MeanStdPooling(torch.nn.Module):
    """
    Mean and Standard deviation pooling
    """
    def __init__(self):
        """
        """
        super(MeanStdPooling, self).__init__()
        pass

    def forward(self, x):
        """
        :param x: a (batch, features, time) tensor of frame-level features
        :return: a (batch, 2 * features) tensor: per-feature mean and std over time
        """
        mean = torch.mean(x, dim=2)
        std = torch.std(x, dim=2)
        return torch.cat([mean, std], dim=1)
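A quick usage sketch, not part of the committed file (shapes are illustrative): the layer turns a variable-length sequence of frame-level features into a fixed-size vector.

    import torch

    pooling = MeanStdPooling()
    frames = torch.randn(8, 256, 200)   # (batch, channels, frames)
    embedding = pooling(frames)
    print(embedding.shape)              # torch.Size([8, 512]): mean and std concatenated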
class AttentivePooling(torch.nn.Module):
    """
    Mean and Standard deviation attentive pooling
    """
    def __init__(self, num_channels, n_mels, reduction=2, global_context=False):
        """
        :param num_channels: channel count of the frame-level feature maps
        :param n_mels: Mel band count of the front-end (the frequency axis is assumed to
            have been downsampled by a factor of 8 upstream, hence n_mels // 8)
        :param reduction: channel reduction factor of the attention bottleneck
        :param global_context: if True, concatenate utterance-level mean and std to every frame
        """
        # TODO Make global_context configurable (True/False)
        # TODO Make convolution parameters configurable
        super(AttentivePooling, self).__init__()
        in_factor = 3 if global_context else 1
        self.attention = torch.nn.Sequential(
            torch.nn.Conv1d(num_channels * (n_mels // 8) * in_factor, num_channels // reduction, kernel_size=1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(num_channels // reduction),
            torch.nn.Tanh(),
            torch.nn.Conv1d(num_channels // reduction, num_channels * (n_mels // 8), kernel_size=1),
            torch.nn.Softmax(dim=2),
        )
        self.global_context = global_context
        self.gc = MeanStdPooling()

    def new_parameter(self, *size):
        # Helper that creates a Xavier-initialized parameter; not used in forward()
        out = torch.nn.Parameter(torch.FloatTensor(*size))
        torch.nn.init.xavier_normal_(out)
        return out

    def forward(self, x):
        """
        :param x: a (batch, features, time) tensor of frame-level features
        :return: a (batch, 2 * features) tensor of attention-weighted mean and std
        """
        if self.global_context:
            # Append the utterance-level mean and std to every frame before the attention network
            w = self.attention(torch.cat([x, self.gc(x).unsqueeze(2).repeat(1, 1, x.shape[-1])], dim=1))
        else:
            w = self.attention(x)
        mu = torch.sum(x * w, dim=2)
        rh = torch.sqrt((torch.sum((x ** 2) * w, dim=2) - mu ** 2).clamp(min=1e-5))
        x = torch.cat((mu, rh), 1)
        x = x.view(x.size()[0], -1)
        return x
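In forward, the final Softmax over dim=2 makes the attention weights sum to one across time, so the layer computes attention-weighted statistics; in LaTeX, with weights w_t and frame features x_t:

    \mu = \sum_t w_t\, x_t, \qquad
    \sigma = \sqrt{\max\Bigl(\sum_t w_t\, x_t^2 - \mu^2,\ 10^{-5}\Bigr)}

The clamp(min=1e-5) keeps the variance estimate strictly positive against floating-point round-off before the square root.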
class GruPooling(torch.nn.Module):
    """
    Pooling done by a GRU: the sequence of frame-level features is summarized
    by the hidden state of the last time step.
    """
    def __init__(self, input_size, gru_node, nb_gru_layer):
        """
        :param input_size: feature dimension of the input sequence
        :param gru_node: number of units per GRU layer
        :param nb_gru_layer: number of stacked GRU layers
        """
        super(GruPooling, self).__init__()
        self.lrelu_keras = torch.nn.LeakyReLU(negative_slope=0.3)
        self.bn_before_gru = torch.nn.BatchNorm1d(num_features=input_size)
        self.gru = torch.nn.GRU(input_size=input_size,
                                hidden_size=gru_node,
                                num_layers=nb_gru_layer,
                                batch_first=True)

    def forward(self, x):
        """
        :param x: a (batch, features, time) tensor
        :return: a (batch, gru_node) tensor: output of the last GRU time step
        """
        x = self.bn_before_gru(x)
        x = self.lrelu_keras(x)
        x = x.permute(0, 2, 1)  # (batch, filt, time) >> (batch, time, filt)
        self.gru.flatten_parameters()
        x, _ = self.gru(x)
        x = x[:, -1, :]
        return x
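End of nnet/pooling.py. A usage sketch for GruPooling, again with hypothetical sizes:

    import torch

    pooling = GruPooling(input_size=256, gru_node=512, nb_gru_layer=2)
    frames = torch.randn(8, 256, 200)   # (batch, filt, time)
    embedding = pooling(frames)         # hidden state of the last time step
    print(embedding.shape)              # torch.Size([8, 512])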
nnet/xsets.py  (view file @ 39e464a5)
...
...
@@ -242,8 +242,7 @@ class SideSet(Dataset):
         self.rir_df = None
         if "add_reverb" in self.transform:
             # load the RIR database
-            tmp_rir_df = pandas.read_csv(self.transformation["add_reverb"]["rir_db_csv"])
-            self.rir_df = zip(tmp_rir_df['file_id'].tolist(), tmp_rir_df['channel'].tolist())
+            self.rir_df = pandas.read_csv(self.transformation["add_reverb"]["rir_db_csv"])

     def __getitem__(self, index):
         """
...
...
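A note on this design choice: zip returns a single-use iterator, so the old self.rir_df had no length and could only be traversed once, whereas data_augmentation needs repeated random access (rir_df.shape[0], rir_df.iloc[...]). A two-line illustration:

    pairs = zip(['rir_a', 'rir_b'], [0, 1])
    list(pairs)   # [('rir_a', 0), ('rir_b', 1)]
    list(pairs)   # [] -- the iterator is already exhausted; len(pairs) raises TypeError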
@@ -314,7 +313,7 @@ class IdMapSet(Dataset):
                  window_len=24000,
                  window_shift=8000,
                  sample_rate=16000,
-                 min_duration=0.150):
+                 min_duration=0.165):
         """
...
...
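For orientation: at the default sample_rate of 16000 Hz, window_len=24000 samples is a 1.5 s window and window_shift=8000 samples a 0.5 s hop, so this hunk raises the shortest accepted segment from 0.150 s to 0.165 s.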
nnet/xvector.py  (view file @ 39e464a5)
...
...
@@ -43,6 +43,9 @@ import yaml
 from collections import OrderedDict
 from torch.utils.data import DataLoader
 from sklearn.model_selection import train_test_split
+from .pooling import MeanStdPooling
+from .pooling import AttentivePooling
+from .pooling import GruPooling
 from .preprocessor import MfccFrontEnd
 from .preprocessor import MelSpecFrontEnd
 from .preprocessor import RawPreprocessor
...
...
@@ -67,6 +70,8 @@ from .loss import ArcMarginProduct
 from ..sidekit_io import init_logging

+torch.backends.cudnn.benchmark = True
+os.environ['MKL_THREADING_LAYER'] = 'GNU'

 __license__ = "LGPL"
...
...
@@ -78,17 +83,6 @@ __status__ = "Production"
 __docformat__ = 'reS'

 #logging.basicConfig(format='%(asctime)s %(message)s')

-# Make PyTorch Deterministic
-torch.manual_seed(0)
-torch.backends.cudnn.deterministic = False
-torch.backends.cudnn.benchmark = True
-numpy.random.seed(0)

 def eer(negatives, positives):
     """Logarithmic complexity EER computation
...
...
@@ -424,63 +418,6 @@ class TrainingMonitor():
             self.current_patience -= 1

-class MeanStdPooling(torch.nn.Module):
-    """
-    Mean and Standard deviation pooling
-    """
-    def __init__(self):
-        """
-        """
-        super(MeanStdPooling, self).__init__()
-        pass
-
-    def forward(self, x):
-        """
-        :param x:
-        :return:
-        """
-        mean = torch.mean(x, dim=2)
-        std = torch.std(x, dim=2)
-        return torch.cat([mean, std], dim=1)
-
-
-class GruPooling(torch.nn.Module):
-    """
-    """
-    def __init__(self, input_size, gru_node, nb_gru_layer):
-        """
-        :param input_size:
-        :param gru_node:
-        :param nb_gru_layer:
-        """
-        super(GruPooling, self).__init__()
-        self.lrelu_keras = torch.nn.LeakyReLU(negative_slope=0.3)
-        self.bn_before_gru = torch.nn.BatchNorm1d(num_features=input_size)
-        self.gru = torch.nn.GRU(input_size=input_size,
-                                hidden_size=gru_node,
-                                num_layers=nb_gru_layer,
-                                batch_first=True)
-
-    def forward(self, x):
-        """
-        :param x:
-        :return:
-        """
-        x = self.bn_before_gru(x)
-        x = self.lrelu_keras(x)
-        x = x.permute(0, 2, 1)  # (batch, filt, time) >> (batch, time, filt)
-        self.gru.flatten_parameters()
-        x, _ = self.gru(x)
-        x = x[:, -1, :]
-        return x
-
-
 class Xtractor(torch.nn.Module):
     """
     Class that defines an x-vector extractor based on 5 convolutional layers and a mean standard deviation pooling
...
...
@@ -614,11 +551,11 @@ class Xtractor(torch.nn.Module):
             elif self.loss == 'aps':
                 self.after_speaker_embedding = SoftmaxAngularProto(int(self.speaker_number))

-            self.preprocessor_weight_decay = 0.000
-            self.sequence_network_weight_decay = 0.000
-            self.stat_pooling_weight_decay = 0.000
-            self.before_speaker_embedding_weight_decay = 0.00
-            self.after_speaker_embedding_weight_decay = 0.00
+            self.preprocessor_weight_decay = 0.00002
+            self.sequence_network_weight_decay = 0.00002
+            self.stat_pooling_weight_decay = 0.00002
+            self.before_speaker_embedding_weight_decay = 0.00002
+            self.after_speaker_embedding_weight_decay = 0.0002

         elif model_archi == "rawnet2":
...
...
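These per-module decays are typically consumed as optimizer parameter groups; a minimal sketch of the pattern (modules and values stand in for the Xtractor sub-networks):

    import torch

    preprocessor = torch.nn.Conv1d(1, 16, 3)        # hypothetical stand-ins
    sequence_network = torch.nn.Conv1d(16, 32, 3)

    # Each group carries its own weight_decay, matching the attributes above
    optimizer = torch.optim.SGD([
        {'params': preprocessor.parameters(), 'weight_decay': 0.00002},
        {'params': sequence_network.parameters(), 'weight_decay': 0.00002},
    ], lr=0.01, momentum=0.9)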
@@ -855,18 +792,8 @@ class Xtractor(torch.nn.Module):
                                                              m=0.2,
                                                              easy_margin=True)

-            #self.after_speaker_embedding = ArcLinear(input_size,
-            #                                         self.speaker_number,
-            #                                         margin=self.aam_margin,
-            #                                         s=self.aam_s)
-            #self.after_speaker_embedding = ArcFace(embedding_size=input_size,
-            #                                       classnum=self.speaker_number,
-            #                                       s=64.,
-            #                                       m=0.5)

             self.after_speaker_embedding_weight_decay = cfg["after_embedding"]["weight_decay"]

     def forward(self, x, is_eval=False, target=None, extract_after_pooling=False):
         """
...
...
@@ -888,9 +815,6 @@ class Xtractor(torch.nn.Module):
         x = self.before_speaker_embedding(x)

         if self.norm_embedding:
-            #x_norm = x.norm(p=2, dim=1, keepdim=True) / 10. # Why 10. ?
-            #x_norm = torch.linalg.norm(x, ord=2, dim=1, keepdim=True, out=None, dtype=None)
-            #x = torch.div(x, x_norm)
             x = l2_norm(x)

         if self.loss == "cce":
...
...
@@ -1058,6 +982,7 @@ def update_training_dictionary(dataset_description,
     training_opts["compute_test_eer"] = False
     training_opts["log_interval"] = 10
+    training_opts["validation_frequency"] = 1
     training_opts["tmp_model_name"] = "tmp_model.pt"
     training_opts["best_model_name"] = "best_model.pt"
...
...
@@ -1139,15 +1064,11 @@ def get_loaders(dataset_opts, training_opts, speaker_number):
     First we load the dataframe from CSV file in order to split it for training and validation purpose
     Then we provide those two
     """
-    #with open(dataset_yaml, "r") as fh:
-    #    dataset_params = yaml.load(fh, Loader=yaml.FullLoader)
-    #    df = pandas.read_csv(dataset_params["dataset_description"])
     df = pandas.read_csv(dataset_opts["dataset_csv"])

-    training_df, validation_df = train_test_split(df,
-                                                  test_size=dataset_opts["validation_ratio"],
-                                                  stratify=df["speaker_idx"])
+    torch.manual_seed(training_opts['seed'])
+    training_df, validation_df = train_test_split(df,
+                                                  test_size=dataset_opts["validation_ratio"],
+                                                  stratify=df["speaker_idx"])

     training_set = SideSet(dataset_opts,
                            set_type="train",
...
...
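Worth noting: sklearn's train_test_split draws from numpy's global RNG (or an explicit random_state), not from torch's, so the numpy.random.seed call introduced in new_xtrain below is what actually pins this split; passing random_state is the self-contained alternative. A sketch with toy data:

    import pandas
    from sklearn.model_selection import train_test_split

    # Illustrative frame; in SIDEKIT the CSV provides one row per session
    df = pandas.DataFrame({"speaker_idx": [0, 0, 0, 1, 1, 1, 2, 2, 2]})

    # random_state pins the split regardless of global RNG state;
    # stratify keeps speaker proportions identical in both parts
    train_df, val_df = train_test_split(df, test_size=0.33,
                                        stratify=df["speaker_idx"],
                                        random_state=42)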
@@ -1164,8 +1085,8 @@ def get_loaders(dataset_opts, training_opts, speaker_number):
     side_sampler = SideSampler(training_set.sessions['speaker_idx'],
                                speaker_number,
-                               1,
-                               100,
+                               dataset_opts["train"]["sampler"]["examples_per_speaker"],
+                               dataset_opts["train"]["sampler"]["samples_per_speaker"],
                                dataset_opts["batch_size"])

     training_loader = DataLoader(training_set,
...
...
@@ -1192,9 +1113,6 @@ def get_loaders(dataset_opts, training_opts, speaker_number):
     # Select a subset of non-target trials to reduce the number of tests
     tar_non_ratio = numpy.sum(tar_indices) / numpy.sum(non_indices)
-    #non_indices *= numpy.random.choice([False, True],
-    #                                   size=non_indices.shape,
-    #                                   p=[1-tar_non_ratio, tar_non_ratio])
     non_indices *= (numpy.random.rand(*non_indices.shape) < tar_non_ratio)

     return training_loader, validation_loader, tar_indices, non_indices
...
...
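The Bernoulli mask keeps each non-target trial with probability tar_non_ratio, so the expected number of surviving non-target trials matches the number of target trials. A small self-contained sketch:

    import numpy

    tar_indices = numpy.zeros(1000, dtype=bool); tar_indices[:50] = True    # 50 target trials
    non_indices = numpy.ones(1000, dtype=bool);  non_indices[:50] = False   # 950 non-target trials

    ratio = numpy.sum(tar_indices) / numpy.sum(non_indices)    # ~0.053
    non_indices *= (numpy.random.rand(*non_indices.shape) < ratio)
    print(non_indices.sum())   # ~50 in expectation: balanced with the target trials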
@@ -1317,8 +1235,7 @@ def new_xtrain(dataset_description,
                **kwargs):
     """
     REFACTORING
-    - test the logging
-    - a function that takes the model and returns an optimizer and a scheduler
+    - refine the logging
     """
     dataset_opts, model_opts, training_opts = update_training_dictionary(dataset_description,
                                                                          model_description,
...
...
@@ -1333,23 +1250,33 @@ def new_xtrain(dataset_description,
                               best_eer=100,
                               compute_test_eer=training_opts["compute_test_eer"])

-    # Display the entire configuration as YAML dictionaries
-    monitor.logger.info(yaml.dump(dataset_opts, default_flow_style=False))
-    monitor.logger.info(yaml.dump(model_opts, default_flow_style=False))
-    monitor.logger.info(yaml.dump(training_opts, default_flow_style=False))

     # Make PyTorch Deterministic
     torch.backends.cudnn.deterministic = False
+    if training_opts["deterministic"]:
+        torch.backends.cudnn.deterministic = True
+
+    # Set all the seeds
+    numpy.random.seed(training_opts["seed"])  # Set the random seed of numpy for the data split.
+    torch.manual_seed(training_opts["seed"])
+    torch.cuda.manual_seed(training_opts["seed"])
+
+    # Display the entire configuration as YAML dictionaries
+    monitor.logger.info("\n*********************************\nDataset options\n*********************************\n")
+    monitor.logger.info(yaml.dump(dataset_opts, default_flow_style=False))
+    monitor.logger.info("\n*********************************\nModel options\n*********************************\n")
+    monitor.logger.info(yaml.dump(model_opts, default_flow_style=False))
+    monitor.logger.info("\n*********************************\nTraining options\n*********************************\n")
+    monitor.logger.info(yaml.dump(training_opts, default_flow_style=False))

     # Test to optimize
     torch.autograd.profiler.emit_nvtx(enabled=False)

     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

     # Initialize the model
     model = get_network(model_opts)
     speaker_number = model.speaker_number
     embedding_size = model.embedding_size

     if torch.cuda.device_count() > 1 and training_opts["multi_gpu"]:
         model = torch.nn.DataParallel(model)
...
...
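For reference, full PyTorch determinism usually needs slightly more than the cuDNN flag toggled above; a hedged sketch of the common recipe, going beyond what this commit sets:

    import random
    import numpy
    import torch

    def set_seed(seed: int, deterministic: bool = True) -> None:
        """Seed every RNG a training loop may touch."""
        random.seed(seed)
        numpy.random.seed(seed)
        torch.manual_seed(seed)            # also seeds CUDA on recent PyTorch
        torch.cuda.manual_seed_all(seed)   # every visible GPU, for multi-GPU runs
        torch.backends.cudnn.deterministic = deterministic
        # benchmark=True (set at import time in this commit) trades determinism for speed
        torch.backends.cudnn.benchmark = not deterministic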
@@ -1399,7 +1326,7 @@ def new_xtrain(dataset_description,
     val_acc, val_loss, val_eer = cross_validation(model,
                                                   validation_loader,
                                                   device,
-                                                  [validation_loader.dataset.__len__(), model_opts["embedding_size"]],
+                                                  [validation_loader.dataset.__len__(), embedding_size],
                                                   validation_tar_indices,
                                                   validation_non_indices,
                                                   training_opts["mixed_precision"])
...
...
@@ -2014,7 +1941,6 @@ def cross_validation(model, validation_loader, device, validation_shape, tar_ind
             #classes[cursor:cursor + batch_size] = target.detach().cpu()
             cursor += batch_size
-    #print(classes.shape[0])

     local_device = "cpu" if embeddings.shape[0] > 3e4 else device
     embeddings = embeddings.to(local_device)
     scores = torch.einsum('ij,kj', embeddings, embeddings).cpu().numpy()
...
...
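The einsum line computes all pairwise dot products in one shot; with L2-normalized embeddings these are cosine similarities. A small sketch:

    import torch

    embeddings = torch.randn(100, 256)
    embeddings = torch.nn.functional.normalize(embeddings, dim=1)   # unit-norm rows

    # 'ij,kj' contracts the feature axis j, producing the (100, 100) Gram matrix
    scores = torch.einsum('ij,kj', embeddings, embeddings)

    # equivalent to an explicit matrix product:
    assert torch.allclose(scores, embeddings @ embeddings.T, atol=1e-6)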
@@ -2028,7 +1954,6 @@ def cross_validation(model, validation_loader, device, validation_shape, tar_ind
     equal_error_rate = rocch2eer(pmiss, pfa)

     return (100. * accuracy.cpu().numpy() / validation_shape[0],
-            loss.cpu().numpy() / ((batch_idx + 1) * batch_size),
             loss.cpu().numpy() / ((batch_idx + 1) * batch_size),
             equal_error_rate)
...
...