Anthony Larcher / sidekit / Commits / 2640573a

Commit 2640573a authored Mar 12, 2021 by Anthony Larcher

debug

parent 7f61cc0b
Changes: 3 files
iv_scoring.py

@@ -60,7 +60,7 @@ def _check_missing_model(enroll, test, ndx):
     return clean_ndx


-def cosine_scoring(enroll, test, ndx, wccn=None, check_missing=True):
+def cosine_scoring(enroll, test, ndx, wccn=None, check_missing=True, device=None):
     """Compute the cosine similarities between to sets of vectors. The list of
     trials to perform is given in an Ndx object.
...
@@ -99,7 +99,10 @@ def cosine_scoring(enroll, test, ndx, wccn=None, check_missing=True):
     if enroll_copy != test_copy:
         test_copy.norm_stat1()

     s_size_in_bytes = enroll_copy.stat1.shape[0] * test_copy.stat1.shape[0] * 4
-    device = torch.device("cuda:0" if torch.cuda.is_available() and s_size_in_bytes < 1e9 else "cpu")
+    if device == None:
+        device = torch.device("cuda:0" if torch.cuda.is_available() and s_size_in_bytes < 3e9 else "cpu")
+    else:
+        device = device if torch.cuda.is_available() and s_size_in_bytes < 3e9 else torch.device("cpu")

     s = torch.mm(torch.FloatTensor(enroll_copy.stat1).to(device),
                  torch.FloatTensor(test_copy.stat1).to(device).T).cpu().numpy()

     score = Scores()
...
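The new `device` argument makes the GPU/CPU choice explicit while keeping a size guard on the dense score matrix. Below is a minimal standalone sketch of that selection rule; the helper name, its parameters, and the 3 GB budget default are illustrative, not part of sidekit.

    # Minimal sketch (not from the commit): the device-selection rule used above,
    # falling back to CPU whenever the float32 score matrix would exceed the budget.
    import torch

    def pick_device(n_enroll, n_test, device=None, budget_bytes=3e9):
        s_size_in_bytes = n_enroll * n_test * 4  # dense float32 score matrix
        if device is None:
            return torch.device("cuda:0" if torch.cuda.is_available()
                                and s_size_in_bytes < budget_bytes else "cpu")
        return device if torch.cuda.is_available() and s_size_in_bytes < budget_bytes \
            else torch.device("cpu")

    print(pick_device(5_000, 20_000))       # ~400 MB matrix: GPU if available
    print(pick_device(200_000, 200_000))    # ~160 GB matrix: always CPU

With this rule, an explicitly requested GPU device is still overridden to CPU when the score matrix is too large, which matches the else branch added in the hunk above.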
nnet/xsets.py

@@ -378,7 +378,7 @@ class SpkSet(Dataset):
                 for jdx, possible_start in enumerate(possible_starts):
                     segment_dict = dict()
-                    segment_dict['start'] = possible_start / self.sample_rate
+                    segment_dict['start'] = possible_start
                     segment_dict['duration'] = self.duration
                     segment_dict['file_id'] = current_session.file_id
...
@@ -434,18 +434,25 @@ class SpkSet(Dataset):
         nfo = soundfile.info(f"{self.data_path}/{current_segment['file_id']}{self.data_file_extension}")

         if self._windowed:
-            start_frame = int(current_segment['start'] * self.sample_rate)
-            if start_frame + self.sample_number >= nfo.frames:
-                start_frame = numpy.min(nfo.frames - self.sample_number - 1)
+            start_frame = current_segment['start']
             stop_frame = start_frame + self.sample_number
         else:
             start_frame = int(current_segment['start'] * self.sample_rate)
             stop_frame = int(current_segment['duration'] * self.sample_rate)

-        speech, speech_fs = torchaudio.load(f"{self.data_path}/{current_segment['file_id']}{self.data_file_extension}",
-                                            frame_offset=start_frame,
-                                            num_frames=self.sample_number)
+        #speech, speech_fs = torchaudio.load(f"{self.data_path}/{current_segment['file_id']}{self.data_file_extension}",
+        #                                    frame_offset=start_frame,
+        #                                    num_frames=self.sample_number)
+        sig, _ = soundfile.read(f"{self.data_path}/{current_segment['file_id']}{self.data_file_extension}",
+                                start=start_frame,
+                                stop=stop_frame,
+                                dtype=wav_type)
+        sig = sig.astype(numpy.float32)
+        sig += 0.0001 * numpy.random.randn(sig.shape[0])
+        speech = torch.tensor(sig).type(torch.FloatTensor)

         if len(self.transform) > 0:
             # Select the data augmentation randomly
...
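The loader now reads the window with soundfile instead of torchaudio and adds a light dither before converting to a tensor. Here is a self-contained sketch of that pattern, assuming a mono 16 kHz file; the path, window length, and the float32 dtype (standing in for `wav_type`) are placeholders.

    # Minimal sketch (not from the commit): read a fixed-length window with soundfile,
    # add a small dither, and produce a float32 torch tensor.
    import numpy
    import soundfile
    import torch

    wav_path = "data/example.wav"        # placeholder path
    start_frame = 16000                  # window start, in samples
    sample_number = 3 * 16000            # 3 s window at 16 kHz

    sig, sample_rate = soundfile.read(wav_path,
                                      start=start_frame,
                                      stop=start_frame + sample_number,
                                      dtype="float32")
    sig = sig.astype(numpy.float32)
    sig += 0.0001 * numpy.random.randn(sig.shape[0])   # light dither, as in the diff
    speech = torch.tensor(sig).type(torch.FloatTensor)

Note that the new `_windowed` branch treats `current_segment['start']` as a sample index, which is consistent with the first hunk, where `possible_start` is now stored without dividing by `self.sample_rate`.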
nnet/xvector.py

@@ -502,6 +502,7 @@ class Xtractor(torch.nn.Module):
             self.preprocessor = MfccFrontEnd()
             self.sequence_network = PreFastResNet34()
+            self.embedding_size = 256

             self.before_speaker_embedding = torch.nn.Linear(in_features=2560,
                                                             out_features=256)
...
@@ -509,13 +510,11 @@ class Xtractor(torch.nn.Module):
             self.stat_pooling = MeanStdPooling()
             self.stat_pooling_weight_decay = 0

             self.embedding_size = 256
             self.loss = "aam"
-            self.after_speaker_embedding = ArcMarginProduct(256, self.speaker_number,
-                                                            s=30.0, m=0.20,
+            self.after_speaker_embedding = ArcMarginProduct(self.embedding_size, int(self.speaker_number),
+                                                            s=30, m=0.2,
                                                             easy_margin=False)

             self.preprocessor_weight_decay = 0.000
...
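For readers unfamiliar with the layer being reconfigured here, the sketch below shows a generic additive-angular-margin (ArcFace-style) output layer with the same scale `s` and margin `m`. It is a toy illustration of the standard technique, not sidekit's `ArcMarginProduct` implementation; the class name and variables are invented for the example.

    # Generic ArcFace-style margin layer (toy sketch, not sidekit's ArcMarginProduct).
    import torch
    import torch.nn.functional as F

    class ToyArcMargin(torch.nn.Module):
        def __init__(self, in_features, out_features, s=30.0, m=0.2):
            super().__init__()
            self.s, self.m = s, m
            self.weight = torch.nn.Parameter(torch.randn(out_features, in_features))

        def forward(self, x, target):
            # Cosine similarity between L2-normalised embeddings and class weights.
            cosine = F.linear(F.normalize(x), F.normalize(self.weight)).clamp(-1 + 1e-7, 1 - 1e-7)
            theta = torch.acos(cosine)
            # Add the angular margin m on the target class only, then rescale by s.
            one_hot = F.one_hot(target, cosine.shape[1]).bool()
            return self.s * torch.where(one_hot, torch.cos(theta + self.m), cosine)

    layer = ToyArcMargin(256, 1000, s=30.0, m=0.2)   # 256-d embeddings, 1000 speakers
    labels = torch.randint(0, 1000, (8,))
    logits = layer(torch.randn(8, 256), labels)
    loss = F.cross_entropy(logits, labels)

As far as the diff shows, the commit mainly ties the layer's inputs to `self.embedding_size` and `int(self.speaker_number)` instead of hard-coded values.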
@@ -996,7 +995,7 @@ def xtrain(speaker_number,
                               set_type="train",
                               dataset_df=training_df,
                               overlap=dataset_params['train']['overlap'],
-                              output_format="pytorch",
+                              output_format=output_format,
                               windowed=True)

     validation_set = SideSet(dataset_yaml,
...
@@ -1065,11 +1064,9 @@ def xtrain(speaker_number,
         param_list.append({'params': model.module.after_speaker_embedding.parameters(),
                            'weight_decay': model.module.after_speaker_embedding_weight_decay})

     optimizer = _optimizer(param_list, **_options)

-    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
-                                                     milestones=numpy.arange(50, 10000, 10),
-                                                     gamma=0.95,
-                                                     last_epoch=-1,
-                                                     verbose=False)
+    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
+                                                step_size=10 * training_loader.__len__(),
+                                                gamma=0.95)

     if mixed_precision:
         scaler = GradScaler()
...
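The scheduler swap above replaces epoch-milestone decay (MultiStepLR) with fixed-interval decay. Below is a minimal sketch of how the new StepLR configuration behaves, assuming `scheduler.step()` is called once per batch (the training loop itself is not shown in this diff); the toy model and `batches_per_epoch` value are placeholders for the real network and `len(training_loader)`.

    # Minimal sketch (not from the commit): StepLR multiplies the LR by gamma every
    # `step_size` scheduler steps; with step_size = 10 * len(training_loader) and one
    # scheduler.step() per batch, that is a 5% decay every 10 epochs.
    import torch

    model = torch.nn.Linear(10, 2)                  # toy stand-in for the x-vector network
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    batches_per_epoch = 100                         # placeholder for len(training_loader)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=10 * batches_per_epoch,
                                                gamma=0.95)

    for epoch in range(30):
        for _ in range(batches_per_epoch):
            optimizer.step()                        # forward/backward elided
            scheduler.step()                        # per-batch stepping assumed
        if epoch % 10 == 0:
            print(epoch, scheduler.get_last_lr())

The removed MultiStepLR(milestones=numpy.arange(50, 10000, 10)), when stepped once per epoch, applies the same 0.95 factor at epochs 50, 60, 70, and so on; the StepLR form expresses that cadence directly in iterations.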
@@ -1099,7 +1096,7 @@ def xtrain(speaker_number,
                               clipping=clipping)

         # Add the cross validation here
-        if math.fmod(epoch, 10) == 0:
+        if math.fmod(epoch, 1) == 0:
             val_acc, val_loss, val_eer = cross_validation(model,
                                                           validation_loader,
                                                           device,
                                                           [validation_set.__len__(), embedding_size],
                                                           mixed_precision)

             test_eer = test_metrics(model, device, speaker_number, num_thread, mixed_precision)
...
@@ -1984,7 +1981,7 @@ def eer(negatives, positives):
         n_index = n_index - next_n_jump

         if next_p_jump == 0 and next_n_jump == 0:
             break

         p_score = positives[p_index]
         n_score = negatives[n_index]

         next_p_jump = next_p_jump // 2
...