Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Anthony Larcher
sidekit
Commits
0c053d7f
Commit
0c053d7f
authored
Apr 20, 2021
by
Anthony Larcher
Browse files
debug extract per speaker
parent
f391adb6
Changes
2
Hide whitespace changes
Inline
Side-by-side
nnet/xsets.py
View file @
0c053d7f
...
...
@@ -478,6 +478,7 @@ class IdMapSetPerSpeaker(Dataset):
data_root_path
,
file_extension
,
transform_pipeline
=
{},
transform_number
=
1
,
frame_rate
=
100
,
min_duration
=
0.165
):
...
...
@@ -494,7 +495,6 @@ class IdMapSetPerSpeaker(Dataset):
self
.
data_root_path
=
data_root_path
self
.
file_extension
=
file_extension
self
.
len
=
len
(
set
(
self
.
idmap
.
leftids
))
self
.
transformation
=
transform_pipeline
self
.
min_duration
=
min_duration
self
.
sample_rate
=
frame_rate
self
.
speaker_list
=
list
(
set
(
self
.
idmap
.
leftids
))
...
...
@@ -503,13 +503,8 @@ class IdMapSetPerSpeaker(Dataset):
self
.
output_im
.
rightids
=
self
.
output_im
.
leftids
self
.
output_im
.
start
=
numpy
.
empty
(
self
.
output_im
.
rightids
.
shape
[
0
],
"|O"
)
self
.
output_im
.
stop
=
numpy
.
empty
(
self
.
output_im
.
rightids
.
shape
[
0
],
"|O"
)
self
.
transform
=
[]
#if (len(self.transformation) > 0):
# if (self.transformation["pipeline"] != '') and (self.transformation["pipeline"] is not None):
# self.transform_list = self.transformation["pipeline"].split(',')
if
self
.
transformation
is
not
None
:
self
.
transform_list
=
self
.
transformation
.
split
(
","
)
self
.
transformation
=
transform_pipeline
self
.
transform_number
=
transform_number
self
.
noise_df
=
None
if
"add_noise"
in
self
.
transform
:
...
...
@@ -555,10 +550,10 @@ class IdMapSetPerSpeaker(Dataset):
speech
=
torch
.
cat
(
tmp_data
,
dim
=
1
)
speech
+=
10e-6
*
torch
.
randn
(
speech
.
shape
)
if
len
(
self
.
transform
)
>
0
:
if
len
(
self
.
transformation
.
keys
()
)
>
0
:
speech
=
data_augmentation
(
speech
,
speech_fs
,
self
.
transform
,
self
.
transformation
,
self
.
transform_number
,
noise_df
=
self
.
noise_df
,
rir_df
=
self
.
rir_df
)
...
...
nnet/xvector.py
View file @
0c053d7f
...
...
@@ -1773,7 +1773,7 @@ def extract_embeddings_per_speaker(idmap_name,
file_extension
=
file_extension
,
transform_pipeline
=
transform_pipeline
,
frame_rate
=
sample_rate
,
min_duration
=
(
model
.
context_size
()
+
2
)
*
frame_shift
*
2
)
min_duration
=
1.
)
dataloader
=
DataLoader
(
dataset
,
batch_size
=
1
,
...
...
@@ -1788,10 +1788,7 @@ def extract_embeddings_per_speaker(idmap_name,
# Get the size of embeddings to extract
name
=
list
(
model
.
before_speaker_embedding
.
state_dict
().
keys
())[
-
1
].
split
(
'.'
)[
0
]
+
'.weight'
if
extract_after_pooling
:
emb_size
=
model
.
before_speaker_embedding
.
state_dict
()[
name
].
shape
[
1
]
else
:
emb_size
=
model
.
embedding_size
emb_size
=
model
.
embedding_size
# Create the StatServer
embeddings
=
StatServer
()
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment