Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Anthony Larcher
sidekit
Commits
a681c617
Commit
a681c617
authored
Apr 14, 2021
by
Anthony Larcher
Browse files
add replicas
parent
a832fd2d
Changes
2
Hide whitespace changes
Inline
Side-by-side
nnet/xsets.py
View file @
a681c617
...
...
@@ -62,6 +62,7 @@ class SideSampler(torch.utils.data.Sampler):
batch_size
,
seed
=
0
,
rank
=
0
,
num_process
=
1
,
num_replicas
=
1
):
"""[summary]
...
...
@@ -81,29 +82,34 @@ class SideSampler(torch.utils.data.Sampler):
self
.
epoch
=
0
self
.
seed
=
seed
self
.
rank
=
rank
self
.
num_process
=
num_process
self
.
num_replicas
=
num_replicas
assert
batch_size
%
examples_per_speaker
==
0
assert
(
self
.
samples_per_speaker
*
self
.
spk_count
*
self
.
examples_per_speaker
)
%
self
.
num_
replica
s
==
0
assert
batch_size
%
(
examples_per_speaker
*
self
.
num_replicas
)
==
0
assert
(
self
.
samples_per_speaker
*
self
.
spk_count
*
self
.
examples_per_speaker
)
%
self
.
num_
proces
s
==
0
self
.
batch_size
=
batch_size
//
examples_per_speaker
self
.
batch_size
=
batch_size
//
(
self
.
examples_per_speaker
*
self
.
num_replicas
)
# reference all segment indexes per speaker
for
idx
in
range
(
self
.
spk_count
):
self
.
labels_to_indices
[
idx
]
=
list
()
for
idx
,
value
in
enumerate
(
self
.
train_sessions
):
self
.
labels_to_indices
[
value
].
append
(
idx
)
# shuffle segments per speaker
g
=
torch
.
Generator
()
g
.
manual_seed
(
self
.
seed
+
self
.
epoch
)
for
idx
,
ldlist
in
enumerate
(
self
.
labels_to_indices
.
values
()):
ldlist
=
numpy
.
array
(
ldlist
)
self
.
labels_to_indices
[
idx
]
=
ldlist
[
torch
.
randperm
(
ldlist
.
shape
[
0
]).
numpy
()]
self
.
labels_to_indices
[
idx
]
=
ldlist
[
torch
.
randperm
(
ldlist
.
shape
[
0
]
,
generator
=
g
).
numpy
()]
self
.
segment_cursors
=
numpy
.
zeros
((
len
(
self
.
labels_to_indices
),),
dtype
=
numpy
.
int
)
def
__iter__
(
self
):
g
=
torch
.
Generator
()
g
.
manual_seed
(
self
.
seed
+
self
.
epoch
)
numpy
.
random
.
seed
(
self
.
seed
+
self
.
epoch
)
# Generate batches per speaker
straight
=
numpy
.
arange
(
self
.
spk_count
)
indices
=
numpy
.
ones
((
self
.
samples_per_speaker
,
self
.
spk_count
),
dtype
=
numpy
.
int
)
*
straight
...
...
@@ -134,9 +140,6 @@ class SideSampler(torch.utils.data.Sampler):
# we want to convert the speaker indexes into segment indexes
self
.
index_iterator
=
numpy
.
zeros_like
(
batch_matrix
)
g
=
torch
.
Generator
()
g
.
manual_seed
(
self
.
seed
+
self
.
epoch
)
# keep track of next segment index to sample for each speaker
for
idx
,
value
in
enumerate
(
batch_matrix
):
if
self
.
segment_cursors
[
value
]
>
len
(
self
.
labels_to_indices
[
value
])
-
1
:
...
...
@@ -144,13 +147,15 @@ class SideSampler(torch.utils.data.Sampler):
self
.
segment_cursors
[
value
]
=
0
self
.
index_iterator
[
idx
]
=
self
.
labels_to_indices
[
value
][
self
.
segment_cursors
[
value
]]
self
.
segment_cursors
[
value
]
+=
1
self
.
index_iterator
=
self
.
index_iterator
.
reshape
(
-
1
,
self
.
num_replicas
*
self
.
examples_per_speaker
)[:,
self
.
rank
*
self
.
examples_per_speaker
:(
self
.
rank
+
1
)
*
self
.
examples_per_speaker
].
flatten
()
return
iter
(
self
.
index_iterator
)
self
.
index_iterator
=
numpy
.
repeat
(
self
.
index_iterator
,
self
.
num_replicas
)
self
.
index_iterator
=
self
.
index_iterator
.
reshape
(
-
1
,
self
.
num_process
*
self
.
examples_per_speaker
*
self
.
num_replicas
)[:,
self
.
rank
*
self
.
examples_per_speaker
*
self
.
num_replicas
:(
self
.
rank
+
1
)
*
self
.
examples_per_speaker
*
self
.
num_replicas
].
flatten
()
return
iter
(
self
.
index_iterator
)
def
__len__
(
self
)
->
int
:
return
(
self
.
samples_per_speaker
*
self
.
spk_count
*
self
.
examples_per_speaker
)
//
self
.
num_replicas
return
(
self
.
samples_per_speaker
*
self
.
spk_count
*
self
.
examples_per_speaker
*
self
.
num_replicas
)
//
self
.
num_process
def
set_epoch
(
self
,
epoch
:
int
)
->
None
:
self
.
epoch
=
epoch
...
...
nnet/xvector.py
View file @
a681c617
...
...
@@ -1064,6 +1064,8 @@ def get_loaders(dataset_opts, training_opts, model_opts, local_rank=0):
batch_size
=
batch_size
,
seed
=
training_opts
[
'torch_seed'
],
rank
=
local_rank
,
num_process
=
torch
.
cuda
.
device_count
(),
num_replicas
=
dataset_opts
[
"train"
][
"sampler"
][
"augmentation_replica"
]
)
else
:
batch_size
=
dataset_opts
[
"batch_size"
]
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment