Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Ambuj Mehrish
sidekit
Commits
145cdecc
Commit
145cdecc
authored
Jun 25, 2019
by
Anthony Larcher
Browse files
sad
parent
fdd13f3b
Changes
4
Hide whitespace changes
Inline
Side-by-side
__init__.py
View file @
145cdecc
...
...
@@ -51,7 +51,7 @@ if 'SIDEKIT' in os.environ:
SIDEKIT_CONFIG
[
"mpi"
]
=
True
if
k
==
"cuda"
:
if
val
==
"true"
:
SIDEKIT_CONFIG
[
"
libsvm
"
]
=
True
SIDEKIT_CONFIG
[
"
cuda
"
]
=
True
PARALLEL_MODULE
=
'multiprocessing'
# can be threading or multiprocessing; MPI support is planned for the future
...
...
@@ -167,7 +167,9 @@ if CUDA:
from
sidekit.nnet
import
xtrain
from
sidekit.nnet
import
extract_idmap
from
sidekit.nnet
import
extract_parallel
from
sidekit.nnet
import
SAD_RNN
else
:
print
(
"Don't import Torch"
)
if
SIDEKIT_CONFIG
[
"mpi"
]:
found_mpi4py
=
importlib
.
find_loader
(
'mpi4py'
)
is
not
None
...
...
nnet/__init__.py
View file @
145cdecc
...
...
@@ -27,6 +27,7 @@ Copyright 2014-2019 Anthony Larcher and Sylvain Meignier
:mod:`nnet` provides methods to manage Neural Networks using PyTorch
"""
from
sidekit.nnet.sad_rnn
import
SAD_RNN
from
sidekit.nnet.feed_forward
import
FForwardNetwork
from
sidekit.nnet.feed_forward
import
kaldi_to_hdf5
from
sidekit.nnet.xsets
import
XvectorMultiDataset
,
XvectorDataset
,
StatDataset
...
...
nnet/sad_rnn.py
View file @
145cdecc
...
...
@@ -23,7 +23,8 @@ class SAD_Dataset(Dataset):
train_list
=
{}
with
open
(
mdtm_file
,
'r'
)
as
f
:
for
line
in
f
:
lines
=
[
l
for
l
in
f
]
for
line
in
lines
[:
500
]:
show
,
_
,
start
,
dur
,
_
,
_
,
_
,
_
=
line
.
rstrip
().
split
()
if
show
not
in
train_list
:
train_list
[
show
]
=
[]
...
...
@@ -48,25 +49,56 @@ class SAD_Dataset(Dataset):
self
.
vad
=
{}
self
.
segments
=
[]
#speech_only_segments = []
#speech_nonspeech_segments = []
for
show
in
sorted
(
train_list
.
keys
()):
features
,
_
=
features_server
.
load
(
show
)
labels
=
numpy
.
zeros
((
len
(
features
),
1
),
dtype
=
numpy
.
int
)
for
seg
in
train_list
[
show
]:
labels
[
seg
[
'start'
]:
seg
[
'stop'
]]
=
1
self
.
vad
[
show
]
=
labels
for
seg
in
uem_list
[
show
]:
if
seg
[
'start'
]
is
not
None
:
start
,
stop
=
seg
[
'start'
],
seg
[
'stop'
]
else
:
start
,
stop
=
0
,
len
(
features
)
for
i
in
range
(
start
,
min
(
stop
,
len
(
features
))
-
self
.
duration
,
self
.
step
):
self
.
segments
.
append
((
show
,
i
,
i
+
self
.
duration
))
speech_only_segments
=
[]
speech_nonspeech_segments
=
[]
if
show
in
train_list
and
show
in
uem_list
:
for
seg
in
train_list
[
show
]:
labels
[
seg
[
'start'
]:
seg
[
'stop'
]]
=
1
self
.
vad
[
show
]
=
labels
for
seg
in
uem_list
[
show
]:
if
seg
[
'start'
]
is
not
None
:
start
,
stop
=
seg
[
'start'
],
seg
[
'stop'
]
else
:
start
,
stop
=
0
,
len
(
features
)
# build the segments containing ONLY speech (without overlap)
for
i
in
range
(
start
,
min
(
stop
,
len
(
features
))
-
self
.
duration
,
self
.
duration
):
if
labels
[
i
:
i
+
self
.
duration
].
sum
()
==
self
.
duration
:
speech_only_segments
.
append
((
show
,
i
,
i
+
self
.
duration
))
# build the segments containing BOTH SPEECH AND SILENCE (with overlap, to balance the classes)
for
i
in
range
(
start
,
min
(
stop
,
len
(
features
))
-
self
.
duration
,
self
.
step
):
if
labels
[
i
:
i
+
self
.
duration
].
sum
()
<
self
.
duration
-
1
:
speech_nonspeech_segments
.
append
((
show
,
i
,
i
+
self
.
duration
))
#for i in range(start, min(stop, len(features)) - self.duration, self.step):
# self.segments.append((show, i, i + self.duration))
tmp
=
speech_only_segments
+
speech_nonspeech_segments
random
.
shuffle
(
tmp
)
self
.
segments
+=
tmp
print
(
"Show {}, ratio S/NS = {}"
.
format
(
show
,
len
(
speech_only_segments
)
/
(
len
(
speech_nonspeech_segments
)
+
len
(
speech_only_segments
))))
#tmp = speech_only_segments + speech_nonspeech_segments
#if shuffle:
# print("taille de tmp: {}".format(len(tmp)))
# random.shuffle(tmp)
# print("taille de tmp: {}".format(len(tmp)))
# print(tmp[0])
# for t in tmp:
# self.segments.append(t)
#self.segments = tmp.copy()
self
.
input_size
=
features
.
shape
[
1
]
if
shuffle
:
random
.
shuffle
(
self
.
segments
)
print
(
"Final ratio S/NS = {}"
.
format
(
len
(
speech_only_segments
)
/
(
len
(
speech_nonspeech_segments
)
+
len
(
speech_only_segments
))))
self
.
len
=
len
(
self
.
segments
)
//
self
.
batch_size
...
...
@@ -196,7 +228,6 @@ class SAD_RNN():
:param features_server: a sidekit FeaturesServer object
:param model_file_format: file format to save the model. The format uses the current epoch
"""
self
.
model
.
to
(
device
)
criterion
=
nn
.
BCELoss
()
optimizer
=
optim
.
RMSprop
(
self
.
model
.
parameters
())
...
...
@@ -209,9 +240,11 @@ class SAD_RNN():
for
batch_idx
,
(
X
,
Y
)
in
enumerate
(
training_set
):
batch_loss
=
self
.
_fit_batch
(
optimizer
,
criterion
,
X
,
Y
)
losses
[
epoch
].
append
(
batch_loss
)
sys
.
stdout
.
write
(
"
\r
Epoch {}/{}, loss {:.5f}"
.
format
(
print
(
"
Epoch {}/{}, loss {:.5f}
\n
"
.
format
(
epoch
+
1
,
nb_epochs
,
numpy
.
mean
(
losses
[
epoch
])))
sys
.
stdout
.
flush
()
#sys.stdout.write("\rEpoch {}/{}, loss {:.5f}".format(
# epoch + 1, nb_epochs, numpy.mean(losses[epoch])))
#sys.stdout.flush()
it
+=
1
torch
.
save
(
self
.
model
.
state_dict
(),
model_file_format
.
format
(
epoch
+
1
))
...
...
nnet/xvector.py
View file @
145cdecc
...
...
@@ -230,7 +230,9 @@ def xtrain(args):
# Decrease learning rate after every epoch
#args.lr = args.lr * 0.9
#args.lr = args.lr * 0.9
args
.
lr
=
args
.
lr
*
0.9
print
(
" Decrease learning rate: {}"
.
format
(
args
.
lr
))
def
train_epoch
(
epoch
,
args
,
initial_model_file_name
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment