Ambuj Mehrish / sidekit · Commits

Commit ad9fa3bb, authored Jul 02, 2019 by Anthony Larcher

save_multispeaker

Parent: aba2e39e
Changes: 2 files
features_extractor.py
@@ -537,7 +537,8 @@ class FeaturesExtractor(object):
                            input_audio_filename=None,
                            output_feature_filename=None,
                            keep_all=True,
-                           skip_existing_file=False):
+                           skip_existing_file=False,
+                           compressed='percentile'):
         """
         :param idmap:
         :param channel:
...
@@ -614,10 +615,11 @@ class FeaturesExtractor(object):
-                               energy[idx], fb[idx], None, label[idx])
+                               energy[idx], fb[idx], None, label[idx], compressed)
             if keep_all:
-                self._save(show, output_feature_filename, save_param, cep, energy, fb, None, label)
+                self._save(show, output_feature_filename, save_param, cep, energy, fb, None, label, compressed)
         self.vad = param_vad
         self.save_param = save_param
...
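Note: the hunks above thread a new compressed keyword (default 'percentile') from the method signature down to the internal self._save(...) call. A minimal, hypothetical usage sketch follows; the method name save_multispeakers is assumed from the commit title and the parameter list at line 537, and the extractor/idmap objects are placeholders supplied by the caller, not code from this commit.

# Hypothetical sketch (not part of the commit): where the new `compressed`
# keyword would be passed by a caller. Only the keyword and its default
# value are taken from the diff above.
def extract_features(extractor, idmap):
    # `extractor` is assumed to be a configured sidekit FeaturesExtractor,
    # `idmap` an IdMap describing the segments to process.
    extractor.save_multispeakers(idmap,
                                 channel=0,
                                 input_audio_filename=None,
                                 output_feature_filename=None,
                                 keep_all=True,
                                 skip_existing_file=False,
                                 compressed='percentile')  # new in ad9fa3bb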
nnet/sad_rnn.py
@@ -17,100 +17,99 @@ class SAD_Dataset(Dataset):
     Object that takes a list of files from a file and initializes a Dataset
     """
-    def __init__(self, mdtm_file, features_server, batch_size=512, duration=3.2, step=0.8, uem_file=None,
+    def __init__(self, mdtm_file, feature_file, batch_size=512, duration=3.2, step=0.8, uem_file=None,
                  shuffle=False):
         self.batch_size = batch_size
         self.duration = int(duration * 100)
         self.step = int(step * 100)
         self.features_server = features_server
         train_list = {}
         with open(mdtm_file, 'r') as f:
             lines = [l for l in f]
         for line in lines[:500]:
             show, _, start, dur, _, _, _, _ = line.rstrip().split()
             if show not in train_list:
                 train_list[show] = []
             train_list[show].append({"start": int(float(start) * 100),
                                      "stop": int((float(start) + float(dur)) * 100)})
         uem_list = {}
         if uem_file is not None:
             with open(uem_file, 'r') as f:
                 for line in f:
                     show, _, start, stop = line.rstrip().split()
                     if show not in uem_list:
                         uem_list[show] = []
                     uem_list[show].append({"start": int(float(start) * 100),
-                                           "stop": int((float(start) + float(dur)) * 100)})
+                                           "stop": int(float(stop) * 100)})
         else:
             for show in train_list.keys():
                 uem_list[show].append({"start": None, "stop": None})
         self.vad = {}
         self.segments = []
         # speech_only_segments = []
         # speech_nonspeech_segments = []
         for show in sorted(train_list.keys()):
             features, _ = features_server.load(show)
             labels = numpy.zeros((len(features), 1), dtype=numpy.int)
             speech_only_segments = []
             speech_nonspeech_segments = []
             if show in train_list and show in uem_list:
                 for seg in train_list[show]:
                     labels[seg['start']:seg['stop']] = 1
                 self.vad[show] = labels
                 for seg in uem_list[show]:
                     if seg['start'] is not None:
                         start, stop = seg['start'], seg['stop']
                     else:
                         start, stop = 0, len(features)
                     for i in range(start, min(stop, len(features)) - self.duration, self.step):
                         self.segments.append((show, i, i + self.duration))
                     # build the segments that contain ONLY speech (no overlap)
                     for i in range(start, min(stop, len(features)) - self.duration, self.duration):
                         if labels[i:i + self.duration].sum() == self.duration:
                             speech_only_segments.append((show, i, i + self.duration))
                     # build the segments that contain BOTH SPEECH AND SILENCE (with overlap, to balance the classes)
                     for i in range(start, min(stop, len(features)) - self.duration, self.step):
                         if labels[i:i + self.duration].sum() < self.duration - 1:
                             speech_nonspeech_segments.append((show, i, i + self.duration))
                     # for i in range(start, min(stop, len(features)) - self.duration, self.step):
                     #     self.segments.append((show, i, i + self.duration))
             tmp = speech_only_segments + speech_nonspeech_segments
             random.shuffle(tmp)
             self.segments += tmp
             print("Show {}, ratio S/NS = {}".format(show, len(speech_only_segments) / (len(speech_nonspeech_segments) + len(speech_only_segments))))
         # tmp = speech_only_segments + speech_nonspeech_segments
         # if shuffle:
         #     print("taille de tmp: {}".format(len(tmp)))
         #     random.shuffle(tmp)
         #     print("taille de tmp: {}".format(len(tmp)))
         # print(tmp[0])
         # for t in tmp:
         #     self.segments.append(t)
         # self.segments = tmp.copy()
         self.input_size = features.shape[1]
         print("Final ratio S/NS = {}".format(len(speech_only_segments) / (len(speech_nonspeech_segments) + len(speech_only_segments))))
         self.len = len(self.segments) // self.batch_size
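The constructor above converts times in seconds to feature-frame indices at an implied 100 frames per second (duration=3.2 s and step=0.8 s become 320- and 80-frame windows), then slides windows over each show, keeping speech-only windows without overlap and mixed speech/non-speech windows with overlap so the two classes stay roughly balanced. A standalone sketch of just that windowing logic, with illustrative names that are not from sidekit:

import numpy

# Standalone sketch of the windowing logic above. The 100 frames-per-second
# rate is implied by the `* 100` conversions in __init__; function names here
# are illustrative only.
def make_windows(n_frames, duration_s=3.2, step_s=0.8, frame_rate=100):
    """Return (start, stop) frame indices of sliding windows."""
    duration = int(duration_s * frame_rate)   # 3.2 s -> 320 frames
    step = int(step_s * frame_rate)           # 0.8 s -> 80 frames
    return [(i, i + duration) for i in range(0, n_frames - duration, step)]

def is_speech_only(labels, start, stop):
    """True when every frame of the window is labelled as speech (== 1)."""
    return labels[start:stop].sum() == stop - start

# Example: 10 s of features (1000 frames), with speech between 2 s and 7 s.
labels = numpy.zeros((1000, 1), dtype=int)
labels[200:700] = 1
windows = make_windows(1000)
print(len(windows), [is_speech_only(labels, a, b) for a, b in windows])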
"stop"
:
int
(
float
(
stop
)
*
100
)})
else
:
for
show
in
train_list
.
keys
():
uem_list
[
show
].
append
({
"start"
:
None
,
"stop"
:
None
})
self
.
vad
=
{}
self
.
segments
=
[]
# speech_only_segments = []
# speech_nonspeech_segments = []
for
show
in
sorted
(
train_list
.
keys
()):
features
,
_
=
features_server
.
load
(
show
)
labels
=
numpy
.
zeros
((
len
(
features
),
1
),
dtype
=
numpy
.
int
)
speech_only_segments
=
[]
speech_nonspeech_segments
=
[]
if
show
in
train_list
and
show
in
uem_list
:
for
seg
in
train_list
[
show
]:
labels
[
seg
[
'start'
]:
seg
[
'stop'
]]
=
1
self
.
vad
[
show
]
=
labels
for
seg
in
uem_list
[
show
]:
if
seg
[
'start'
]
is
not
None
:
start
,
stop
=
seg
[
'start'
],
seg
[
'stop'
]
else
:
start
,
stop
=
0
,
len
(
features
)
for
i
in
range
(
start
,
min
(
stop
,
len
(
features
))
-
self
.
duration
,
self
.
step
):
self
.
segments
.
append
((
show
,
i
,
i
+
self
.
duration
))
# cree les segments ne contenant QUE de la parole (sans recouvrement)
for
i
in
range
(
start
,
min
(
stop
,
len
(
features
))
-
self
.
duration
,
self
.
duration
):
if
labels
[
i
:
i
+
self
.
duration
].
sum
()
==
self
.
duration
:
speech_only_segments
.
append
((
show
,
i
,
i
+
self
.
duration
))
# cree les segments contenant de la PAROLE ET DU SILENCE (avec recouvrement pour equilibrer les classes)
for
i
in
range
(
start
,
min
(
stop
,
len
(
features
))
-
self
.
duration
,
self
.
step
):
if
labels
[
i
:
i
+
self
.
duration
].
sum
()
<
self
.
duration
-
1
:
speech_nonspeech_segments
.
append
((
show
,
i
,
i
+
self
.
duration
))
# for i in range(start, min(stop, len(features)) - self.duration, self.step):
# self.segments.append((show, i, i + self.duration))
tmp
=
speech_only_segments
+
speech_nonspeech_segments
random
.
shuffle
(
tmp
)
self
.
segments
+=
tmp
print
(
"Show {}, ratio S/NS = {}"
.
format
(
show
,
len
(
speech_only_segments
)
/
(
len
(
speech_nonspeech_segments
)
+
len
(
speech_only_segments
))))
# tmp = speech_only_segments + speech_nonspeech_segments
# if shuffle:
# print("taille de tmp: {}".format(len(tmp)))
# random.shuffle(tmp)
# print("taille de tmp: {}".format(len(tmp)))
# print(tmp[0])
# for t in tmp:
# self.segments.append(t)
# self.segments = tmp.copy()
self
.
input_size
=
features
.
shape
[
1
]
print
(
"Final ratio S/NS = {}"
.
format
(
len
(
speech_only_segments
)
/
(
len
(
speech_nonspeech_segments
)
+
len
(
speech_only_segments
))))
self
.
len
=
len
(
self
.
segments
)
//
self
.
batch_size
     def __getitem__(self, index):
         batch_X = numpy.zeros((self.batch_size, self.duration, self.input_size))
...
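As a usage note (not from the commit): since __getitem__ already assembles a full batch of shape (batch_size, duration, input_size) and self.len counts batches rather than samples, the dataset would typically be wrapped in a PyTorch DataLoader with automatic batching disabled. A hedged sketch, with file names, the import path, and the returned tuple shape assumed:

from torch.utils.data import DataLoader

from nnet.sad_rnn import SAD_Dataset   # import path assumed from this repository layout

def build_loader(features_server, mdtm_file="train.mdtm", uem_file="train.uem"):
    # Argument names follow the signature shown in the diff; whether the second
    # positional argument is `features_server` or `feature_file` after this
    # commit is not fully visible above, so it is passed positionally here.
    dataset = SAD_Dataset(mdtm_file,
                          features_server,
                          batch_size=512,
                          duration=3.2,
                          step=0.8,
                          uem_file=uem_file,
                          shuffle=True)
    # Each item is already a full batch, so disable the DataLoader's own batching.
    return DataLoader(dataset, batch_size=None, shuffle=False)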