Commit 6f229e99 authored by Anthony Larcher
Browse files

before debug

parent ad8a9a74
......@@ -584,6 +584,11 @@ class FeaturesExtractor(object):
else:
tmp_dict[show][_id] = numpy.concatenate((tmp_dict[show][_id], numpy.arange(start, stop-1)), axis=0)
for k in tmp_dict[show].keys():
print(f"{k} : {tmp_dict[show][k].shape}")
output_show = list()
output_id = list()
output_start = list()
......@@ -598,6 +603,7 @@ class FeaturesExtractor(object):
# logging.info('tmp file name: '+temp_file_name)
self.vad = None
h5f = self.extract(show, channel, input_audio_filename, backing_store=False)
print(h5f["EST2BC_FRE_FR_20101019_0910_FINTER_DEBATE/cep"].shape)
energy = h5f.get(show + '/energy')[()]
label = h5f.get(show + '/vad')[()]
fb = h5f.get(show + '/fb')[()]
......
......@@ -480,6 +480,9 @@ class FeaturesServer(object):
label=label,
start=start, stop=stop,
global_cmvn=self.global_cmvn)
import ipdb
ipdb.set_trace()
# Post-process the features and return the features and vad label
if global_cmvn:
feat, label = self.post_processing(feat, label, global_mean, global_std)
......@@ -509,33 +512,34 @@ class FeaturesServer(object):
h5f = self.features_extractor.extract(show, channel, input_audio_filename=input_feature_filename)
feat_per_spk = dict()
for spk_id in set(idmap.modelset):
for spk_id in set(idmap.leftids):
# get all segments for the current spk_id
spk_feat = []
spk_label = []
for idx, (id, start, stop) in enumerate(zip(idmap.modelset, idmap.start, idmap.stop)):
for idx, (id, start, stop) in enumerate(zip(idmap.leftids, idmap.start, idmap.stop)):
if id == spk_id:
feat, label, global_mean, global_std, global_cmvn = read_hdf5_segment(h5f,
show,
dataset_list=self.dataset_list,
label=label,
start=start, stop=stop,
global_cmvn=self.global_cmvn)
feat, lbl, global_mean, global_std, global_cmvn = read_hdf5_segment(h5f,
show,
dataset_list=self.dataset_list,
label=label,
start=start, stop=stop-1,
global_cmvn=self.global_cmvn)
spk_feat.append(feat)
spk_label.append(label)
spk_label.append(lbl)
# concatenate all features and then post-process
feat = numpy.concatenate(spk_feat)
label = numpy.concatenate(spk_label)
lbl = numpy.concatenate(spk_label)
# Post-process the features and return the features and vad label
if global_cmvn:
feat, label = self.post_processing(feat, label, global_mean, global_std)
feat, lbl = self.post_processing(feat, lbl, global_mean, global_std)
else:
feat, label = self.post_processing(feat, label)
feat_per_spk[spk_id] = (feat, label)
feat, lbl = self.post_processing(feat, lbl)
feat_per_spk[spk_id] = (feat, lbl)
print(f"global_cmvn = {global_cmvn}")
return feat_per_spk
def get_tandem_features(self, show, channel=0, label=None, start=None, stop=None):
......@@ -574,7 +578,7 @@ class FeaturesServer(object):
:param show: the ID of the show
:param channel: the index of the channel
:param start: index of the first frame of the selected segment
:param start: index of the first frame of the selected segment
:param stop: index of the last frame of the selected segment
:return: the number of frames, the mean of the frames and their standard deviation
......
......@@ -743,7 +743,8 @@ class StatServer:
assert isinstance(ubm, Mixture), 'First parameter has to be a Mixture'
assert isinstance(feature_server, FeaturesServer), 'Second parameter has to be a FeaturesServer'
show = self.segset[0]
#show = self.segset[0]
show = input_feature_filename
# If using a FeaturesExtractor, get the channel number by checking the extension of the show
channel = 0
......@@ -754,6 +755,7 @@ class StatServer:
features_per_speaker = feature_server.get_features_per_speaker(show, idmap, channel)
for idx, (spk_id, data) in enumerate(features_per_speaker.items()):
cep, _ = data
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment