Commit 188f6ba4 authored by Anthony Larcher
Browse files

save_multispeaker not compressed

parent 0865e444
......@@ -581,6 +581,11 @@ class FeaturesExtractor(object):
output_id = list()
output_start = list()
output_stop = list()
global_compression = 'none'
if self.compressed == 'percentile':
global_compression = 'percentile'
self.compressed = 'none'
for show in tmp_dict:
# temp_file_name = tempfile.NamedTemporaryFile().name
# logging.info('tmp file name: '+temp_file_name)
......@@ -616,14 +621,16 @@ class FeaturesExtractor(object):
fb[idx],
None,
label[idx],
compressed)
global_compression)
if keep_all:
self._save(show, output_feature_filename, save_param, cep, energy, fb, None, label, compressed)
self._save(show, output_feature_filename, save_param, cep, energy, fb, None, label, global_compression)
self.vad = param_vad
self.save_param = save_param
self.compressed = global_compression
if keep_all:
return copy.deepcopy(idmap)
out_idmap = IdMap()
......
......@@ -1085,6 +1085,7 @@ def _write_show(show,
compression="gzip",
fletcher32=True)
if energy is not None:
energy = energy.squeeze()
fh.create_dataset(show + '/energy', data=energy.astype('float32'),
maxshape=(None,),
compression="gzip",
......
......@@ -19,23 +19,8 @@ class SAD_Dataset(Dataset):
"""
Object that takes a list of files from a file and initialize a Dataset
"""
def __init__(self, mdtm_file, feature_file, batch_size=512, duration=3.2, step=0.8, uem_file=None, shuffle=False):
def __init__(self, mdtm_file, feature_file, batch_size=512, duration=3.2, step=0.8, uem_file=None, shuffle=False, compressed='percentile'):
<<<<<<< HEAD
def __init__(self, mdtm_file, feature_file, batch_size=512, duration=3.2, step=0.8, uem_file=None,
shuffle=False):
self.batch_size = batch_size
self.duration = int(duration * 100)
self.step = int(step * 100)
self.features_server = features_server
train_list = {}
with open(mdtm_file, 'r') as f:
lines = [l for l in f]
for line in lines[:500]:
=======
self.batch_size = batch_size
self.duration = int(duration * 100)
self.step = int(step * 100)
......@@ -45,7 +30,6 @@ class SAD_Dataset(Dataset):
train_list = {}
with open(mdtm_file, 'r') as f:
for line in f:
>>>>>>> 11656bee1e162d1fc94efa35d515950ea553defd
show, _, start, dur, _, _, _, _ = line.rstrip().split()
if show not in train_list:
train_list[show] = []
......@@ -70,23 +54,11 @@ class SAD_Dataset(Dataset):
self.vad = {}
self.segments = []
<<<<<<< HEAD
# speech_only_segments = []
# speech_nonspeech_segments = []
for show in sorted(train_list.keys()):
features, _ = features_server.load(show)
=======
for show in sorted(train_list.keys()):
#features, _ = features_server.load(show)
#features = read_hdf5_segment(self.feature_file,
# show,
# ['energy', 'cep'],
# label=None,
# start=None, stop=None,
# global_cmvn=False)[0]
features = _read_dataset_percentile(self.feature_file, show+"/cep")
>>>>>>> 11656bee1e162d1fc94efa35d515950ea553defd
if compressed == 'percentile':
features = _read_dataset_percentile(self.feature_file, show+"/cep")
elif compressed == 'none':
features = self.feature_file[show+"/cep"].value
labels = numpy.zeros((len(features), 1), dtype=numpy.int)
speech_only_segments = []
......@@ -135,14 +107,11 @@ class SAD_Dataset(Dataset):
batch_Y = numpy.zeros((self.batch_size, self.duration, 1))
for i in range(self.batch_size):
show, start, stop = self.segments[index * self.batch_size + i]
#features, _ = self.features_server.load(show)
#features = read_hdf5_segment(self.feature_file,
# show,
# ['energy', 'cep'],
# label=None,
# start=start, stop=stop,
# global_cmvn=False)[0]
features = _read_dataset_percentile(self.feature_file, show + "/cep")
#features = _read_dataset_percentile(self.feature_file, show + "/cep")
features = self.feature_file[show + "/cep"].value
m = features.mean(axis=0)
s = features.std(axis=0)
features = (features - m) / s
batch_X[i] = features[start:stop]
batch_Y[i] = self.vad[show][start:stop]
#batch_X[i] = features[start:stop]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment