Commit 8c3cd7a9 authored by Anthony Larcher

Bug fixed in the factor analyser and AddNoise debugged in Xsets

parent cf803d6a
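For context, the first hunk below replaces isinstance(stat_server_filename, 'str') with isinstance(stat_server_filename, str): passing the string literal 'str' as the second argument makes isinstance raise TypeError at runtime, so the HDF5 file was never opened when a path was given. A minimal sketch of the corrected pattern, based only on the lines visible in the diff (the helper name open_stat_server is illustrative, not part of the code base):

    import h5py

    def open_stat_server(stat_server_filename, prefix=""):
        # isinstance() expects a type (or tuple of types) as its second
        # argument; passing the literal 'str' raises TypeError at runtime.
        fh = stat_server_filename
        if isinstance(stat_server_filename, str):
            # Only open by path when a filename was given; an already-open
            # handle (or compatible object) is passed through unchanged.
            fh = h5py.File(stat_server_filename, 'r')
        _, sv_size = fh[prefix + 'stat1'].shape
        return fh, sv_size

The matching hunk at line 818 applies the same fix so that the handle is closed only when it was opened from a filename here.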
@@ -765,7 +765,7 @@ class FactorAnalyser:
tv_rank = self.F.shape[1]
fh = stat_server_filename
if isinstance(stat_server_filename, 'str'):
if isinstance(stat_server_filename, str):
fh = h5py.File(stat_server_filename, 'r')
_, sv_size = fh[prefix + 'stat1'].shape
@@ -818,7 +818,7 @@ class FactorAnalyser:
iv_server.stat1, iv_sigma = watcher.get()
if isinstance(stat_server_filename, 'str'):
if isinstance(stat_server_filename, str):
fh.close()
if uncertainty:
@@ -34,7 +34,7 @@ import threading
import pyroomacoustics
Noise = collections.namedtuple('Noise', 'type file duration')
Noise = collections.namedtuple('Noise', 'type file_id duration')
def normalize(wav):
@@ -61,7 +61,7 @@ class AddNoise(object):
"""
def __init__(self, dataset_length, noisy_file_ratio, noise_db_csv, snr_min_max, noise_root_path, sample_rate=16000):
def __init__(self, noise_db_csv, snr_min_max, noise_root_path, sample_rate=16000):
"""
"""
@@ -73,7 +73,7 @@ class AddNoise(object):
df = pandas.read_csv(noise_db_csv)
self.noises = []
for index, row in df.iterrows():
self.noises.append(Noise(type=row["type"], file=row["file_id"], duration=row["duration"]))
self.noises.append(Noise(type=row["type"], file_id=row["file_id"], duration=row["duration"]))
def __call__(self, sample):
"""
@@ -84,7 +84,7 @@ class AddNoise(object):
"""
data = sample[0]
if sample[4]:
original_duration = len(data) / self.sample_rate
original_duration = len(data)
# accumulate enough noise to cover duration of original waveform
noises = []
@@ -94,28 +94,31 @@ class AddNoise(object):
# select noise file at random
file = random.choice(self.noises)
noise_signal, fs = soundfile.read(self.noise_root_path + "/" + file.file_id + ".wav")
# Load noise from file
duration = noise_signal.shape[0] / fs
if not fs == self.sample_rate:
print("Problem") # todo
duration = noise_signal.shape[0]
# if noise file is longer than what is needed, crop it
if duration > left:
noise = crop(noise_signal, duration)
noise = crop(noise_signal, left)
left = 0
# otherwise, take the whole file
else:
noise = noise_signal
left -= duration
# Todo Downsample if needed
# if sample_rate > fs:
#
noise = normalize(noise)
noises.append(noise)
noises.append(noise.squeeze()[:, None])
# concatenate
noise = numpy.vstack(noises)
noise = numpy.vstack(noises).squeeze()
# select SNR at random
snr = (self.snr_max - self.snr_min) * numpy.random.random_sample() + self.snr_min
@@ -123,7 +126,7 @@ class AddNoise(object):
data = normalize(data) + alpha * noise
return data, sample[1], sample[2], sample[3], sample[4], sample[5]
return data.squeeze(), sample[1], sample[2], sample[3], sample[4], sample[5]
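The call ends with data = normalize(data) + alpha * noise, where alpha is derived from the randomly drawn snr in lines this diff does not show. As a point of reference only, a common way to compute such a scaling factor from a target SNR in dB is sketched here; mix_at_snr and the exact formula are assumptions, not the project's code:

    import numpy

    def mix_at_snr(data, noise, snr_db):
        # Scale the noise so the mixture reaches roughly the requested SNR (dB).
        # Generic formulation; not necessarily the alpha used in AddNoise.__call__.
        signal_power = numpy.mean(data ** 2)
        noise_power = numpy.mean(noise ** 2) + 1e-12
        alpha = numpy.sqrt(signal_power / (noise_power * 10.0 ** (snr_db / 10.0)))
        return data + alpha * noise

The SNR itself is drawn uniformly in the hunk above: snr = (self.snr_max - self.snr_min) * numpy.random.random_sample() + self.snr_min.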
@@ -308,7 +308,6 @@ class MFCC(object):
:return:
"""
framed = framing(sample[0], self.window_length, win_shift=self.window_length - self.overlap).copy()
# Pre-emphasis filtering is applied after framing to be consistent with stream processing
framed = pre_emphasis(framed, self.prefac)
@@ -430,20 +429,19 @@ class SideSet(Dataset):
if 'add_noise' in t:
self.add_noise[:int(self.len * self.transformation["noise_file_ratio"])] = 1
numpy.random.shuffle(self.add_noise)
_transform.append(AddNoise(noisy_file_ratio=self.transformation["noise_file_ratio"],
noise_db_csv=self.transformation["noise_db_csv"],
_transform.append(AddNoise(noise_db_csv=self.transformation["noise_db_csv"],
snr_min_max=self.transformation["noise_snr"],
noise_root_path=self.transformation["noise_root_db"]))
if 'add_reverb' in t:
self.add_reverb[:int(self.len * self.transformation["reverb_file_ratio"])] = 1
numpy.random.shuffle(self.add_reverb)
_transform.append(AddReverb(ratio=self.transformation["reverb_ratio"],
depth=self.transformation["reverb_depth"],
_transform.append(AddReverb(depth=self.transformation["reverb_depth"],
width=self.transformation["reverb_width"],
height=self.transformation["reverb_height"],
absorption=self.transformation["reverb_absorption"],
noise=None,
snr=self.transformation["reverb_snr"]))
if 'MFCC' in t:
_transform.append(MFCC())
@@ -469,7 +467,6 @@ class SideSet(Dataset):
self.transforms = transforms.Compose(_transform)
def __getitem__(self, index):
"""
@@ -483,13 +480,13 @@ class SideSet(Dataset):
speaker_idx = self.sessions.iloc[index]["speaker_idx"]
if self.transform_pipeline:
sig, speaker_idx, _, __ = self.transforms((sig,
if self.transformation["pipeline"]:
sig, speaker_idx, _, __, _t, _s = self.transforms((sig,
speaker_idx,
self.spec_aug[index],
self.temp_aug[index],
self.add_noise[index],
self.add_reverb[index],
self.add_reverb[index]
))
return torch.from_numpy(sig).type(torch.FloatTensor), speaker_idx