Commit d694e28b authored by Anthony Larcher's avatar Anthony Larcher
Browse files

more options in new_xtrain

parent 9bff8db3
......@@ -136,305 +136,6 @@ def crop(signal, duration):
return chunk
class AddNoise(object):
"""
"""
def __init__(self, noise_db_csv, snr_min_max, noise_root_path, sample_rate=16000):
"""
"""
self.snr_min = snr_min_max[0]
self.snr_max = snr_min_max[1]
self.noise_root_path = noise_root_path
self.sample_rate = sample_rate
df = pandas.read_csv(noise_db_csv)
self.noises = []
for index, row in df.iterrows():
self.noises.append(Noise(type=row["type"], file_id=row["file_id"], duration=row["duration"]))
def __call__(self, sample):
"""
:param original:
:param sample_rate:
:return:
"""
data = sample[0]
if sample[4]:
original_duration = len(data)
# accumulate enough noise to cover duration of original waveform
noises = []
left = original_duration
while left > 0:
# select noise file at random
file = random.choice(self.noises)
noise_signal, fs = soundfile.read(self.noise_root_path + "/" + file.file_id + ".wav")
# Load noise from file
if not fs == self.sample_rate:
print("Problem") # todo
duration = noise_signal.shape[0]
# if noise file is longer than what is needed, crop it
if duration > left:
noise = crop(noise_signal, left)
left = 0
# otherwise, take the whole file
else:
noise = noise_signal
left -= duration
# Todo Downsample if needed
# if sample_rate > fs:
#
noise = normalize(noise)
noises.append(noise.squeeze())
# concatenate
noise = numpy.hstack(noises)
# select SNR at random
snr = (self.snr_max - self.snr_min) * numpy.random.random_sample() + self.snr_min
alpha = numpy.exp(-numpy.log(10) * snr / 20)
data = normalize(data) + alpha * noise
return data.squeeze(), sample[1], sample[2], sample[3], sample[4], sample[5]
class AddNoiseFromSilence(object):
"""
"""
def __init__(self, noise_db_csv, snr_min_max, noise_root_path, sample_rate=16000):
"""
"""
self.snr_min = snr_min_max[0]
self.snr_max = snr_min_max[1]
self.noise_root_path = noise_root_path
self.sample_rate = sample_rate
df = pandas.read_csv(noise_db_csv)
self.noises = []
for index, row in df.iterrows():
self.noises.append(Noise(type=row["type"], file_id=row["file_id"], duration=row["duration"]))
def __call__(self, sample):
"""
:param original:
:param sample_rate:
:return:
"""
data = sample[0]
if sample[4]:
original_duration = len(data)
# accumulate enough noise to cover duration of original waveform
noises = []
left = original_duration
while left > 0:
# select noise file at random
file = random.choice(self.noises)
noise_signal, fs = soundfile.read(self.noise_root_path + "/" + file.file_id + ".wav")
# Load noise from file
if not fs == self.sample_rate:
print("Problem") # todo
duration = noise_signal.shape[0]
# if noise file is longer than what is needed, crop it
if duration > left:
noise = crop(noise_signal, left)
left = 0
# otherwise, take the whole file
else:
noise = noise_signal
left -= duration
# Todo Downsample if needed
# if sample_rate > fs:
#
noise = normalize(noise)
noises.append(noise.squeeze())
# concatenate
noise = numpy.hstack(noises)
# select SNR at random
snr = (self.snr_max - self.snr_min) * numpy.random.random_sample() + self.snr_min
alpha = numpy.exp(-numpy.log(10) * snr / 20)
data = normalize(data) + alpha * noise
return data.squeeze(), sample[1], sample[2], sample[3], sample[4], sample[5]
if has_pyroom:
class AddReverb(object):
"""Simulate indoor reverberation
Parameters
----------
depth : (float, float), optional
Minimum and maximum values for room depth (in meters).
Defaults to (2.0, 10.0).
width : (float, float), optional
Minimum and maximum values for room width (in meters).
Defaults to (1.0, 10.0).
height : (float, float), optional
Minimum and maximum values for room heigth (in meters).
Defaults to (2.0, 5.0).
absorption : (float, float), optional
Minimum and maximum values of walls absorption coefficient.
Defaults to (0.2, 0.9).
noise : str or list of str, optional
`pyannote.database` collection(s) used for adding noise.
Defaults to "MUSAN.Collection.BackgroundNoise"
snr : (float, float), optional
Minimum and maximum values of signal-to-noise ratio.
Defaults to (5.0, 15.0)
"""
def __init__(
self,
depth=(2.0, 10.0),
width=(1.0, 10.0),
height=(2.0, 5.0),
absorption=(0.2, 0.9),
noise=None,
snr=(5.0, 15.0)
):
super().__init__()
self.depth = depth
self.width = width
self.height = height
self.absorption = absorption
self.max_order_ = 17
self.noise = noise
self.snr = snr
self.noise_ = noise
self.n_rooms_ = 128
self.new_rooms_prob_ = 0.001
self.main_lock_ = threading.Lock()
self.rooms_ = collections.deque(maxlen=self.n_rooms_)
self.room_lock_ = [threading.Lock() for _ in range(self.n_rooms_)]
@staticmethod
def random(m, M):
"""
:param m:
:param M:
:return:
"""
return (M - m) * numpy.random.random_sample() + m
def new_room(self, sample_rate: int):
"""
:param sample_rate:
:return:
"""
# generate a room at random
depth = self.random(*self.depth)
width = self.random(*self.width)
height = self.random(*self.height)
absorption = self.random(*self.absorption)
room = pyroomacoustics.ShoeBox(
[depth, width, height],
fs=sample_rate,
absorption=absorption,
max_order=self.max_order_,
)
# play the original audio chunk at a random location
original = [
self.random(0, depth),
self.random(0, width),
self.random(0, height),
]
room.add_source(original)
# play the noise audio chunk at a random location
noise = [self.random(0, depth), self.random(0, width), self.random(0, height)]
room.add_source(noise)
# place the microphone at a random location
microphone = [
self.random(0, depth),
self.random(0, width),
self.random(0, height),
]
room.add_microphone_array(
pyroomacoustics.MicrophoneArray(numpy.c_[microphone, microphone], sample_rate)
)
room.compute_rir()
return room
def __call__(self, sample):
data = sample[0]
if sample[5]:
with self.main_lock_:
# initialize rooms (with 2 sources and 1 microphone)
while len(self.rooms_) < self.n_rooms_:
room = self.new_room(self.sample_rate)
self.rooms_.append(room)
# create new room with probability new_rooms_prob_
if numpy.random.rand() > 1.0 - self.new_rooms_prob_:
room = self.new_room(self.sample_rate)
self.rooms_.append(room)
# choose one room at random
index = numpy.random.choice(self.n_rooms_)
# lock chosen room to ensure room.sources are not updated concurrently
with self.room_lock_[index]:
room = self.rooms_[index]
# play normalized original audio chunk at source #1
n_samples = len(data)
data = normalize(original).squeeze()
room.sources[0].add_signal(data)
# generate noise with random SNR
noise = self.noise_(n_samples, self.sample_rate).squeeze()
snr = self.random(*self.snr)
alpha = numpy.exp(-numpy.log(10) * snr / 20)
noise *= alpha
# play noise at source #2
room.sources[1].add_signal(noise)
# simulate room and return microphone signal
room.simulate()
data = room.mic_array.signals[0, :n_samples, numpy.newaxis]
return data, sample[1], sample[2], sample[3] , sample[4], sample[5]
def data_augmentation(speech,
sample_rate,
transform_dict,
......
......@@ -164,7 +164,7 @@ class MelSpecFrontEnd(torch.nn.Module):
f_max=7600,
win_length=1024,
window_fn=torch.hann_window,
hop_length=256,
hop_length=160,
power=2.0,
n_mels=80):
......
......@@ -992,7 +992,18 @@ def update_training_dictionary(dataset_description,
fill_dict(training_opts, tmp_train_dict)
# Overwrite with manually given parameters
# TODO
if "lr" in kwargs:
training_opts["lr"] = kwargs['lr']
if "batch_size" in kwargs:
dataset_opts["batch_size"] = kwargs["batch_size"]
if "optimizer" in kwargs:
training_opts["optimizer"]["type"] = kwargs["optimizer"]
if "scheduler" in kwargs:
training_opts["scheduler"]["type"] = kwargs["scheduler"]
if "margin" in kwargs:
model_opts["loss"]["aam_margin"] = kwargs["margin"]
if "aam_s" in kwargs:
model_opts["loss"]["aam_s"] = kwargs["aam_s"]
return dataset_opts, model_opts, training_opts
......@@ -1354,6 +1365,8 @@ def new_xtrain(dataset_description,
# TODO gérer l'affichage en utilisant le training_monitor
monitor.display_final()
return monitor.best_eer
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment