Commit ff8eeae3 authored by Félix Michaud

new images 1epoch

parent 67567504
@@ -87,10 +87,10 @@ def freq_mask(spec):
fact1 = random.randint(int(rows/40), int((rows/10)))
frame = np.zeros([fact1, columns])
#position of the band on the y axis
pos = random.randint(0, rows-fact1-1)
pos = random.randint(10, rows-fact1-1)
up = np.ones([pos-1, columns])
down = np.ones([rows-(pos+fact1)+1, columns])
mask = np.concatenate((up, frame, down), axis=0)
mask = torch.from_numpy(np.concatenate((up, frame, down), axis=0)).float()
masked = spec * mask
return masked
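Note (not part of the commit): a minimal standalone sketch of the band masking this hunk performs, with the mask built as a torch tensor so the multiplication no longer mixes numpy arrays and torch tensors; the 512 x 256 spectrogram shape is an assumption for illustration.

import random
import numpy as np
import torch

spec = torch.rand(512, 256)                                 # assumed shape: frequency bins x time frames
rows, columns = spec.shape
fact1 = random.randint(int(rows / 40), int(rows / 10))      # height of the masked band
pos = random.randint(10, rows - fact1 - 1)                  # position of the band on the y axis
up = np.ones([pos - 1, columns])
frame = np.zeros([fact1, columns])
down = np.ones([rows - (pos + fact1) + 1, columns])
mask = torch.from_numpy(np.concatenate((up, frame, down), axis=0)).float()
masked = spec * mask                                        # stays a torch tensor with the same shape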
@@ -101,13 +101,32 @@ def time_mask(spec):
fact1 = random.randint(int(columns/40), int((columns/10)))
frame = np.zeros([rows, fact1])
#position of the band on the x axis
pos = random.randint(0, columns-fact1-1)
pos = random.randint(10, columns-fact1-1)
left = np.ones([rows, pos-1])
right = np.ones([rows, columns-(pos+fact1)+1])
mask = np.concatenate((left, frame, right), axis=1)
mask = torch.from_numpy(np.concatenate((left, frame, right), axis=1)).float()
masked = spec * mask
return masked
def manipulate(data, sampling_rate, shift_max, shift_direction):
    shift = np.random.randint(sampling_rate * shift_max)
    if shift_direction == 'right':
        shift = -shift
    elif shift_direction == 'both':
        direction = np.random.randint(0, 2)
        if direction == 1:
            shift = -shift
    augmented_data = np.roll(data, shift)
    # set the leading/trailing samples exposed by the shift to silence
    if shift > 0:
        augmented_data[:shift] = 0
    else:
        augmented_data[shift:] = 0
    return augmented_data
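Note (not part of the commit): a hedged usage sketch for the new manipulate helper, which rolls the waveform by a random number of samples and silences the vacated region; the 16 kHz rate and the synthetic test tone are assumptions.

import numpy as np

sr = 16000                                                  # assumed sampling rate
t = np.linspace(0, 1, sr, endpoint=False)
audio = np.sin(2 * np.pi * 440 * t)                         # one-second test tone

shifted = manipulate(audio, sr, shift_max=0.25, shift_direction='both')
assert shifted.shape == audio.shape                         # length preserved, shifted-in samples set to 0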
class Dataset(data.Dataset):
'Characterizes a dataset for PyTorch'
def __init__(self, path, nb_classes=2, augmentation=True, path_background="./noises"):
@@ -132,17 +151,23 @@ class Dataset(data.Dataset):
print("** WARNING ** No data loaded from " + path)
return dict_classes
def mixed_audio_augmentation(self,audio):
def mixed_audio_augmentation(self, audio, sampling_rate):
classe_noise = random.randint(0, len(list(self.dict_noises.keys()))-1)
classe_noise = list(self.dict_noises.keys())[classe_noise]
#random natural noise augmentation
filename_noise = self.dict_noises[classe_noise][random.randint(0, len(self.dict_noises[classe_noise])-1)]
audio_noise, sr = load_file(filename_noise)
coeff = int(np.ceil(np.max(audio)*random.choice([1, 2, 3, 4, 5, 6, 7])))
return audio + (audio_noise)*coeff
noisy_audio = audio + (audio_noise)*coeff
#random pitch shifting
step_pitch = random.uniform(-10, 10)
mod_audio = librosa.effects.pitch_shift(noisy_audio, sampling_rate, n_steps=step_pitch)
#random time shifting
final_audio = manipulate(mod_audio, sampling_rate, 0.25, 'both')
return final_audio
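Note (not part of the commit): the pitch-shifting step in isolation, as a hedged sketch; keyword arguments are used because recent librosa releases accept sr and n_steps only as keywords, and the ±10 semitone range mirrors the line above.

import random
import numpy as np
import librosa

sr = 16000                                                  # assumed sampling rate
audio = np.random.randn(sr).astype(np.float32)              # one second of noise as a stand-in waveform
step_pitch = random.uniform(-10, 10)                        # same range as above
pitched = librosa.effects.pitch_shift(audio, sr=sr, n_steps=step_pitch)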
#randomly apply at least 1 band on the spectrogram
def spec_augmentation(self, spec):
spec = spec.numpy()
n = random.randint(0, 2)
if n == 0:
t = random.randint(0, 1)
@@ -154,7 +179,7 @@ class Dataset(data.Dataset):
for ii in range(n):
spec = time_mask(spec)
spec = freq_mask(spec)
return torch.from_numpy(spec)
return spec
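Note (not part of the commit): since freq_mask and time_mask now return torch tensors, the spectrogram can flow through this masking loop and be returned without a torch.from_numpy round trip; a minimal illustration on an assumed 512 x 256 tensor, assuming the two helpers accept a torch input.

import random
import torch

spec = torch.rand(512, 256)                                 # assumed magnitude spectrogram
n = random.randint(1, 2)                                    # number of mask pairs to apply
for _ in range(n):
    spec = time_mask(spec)                                  # both helpers take and return torch tensors now
    spec = freq_mask(spec)
print(spec.size())                                          # torch.Size([512, 256]), still a tensor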
def __len__(self):
@@ -184,6 +209,7 @@ class Dataset(data.Dataset):
audio = filt(audio_raw, rate)
mag, phase = _stft(audio)
mag = create_mask(mag.squeeze(0).squeeze(0))
print(np.shape(mag), 'mag')
mask = threshold(mag)
f.append(mask)
f.append(mag)
@@ -194,16 +220,14 @@ class Dataset(data.Dataset):
audio_mix += audio
'Build mixed mask'
if self.augmentation:
audio_mix = self.mixed_audio_augmentation(audio_mix)
audio_mix = self.mixed_audio_augmentation(audio_mix, rate)
mag_mix, phase_mix = _stft(audio_mix)
mag_mix = mag_mix.squeeze(0).squeeze(0)
print(mag_mix.size(),'mag_mix before spec' )
mag_mix = self.spec_augmentation(mag_mix)
print(mag_mix.size(),'mag_mix after spec' )
# mags_mix = create_im(mag_mix)
# mags_mix = mags_mix.squeeze(0)
# return [mags_mix, phase_mix, files]
# mag_mix = self.spec_augmentation(mag_mix)
mags_mix = create_im(mag_mix)
mags_mix = mags_mix.squeeze(0)
return [mags_mix, phase_mix, files]
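Note (not part of the commit): a hedged sketch of how the [mags_mix, phase_mix, files] return value above would typically be consumed; the directory names, batch size, and the presence of audio files under them are assumptions.

from torch.utils import data

dataset = Dataset("./train_data", nb_classes=2, augmentation=True, path_background="./noises")
loader = data.DataLoader(dataset, batch_size=4, shuffle=True, num_workers=0)

for mags_mix, phase_mix, files in loader:
    # mags_mix: batched mixture spectrograms, phase_mix: matching phases,
    # files: the per-class masks and magnitudes appended in the loop above
    print(mags_mix.size())
    break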