Commit 96c8aaf5 authored by Félix Michaud's avatar Félix Michaud

loss

parents 97d5d11d b826fc7b
......@@ -85,11 +85,8 @@ def create_mask(mag):
# grid_warp = torch.from_numpy(warpgrid_log(264, 52, warp=True))
magim = F.grid_sample(magim, grid_warp)
return torch.from_numpy(np.flipud(magim).copy())
#kernel size: 5, padding: 3, image size: [256, 44]
#kernel size: 3, padding: 1, image size: [256, 44]
#depends on the overlap of the stft
#create a band of zeros in the spectrogram along the frequency axis
def freq_mask(spec):
fbank_size = np.shape(spec)
rows , columns = fbank_size[0], fbank_size[1]
......@@ -104,6 +101,7 @@ def freq_mask(spec):
masked = spec * mask
return masked
#create a band of zeros in the spectrogram along the time axis
def time_mask(spec):
fbank_size = np.shape(spec)
rows , columns = fbank_size[0], fbank_size[1]
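#A minimal NumPy sketch of the masking idea behind freq_mask / time_mask above,
#using a hypothetical band_mask helper and max_width parameter (not the code of
#this repository): zero out one random band along the chosen axis.
import numpy as np
import random
def band_mask(spec, axis=0, max_width=10):
    'zero out one random band along the given axis (0 = frequency bins, 1 = time frames)'
    size = np.shape(spec)[axis]
    width = random.randint(1, min(max_width, size))
    start = random.randint(0, size - width)
    mask = np.ones_like(spec)
    if axis == 0:
        mask[start:start + width, :] = 0
    else:
        mask[:, start:start + width] = 0
    return spec * mask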
......@@ -177,7 +175,6 @@ def _add_noise(signal, noise_file_name, snr, sample_rate):
#create a new signal of length = max_time
def time_elong(sr, audio, max_time=2):
final_audio = np.zeros((1, sr*max_time))
if len(audio) > sr*max_time:
print('the new audio file has to be longer than the original')
else:
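#A minimal sketch of the padding idea behind time_elong: place the short call inside
#a silent buffer of sr*max_time samples. The random offset below is an assumption;
#the exact placement logic is cut off by the next hunk.
import numpy as np
import random
def pad_to_length(audio, sr, max_time=2):
    'place a short clip at a random offset inside a zero buffer of sr*max_time samples'
    target_len = sr * max_time
    if len(audio) > target_len:
        raise ValueError('the target duration has to be longer than the original clip')
    buf = np.zeros(target_len, dtype=audio.dtype)
    offset = random.randint(0, target_len - len(audio))
    buf[offset:offset + len(audio)] = audio
    return buf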
......@@ -193,11 +190,11 @@ def time_elong(sr, audio, max_time=2):
class Dataset(data.Dataset):
'Characterizes a dataset for PyTorch'
def __init__(self, path, nb_classes=1, nb_classes_noise=1, augmentation=True, path_background="./noises"):
def __init__(self, path, name_cl, nb_classes=1, nb_classes_noise=1, augmentation=True, path_background="./noises"):
self.dict_classes = self.load_data(path)
self.nb_classes = nb_classes
self.nb_classes_noise = nb_classes_noise
self.name_cl = name_cl
self.augmentation = augmentation
self.path_background = path_background
if self.augmentation:
......@@ -223,23 +220,20 @@ class Dataset(data.Dataset):
filename_noise = self.dict_noises[classe_noise][random.randint(0, len(self.dict_noises[classe_noise])-1)]
return filename_noise
def data_augment(audio):
def data_augment(self, audio, sampling_rate):
#random pitch shifting
# step_pitch = random.uniform(-0.001, 0.001)
# mod_audio = librosa.effects.pitch_shift(noisy_audio, sampling_rate, n_steps=step_pitch)
#random time shifting
# final_audio = manipulate(noisy_audio, sampling_rate, 0.1, 'both')
return filename_noise
step_pitch = random.uniform(-0.001, 0.001)
augment_audio = librosa.effects.pitch_shift(audio, sampling_rate, n_steps=step_pitch)
return augment_audio
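#Note: the positional pitch_shift call above assumes an older librosa API; from
#librosa 0.10 the sample rate is keyword-only, so an equivalent call would be:
#augment_audio = librosa.effects.pitch_shift(audio, sr=sampling_rate, n_steps=step_pitch)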
#randomly apply at least 1 band on the spectrogram
def spec_augmentation(self, spec):
n = random.randint(0, 2)
if n == 0:
t = random.randint(0, 1)
if t == 1:
spec = time_mask(spec)
if t == 0:
if n == 1:
spec = freq_mask(spec)
else:
for ii in range(n):
......@@ -259,20 +253,37 @@ class Dataset(data.Dataset):
files = []
for cl in range(nb_classes):
'Load audio file'
classe_name = list(self.dict_classes.keys())[cl]
idx = int(random.random() * len(self.dict_classes[classe_name]) )
#pick a random class from the dict
rand_class = random.randint(0, len(self.dict_classes)-1)
classe_name = list(self.dict_classes.keys())[rand_class]
#select a random file in the class
idx = int(random.random() * len(self.dict_classes[classe_name]))
filename = self.dict_classes[classe_name][idx]
files.append([classe_name, filename])
return files
def load_class(self, classe_name):
files = []
'Load audio file'
#select a random file in the class
idx = int(random.random() * len(self.dict_classes[classe_name]) )
filename = self.dict_classes[classe_name][idx]
files.append([classe_name, filename])
return files
'[class_name, filename, [mask], [magnitude], [phase] ]'
def __getitem__(self, index):
'Load audio file for each class'
files = self.load_files(self.nb_classes)
file = self.load_class(self.name_cl)
audio_mix = None
max_time = 2
for f in files:
for f in file:
audio_raw, sr = load_file(f[1])
audio_raw = self.data_augment(audio_raw, sr)
new = time_elong(sr, audio_raw, max_time)
audio = filt(new, sr)
mag, phase = _stft(audio)
......@@ -284,30 +295,31 @@ class Dataset(data.Dataset):
if audio_mix is None:
audio_mix = audio
else:
audio_mix += audio
#add calls as noise to the longer file
audio_mix += audio
'add calls as noise from a random class'
classes_noise = self.load_files(self.nb_classes_noise)
noisy_mix = None
for fn in classes_noise:
audio_raw, sr = load_file(fn[1])
new = time_elong(sr, audio_raw, max_time)
audion = filt(new, sr)
audio_mix += audion
'Build mixed mask'
'Randomly add either Gaussian noise or natural noise'
if self.augmentation:
if random.randint(0, 1) == 1:
n_noise = self.get_noise()
snr = np.random.randint(-10, 5)
else:
if random.randint(0, 1) == 1:
n_noise = self.get_noise()
snr = np.random.randint(-10, 0)
else:
n_noise = np.random.normal(loc=0, scale=1, size=(1, max_time*sr))
n_noise = librosa.to_mono(n_noise)
snr = np.random.randint(30, 50) #-10/5 for natural noise, 30/50
audio_mix = _add_noise(audio_mix, n_noise, snr, sr)
snr = np.random.randint(10, 20) # -10..0 dB for natural noise, 10..20 dB for Gaussian noise
audio_mix = _add_noise(audio_mix, n_noise, snr, sr)
mag_mix, phase_mix = _stft(audio_mix)
mag_mix = mag_mix.squeeze(0).squeeze(0)
mag_mix = self.spec_augmentation(mag_mix)
mags_mix = create_im(mag_mix)
mags_mix = mags_mix.squeeze(0)
return [mags_mix, phase_mix, files]
return [mags_mix, phase_mix, file]
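#A minimal sketch of SNR-based mixing as done above through _add_noise (whose body is
#not part of this diff); mix_at_snr is a hypothetical helper using the usual
#power-ratio definition of SNR in dB.
import numpy as np
def mix_at_snr(signal, noise, snr_db):
    'scale noise so that 10*log10(P_signal / P_noise) equals snr_db, then add it'
    signal = np.ravel(signal)
    noise = np.resize(np.ravel(noise), signal.shape)
    p_signal = np.mean(signal ** 2)
    p_noise = np.mean(noise ** 2) + 1e-12
    scale = np.sqrt(p_signal / (p_noise * 10 ** (snr_db / 10.0)))
    return signal + scale * noise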
......@@ -7,7 +7,6 @@ Created on Wed Jun 26 22:14:38 2019
"""
import os
import random
import time
import fnmatch
import csv
......@@ -16,14 +15,12 @@ from arguments import ArgParser
from unet import UNet5
import torch.nn as nn
#from tensorboardX import SummaryWriter
from matplotlib import image as mpimg
from Dataloader_solo import Dataset
import matplotlib
import numpy as np
import collections
import scipy
#organize the file names according to their number
import librosa
#organize the file names according to their number
def create_list(path, ext):
list_names = []
for root, dirnames, filenames in os.walk(path):
......@@ -100,36 +97,36 @@ def train(net, loader_train, optimizer, path, args):
# #writing of the Loss values and elapsed time for every batch
batchtime = (time.time() - args.starting_training_time)/60 #minutes
# #Writing of the elapsed time and loss for every batch
with open(path + "/loss_times.csv", "a") as f:
with open(args.path + "/loss_times.csv", "a") as f:
writer = csv.writer(f)
writer.writerow([str(loss.cpu().detach().numpy()), batchtime, num_batch])
if ii%args.save_per_batchs == 0:
torch.save({ 'model_state_dict': net.state_dict(),
'optimizer_state_dict': optimizer.state_dict()},
path + '/Saved_models/model_batchs{}.pth.tar'.format(num_batch))
args.path + '/Saved_models/model_batchs{}.pth.tar'.format(num_batch))
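#The checkpoint saved above stores both state dicts; resuming later only needs to load
#them back (a sketch with a hypothetical batch number in the path):
#checkpoint = torch.load(args.path + '/Saved_models/model_batchs100.pth.tar')
#net.load_state_dict(checkpoint['model_state_dict'])
#optimizer.load_state_dict(checkpoint['optimizer_state_dict'])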
def evaluation(net, loader, args):
#disable gradient computation
torch.set_grad_enabled(False)
num_batch = 0
criterion = nn.BCELoss()
args.out_threshold = 0.4
for ii, batch_data in enumerate(loader):
# forward pass
magmix = batch_data[0]
magmix = magmix.to(args.device)
masks = unwrap_mask(batch_data[2])
masks = masks.to(args.device)
num_batch += 1
masks_pred = net(magmix)
# #loss
loss = criterion(masks_pred, masks)
#Visualization
with open("./losses/loss_eval/loss_times_eval{}.csv".format(args.saved_model), "a") as f:
writer = csv.writer(f)
writer.writerow([str(loss.cpu().detach().numpy()), batchtime, num_batch])
#def evaluation(net, loader, args):
##no upgrade over the gradient
# torch.set_grad_enabled(False)
# num_batch = 0
# criterion = nn.BCELoss()
# args.out_threshold = 0.4
# for ii, batch_data in enumerate(loader):
# # forward pass
# magmix = batch_data[0]
# magmix = magmix.to(args.device)
# masks = unwrap_mask(batch_data[2])
# masks = masks.to(args.device)
# num_batch += 1
# masks_pred = net(magmix)
## #loss
# loss = criterion(masks_pred, masks)
# #Visualization
# with open("./losses/loss_eval/loss_times_eval{}.csv".format(args.saved_model), "a") as f:
# writer = csv.writer(f)
# writer.writerow([str(loss.cpu().detach().numpy()), batchtime, num_batch])
#***************************************************
......@@ -153,8 +150,8 @@ if __name__ == '__main__':
net = net.to(args.device)
# Set up optimizer
optimizer = create_optimizer(net, args)
args.path = "./Unet5/file_noise/masks"
args._augment = 'natural_noise_avec_masks'
args.path = "./Unet5/pitchr"
args._augment = 'natural_noise_gaussian noise_avec_masks_sans_pitch_shifting_2call_oiseaux_en_bruit noise snr reduced (10-20)'
###########################################################
################### TRAINING ##############################
###########################################################
......@@ -163,9 +160,9 @@ if __name__ == '__main__':
fichierLoss = open(args.path+"/loss_times.csv", "w")
fichierLoss.close()
#Dataset loading
root = './data_sound/trainset/'
root = './data_sound/trainset'
ext = '.wav'
train_classes = Dataset(root, nb_classes=args.nb_classes, nb_classes_noise=3, path_background="./data_sound/noises/")
train_classes = Dataset(root,'crow', nb_classes=args.nb_classes, nb_classes_noise=2, path_background="./data_sound/noises/")
loader_train = torch.utils.data.DataLoader(
train_classes,
batch_size = args.batch_size,
......@@ -177,27 +174,27 @@ if __name__ == '__main__':
torch.save({
'model_state_dict': net.state_dict(),
'optimizer_state_dict': optimizer.state_dict()},
path+'/Saved_models/model_epoch{}.pth.tar'.format(epoch))
args.path+'/Saved_models/model_epoch{}.pth.tar'.format(epoch))
###########################################################
################### EVALUATION ############################
###########################################################
if args.mode == 'eval':
#OverWrite the Files for loss saving and time saving
fichierLoss = open("./losses/loss_eval/loss_times_eval{}.csv".format(args.saved_model), "w")
fichierLoss.close()
#Dataset loading
root = './data_sound/valset/'
ext = '.wav'
val_classes = Dataset(root, nb_classes=args.nb_classes, path_background="./data_sound/noises/")
#initialization of the model from the saved model
checkpoint = torch.load('Saved_models2/model{}.pth.tar'.format(args.saved_model))
net.load_state_dict(checkpoint['model_state_dict'])
loader_eval = torch.utils.data.DataLoader(
val_classes,
batch_size = args.batch_size,
shuffle=True,
num_workers=20)
for epoch in range(0, 1):
evaluation(net, loader_eval, optimizer, args)
# if args.mode == 'eval':
# #OverWrite the Files for loss saving and time saving
# fichierLoss = open("./losses/loss_eval/loss_times_eval{}.csv".format(args.saved_model), "w")
# fichierLoss.close()
# #Dataset loading
# root = './data_sound/valset/'
# ext = '.wav'
# val_classes = Dataset(root, nb_classes=args.nb_classes, path_background="./data_sound/noises/")
# #initialization of the model from the saved model
# checkpoint = torch.load('Saved_models2/model{}.pth.tar'.format(args.saved_model))
# net.load_state_dict(checkpoint['model_state_dict'])
#
# loader_eval = torch.utils.data.DataLoader(
# val_classes,
# batch_size = args.batch_size,
# shuffle=True,
# num_workers=20)
#
# for epoch in range(0, 1):
# evaluation(net, loader_eval, optimizer, args)