#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 26 22:14:38 2019

@author: felix
"""

import os
import time
import fnmatch
import csv
import torch
from arguments import ArgParser
from unet import UNet5
import torch.nn as nn
#from tensorboardX import SummaryWriter
from Dataloader_solo import Dataset
import numpy as np
import scipy.io.wavfile
import librosa

# gather all file paths with a given extension under a directory tree
def create_list(path, ext):
    list_names = []
    for root, dirnames, filenames in os.walk(path):
        for filename in fnmatch.filter(filenames, '*' + ext):
            list_names.append(os.path.join(root, filename))
    return list_names
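# Usage sketch (hypothetical call, not executed in this script): collect every
# .wav file under the training set directory defined below in the main block.
#   wav_files = create_list('./data_sound/trainset2', '.wav')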


def create_optimizer(nets, args):
    net_sound = nets
    param_groups = [{'params': net_sound.parameters(), 'lr': args.lr_sound}]
    return torch.optim.Adam(param_groups)


def init_weights(m):
    if type(m) == nn.Conv2d:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)


def unwrap_mask(infos):
    # stack the ground-truth masks (one per class) into a single tensor;
    # the spectrogram masks are 256x173 bins (for a kernel of 5)
    gt_masks = torch.empty(args.batch_size, args.nb_classes, 256, 173, dtype=torch.float)
    for ii in range(args.batch_size):
        for jj in range(args.nb_classes):
            gt_masks[ii, jj] = infos[jj][2][ii]
    return gt_masks
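# Note: unwrap_mask assumes the structure produced by Dataloader_solo.Dataset,
# where batch_data[2] is a per-class list and infos[class][2][sample] holds the
# ground-truth 256x173 mask tensor for that sample.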


def build_audio(audio_names, pred_masks, magmix, phasemix):
    # reconstruct one waveform per class by applying the predicted masks to the
    # mixture magnitude and reusing the mixture phase
    for ii in range(args.batch_size):
        masks_ii = pred_masks[ii].detach().cpu().numpy()
        mag_ii   = magmix[ii].squeeze(0).detach().cpu().numpy()
        phase_ii = phasemix[ii].squeeze(0).detach().cpu().numpy()
        for n in range(args.nb_classes):
            name   = audio_names[n][1][ii]
            magnew = masks_ii[n] * mag_ii
            spec   = magnew.astype(complex) * np.exp(1j * phase_ii)
            audio  = librosa.istft(spec, hop_length=256)
            scipy.io.wavfile.write('restored_audio/restored_{}.wav'.format(name), 22050, audio)
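# Note: the reconstruction above assumes the spectrograms were computed with
# hop_length=256 at a 22050 Hz sample rate (the rate written to the .wav files);
# if Dataloader_solo uses different STFT settings, keep these values in sync.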


def save_arguments(args, path):
    # dump all parsed arguments to <path>/infos.txt and echo them to stdout
    print("Input arguments:")
    with open(path + "/infos.txt", "w") as file1:
        for key, val in vars(args).items():
            file1.write("{} {}\n".format(key, val))
            print("{:16} {}".format(key, val))


def evaluation(net, loader, args):
    # no gradient updates during evaluation
    torch.set_grad_enabled(False)
    num_batch = 0
    criterion = nn.BCELoss()
    args.out_threshold = 0.4
    for ii, batch_data in enumerate(loader):
        # forward pass
        magmix = batch_data[0]
        magmix = magmix.to(args.device)
        masks  = unwrap_mask(batch_data[2])
        masks  = masks.to(args.device)
        num_batch += 1
        masks_pred = net(magmix)
        # loss
        loss = criterion(masks_pred, masks)
        # write the loss value and elapsed time (in minutes) for every batch
        batchtime = (time.time() - args.starting_training_time)/60  # minutes
        with open(args.path + "/loss_times_eval.csv", "a") as f:
            writer = csv.writer(f)
            writer.writerow([str(loss.cpu().detach().numpy()), batchtime, num_batch])


def train(net, loader_train, loader_eval, optimizer, args):
    torch.set_grad_enabled(True)
    num_batch = 0 
    criterion = nn.BCELoss()     
    for ii, batch_data in enumerate(loader_train):
        # on the first batch, record the species names and save all arguments to infos.txt
        if ii == 0:
            args.species = []
            for n in range(args.nb_classes):
                args.species.append(batch_data[2][n][0][0])
            save_arguments(args, args.path)

        num_batch += 1
        magmix     = batch_data[0]
        magmix     = magmix.to(args.device)
        masks_pred = net(magmix, dropout=True)        
        masks      = unwrap_mask(batch_data[2])
        masks      = masks.to(args.device)
        loss       = criterion(masks_pred, masks)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()        
        # write the loss value and elapsed time (in minutes) for every batch
        batchtime = (time.time() - args.starting_training_time)/60  # minutes
        with open(args.path + "/loss_times_train.csv", "a") as f:
            writer = csv.writer(f)
            writer.writerow([str(loss.cpu().detach().numpy()), batchtime, num_batch])
        # run a full evaluation pass every 100 batches; evaluation() disables
        # gradients, so re-enable them before resuming training
        if ii % 100 == 0:
            evaluation(net, loader_eval, args)
            torch.set_grad_enabled(True)
        if ii % args.save_per_batchs == 0:
            torch.save({
                'model_state_dict': net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()},
                args.path + '/Saved_models/model_batchs{}.pth.tar'.format(num_batch))


#***************************************************    
#****************** MAIN ***************************    
#***************************************************   
if __name__ == '__main__':
    # arguments
    parser          = ArgParser()
    args            = parser.parse_train_arguments()
    args.batch_size = 16
    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    args.starting_training_time = time.time()
    args.save_per_batchs = 500
    args.nb_classes = 1 
    args.mode = 'train'
    args.lr_sound = 1e-5  # learning rate read by create_optimizer()
    #model definition     
    net = UNet5(n_channels=1, n_classes=args.nb_classes)
    net.apply(init_weights)
    net = net.to(args.device)
    # Set up optimizer
    optimizer = create_optimizer(net, args)
    bird = 'crow'
    args.path = "./training_solo/5species/" + bird + '3'
    args._augment = 'dropout0.2_natural_noise_gaussian noise_avec_masks_avec_pitch_shifting_3call_oiseaux_en_bruit_200000samples_5especes_oiseaux_for_noise'
    # make sure the output directories exist before any file is written there
    os.makedirs(args.path + '/Saved_models', exist_ok=True)
###########################################################
################### TRAINING ##############################
###########################################################      
    if args.mode == 'train':
        # overwrite (reset) the CSV log files for training and evaluation losses
        fichierTrain = open(args.path+"/loss_times_train.csv", "w")
        fichierTrain.close()
        fichierEval = open(args.path+"/loss_times_eval.csv", "w")
        fichierEval.close()
        #Dataset loading
        root_train = './data_sound/trainset2'
        root_eval = './data_sound/valset' 
        ext  = '.wav'    
        # training dataset
        train_classes = Dataset('train', root_train, bird, nb_classes=args.nb_classes, nb_classes_noise=2, path_background="./data_sound/noises/")
        loader_train = torch.utils.data.DataLoader(
            train_classes,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=20)
        # evaluation dataset
        eval_classes = Dataset('eval', root_eval, bird, nb_classes=args.nb_classes, nb_classes_noise=2, path_background="./data_sound/noises/")
        loader_eval = torch.utils.data.DataLoader(
            eval_classes,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=20)


        for epoch in range(0, 1):
            train(net, loader_train, loader_eval, optimizer, args) 
            torch.save({
                'model_state_dict': net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()},
                args.path + '/Saved_models/model_epoch{}.pth.tar'.format(epoch))
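
# Resuming from a saved checkpoint (sketch, assuming the same UNet5/optimizer
# setup as above; the epoch index below is illustrative):
#   checkpoint = torch.load(args.path + '/Saved_models/model_epoch0.pth.tar')
#   net.load_state_dict(checkpoint['model_state_dict'])
#   optimizer.load_state_dict(checkpoint['optimizer_state_dict'])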