Commit e32dd028 authored by Anthony Larcher

minor debug fixes in mixture and xvector

parent 96e86961
@@ -756,13 +756,13 @@ class Mixture(object):
session_list = feature_list.rightids
else:
session_list = feature_list
print("avant init")
init_session_list = session_list[:20]
init_start_list = None
init_stop_list = None
if start_list is not None:
init_start_list = start_list[:20]
init_stop_list = stop_list[:20]
self._init(features_server, init_session_list, start_list=init_start_list, stop_list=init_stop_list, num_thread=num_thread)
print("fin init")
# for N iterations:
for it in iterations[:int(numpy.log2(distrib_nb))]:
# Save current model before splitting
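For context, the loop bound above follows from the binary-split strategy: starting from a single Gaussian, each stage doubles the number of components, so reaching `distrib_nb` components takes `log2(distrib_nb)` splits. A minimal sketch of that schedule (the `iterations` tuple here is illustrative, not the caller's actual value):

```python
import numpy

distrib_nb = 512  # hypothetical target number of Gaussian components
# Illustrative per-stage EM iteration counts; the real tuple is passed in by the caller
iterations = (1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8)

# log2(512) = 9 doublings take a single Gaussian to 512 components
for stage, it in enumerate(iterations[:int(numpy.log2(distrib_nb))]):
    print(f"stage {stage}: run {it} EM iterations, then split each component in two")
```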
@@ -34,6 +34,7 @@ import numpy
import pandas
import pickle
import shutil
import time
import torch
import torch.optim as optim
import torch.multiprocessing as mp
@@ -176,7 +177,6 @@ def save_checkpoint(state, is_best, filename='checkpoint.pth.tar', best_filename
"""
torch.save(state, filename)
if is_best:
print("BEST MODEL EVER !!!")
shutil.copyfile(filename, best_filename)
class MeanStdPooling(torch.nn.Module):
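For reference, a minimal self-contained sketch of how `save_checkpoint` above is meant to be called; the function body is restated for completeness, the state keys mirror those used later in `xtrain`, and the model, optimizer, and values are placeholders:

```python
import shutil
import torch

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar',
                    best_filename='model_best.pth.tar'):
    # Persist the full training state; keep a separate copy of the best model so far
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, best_filename)

model = torch.nn.Linear(10, 2)  # placeholder network
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
save_checkpoint({'epoch': 3,
                 'model_state_dict': model.state_dict(),
                 'optimizer_state_dict': optimizer.state_dict(),
                 'accuracy': 87.5},
                is_best=True)
```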
@@ -528,6 +528,9 @@ def xtrain(speaker_number,
#writer = SummaryWriter("runs/xvectors_experiments_2")
writer = None
t = time.localtime()
logging.critical(f"Start process at {time.strftime('%H:%M:%S', t)}")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Start from scratch
if model_name is None:
@@ -634,10 +637,16 @@ def xtrain(speaker_number,
},
]
#optimizer = torch.optim.Adam(params,
# lr=0.001,
# weight_decay=0.0001,
# amsgrad=1)
optimizer = torch.optim.SGD(params,
lr=lr,
momentum=0.9,
weight_decay=0.0005)
print(f"Learning rate = {lr}")
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', verbose=True)
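The hunk above swaps the Adam optimizer for SGD with momentum and pairs it with a plateau scheduler that lowers the learning rate when the validation loss stops improving. A self-contained sketch of that pairing, with a placeholder model and loss:

```python
import torch

model = torch.nn.Linear(512, 1024)  # placeholder for the x-vector network
lr = 0.01                           # assumed initial learning rate

optimizer = torch.optim.SGD(model.parameters(),
                            lr=lr,
                            momentum=0.9,
                            weight_decay=0.0005)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', verbose=True)

for epoch in range(3):
    val_loss = 1.0                  # placeholder validation loss (no improvement)
    scheduler.step(val_loss)        # decays lr once val_loss has been flat for `patience` epochs
    print(optimizer.param_groups[0]['lr'])
```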
@@ -656,7 +665,8 @@ def xtrain(speaker_number,
# Add the cross validation here
accuracy, val_loss = cross_validation(model, validation_loader, device=device)
logging.critical("*** Cross validation accuracy = {} %".format(accuracy))
t= time.localtime()
logging.critical(f"***{time.strftime('%H:%M:%S', t)} Cross validation accuracy = {accuracy} %")
# Decrease learning rate according to the scheduler policy
scheduler.step(val_loss)
@@ -686,6 +696,9 @@ def xtrain(speaker_number,
best_accuracy_epoch = epoch
#writer.close()
for ii in range(torch.cuda.device_count()):
print(torch.cuda.memory_summary(ii))
logging.critical(f"Best accuracy {best_accuracy * 100.} obtained at epoch {best_accuracy_epoch}")
def train_epoch(model, epoch, training_loader, optimizer, log_interval, device, clipping=False, tb_writer=None):
@@ -740,7 +753,19 @@ def train_epoch(model, epoch, training_loader, optimizer, log_interval, device,
# plot_classes_preds(model, data.to(device), target.to(device)),
# global_step=epoch * len(training_loader) + batch_idx)
running_loss = 0.0
else:
# Loss went to NaN: checkpoint the current state and dump the offending batch for post-mortem
save_checkpoint({
'epoch': epoch,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'accuracy': 0.0,
'scheduler': 0.0
}, False, filename="model_loss_NAN.pt", best_filename='toto.pt')
with open("batch_loss_NAN.pkl", "wb") as fh:
pickle.dump(data.cpu(), fh)
# Abort: training cannot recover once the loss is NaN
import sys
sys.exit()
return model
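Note that this hunk begins in the middle of a conditional, so the test that routes execution into the `else:` branch is outside the diff; presumably it checks the batch loss for NaN. A minimal sketch of that guard, with placeholder tensors:

```python
import pickle
import sys

import torch

loss = torch.tensor(float('nan'))  # stand-in for the batch loss
data = torch.zeros(4, 30, 200)     # stand-in for the input batch

if torch.isnan(loss):
    # Keep the offending batch on disk so the divergence can be inspected offline
    with open("batch_loss_NAN.pkl", "wb") as fh:
        pickle.dump(data.cpu(), fh)
    sys.exit("NaN loss encountered, aborting training")
```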