Commit 47e78263 authored by Anthony Larcher

cleaning

parents 3a64e622 f9f9d650
@@ -833,7 +833,8 @@ class FactorAnalyser:
              scaling_factor=1.,
              output_file_name=None,
              save_partial=False,
-             save_final=True):
+             save_final=True,
+             num_thread=1):
        """
        Train a simplified Probabilistic Linear Discriminant Analysis model (no within class covariance matrix
        but full residual covariance matrix)
@@ -904,7 +905,7 @@
                               stat1=local_stat.stat1,
                               e_h=e_h,
                               e_hh=e_hh,
-                              num_thread=1)
+                              num_thread=num_thread)

        # Accumulate for minimum divergence step
        _R = numpy.sum(e_hh, axis=0) / session_per_model.shape[0]
@@ -510,7 +510,6 @@ def write_label(label,
        fid.write(line)


def read_label(input_file_name, selected_label='speech', frame_per_second=100):
    """Read label file in ALIZE format
@@ -539,7 +539,7 @@ class SideSet(Dataset):
        if self.output_format == "pytorch":
            return torch.from_numpy(sig).type(torch.FloatTensor), torch.from_numpy(speaker_idx).type(torch.LongTensor)
        else:
-           return sig, speaker_idx
+           return sig.astype(numpy.float32), speaker_idx

    def __len__(self):
        """
@@ -646,7 +646,7 @@ class IdMapSet(Dataset):
            if "CMVN" in t:
                _transform.append(CMVN())
            if 'add_noise' in t:
-               self.add_noise[:] = 1
+               self.add_noise = numpy.ones(self.idmap.leftids.shape[0], dtype=bool)
                numpy.random.shuffle(self.add_noise)
                _transform.append(AddNoise(noise_db_csv="list/musan.csv",
                                           snr_min_max=[5.0, 15.0],
@@ -31,10 +31,12 @@ import traceback
import logging
import matplotlib.pyplot as plt
import multiprocessing
+import os
import numpy
import pandas
import pickle
import shutil
import sys
+import time
import torch
import tqdm
@@ -62,6 +64,7 @@ import torch.distributed as dist
import torch.multiprocessing as mp

+os.environ['MKL_THREADING_LAYER'] = 'GNU'

__license__ = "LGPL"
__author__ = "Anthony Larcher"
@@ -640,9 +643,9 @@ def xtrain(speaker_number,
    if num_thread is None:
        num_thread = multiprocessing.cpu_count()

    logging.critical(f"Use {num_thread} cpus")
+   logging.critical(f"Start process at {time.strftime('%H:%M:%S', time.localtime())}")

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Start from scratch
    if model_name is None and model_yaml in ["xvector", "rawnet2"]:
@@ -919,7 +922,6 @@ def train_epoch(model, epoch, training_loader, optimizer, log_interval, device,
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(training_loader):
        data = data.squeeze().to(device)
-       print(f"Shape of data: {data.shape}")
        target = target.squeeze()
        target = target.to(device)
        optimizer.zero_grad()
@@ -996,17 +998,18 @@ def cross_validation(model, validation_loader, device):
    criterion = torch.nn.CrossEntropyLoss()
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(validation_loader):
-           target = target.squeeze()
+           batch_size = target.shape[0]
+           target = target.squeeze().to(device)
            data = data.squeeze().to(device)
            if loss_criteria == "aam":
-               output = model(data.to(device), target=target)
+               output = model(data, target=target)
            else:
-               output = model(data.to(device), target=None)
+               output = model(data, target=None)
-           accuracy += (torch.argmax(output.data, 1) == target.to(device)).sum()
+           accuracy += (torch.argmax(output.data, 1) == target).sum()
-           loss += criterion(output, target.to(device))
+           loss += criterion(output, target)

    return 100. * accuracy.cpu().numpy() / ((batch_idx + 1) * batch_size), \
           loss.cpu().numpy() / ((batch_idx + 1) * batch_size)
@@ -1445,29 +1448,3 @@ def xtime(model, training_set, validation_set,
-def example(rank, world_size):
-    # create default process group
-    dist.init_process_group("gloo", rank=rank, world_size=world_size)
-    # create local model
-    model = torch.nn.Linear(10, 10).to(rank)
-    # construct DDP model
-    ddp_model = DDP(model, device_ids=[rank])
-    # define loss function and optimizer
-    loss_fn = torch.nn.MSELoss()
-    optimizer = torch.optim.SGD(ddp_model.parameters(), lr=0.001)
-
-    # forward pass
-    outputs = ddp_model(torch.randn(20, 10).to(rank))
-    labels = torch.randn(20, 10).to(rank)
-    # backward pass
-    loss_fn(outputs, labels).backward()
-    # update parameters
-    optimizer.step()
-
-def main():
-    world_size = 2
-    mp.spawn(example,
-             args=(world_size,),
-             nprocs=world_size,
-             join=True)
@@ -36,6 +36,7 @@ import numpy
import os
import scipy
import sys
+import tqdm
import warnings

from sidekit.bosaris import IdMap
@@ -890,7 +891,7 @@ class StatServer:
        unique_speaker = numpy.unique(self.modelset)
        W = numpy.zeros((vect_size, vect_size))

-       for speakerID in unique_speaker:
+       for speakerID in tqdm.tqdm(unique_speaker):
            spk_ctr_vec = self.get_model_stat1(speakerID) \
                          - numpy.mean(self.get_model_stat1(speakerID), axis=0)
            W += numpy.dot(spk_ctr_vec.transpose(), spk_ctr_vec)
@@ -1505,7 +1506,7 @@ class StatServer:
            if save_partial:
                sidekit.sidekit_io.write_fa_hdf5((mean, V, None, None, sigma),
-                                                save_partial + "_{}_between_class.h5".format(it))
+                                                "Partial_plda_{}_between_class.h5".format(it))

        return V, sigma