Commit b9d0efb1 authored by Anthony Larcher
parents 62233110 ef14b4cf
@@ -386,6 +386,7 @@ class FactorAnalyser:
         # Estimate TV iteratively
         for it in range(nb_iter):
+            print(f"Start it: {it}")
             # Create accumulators for the list of models to process
             _A = numpy.zeros((nb_distrib, tv_rank, tv_rank), dtype=STAT_TYPE)
             _C = numpy.zeros((tv_rank, feature_size * nb_distrib), dtype=STAT_TYPE)
@@ -606,7 +607,7 @@ class FactorAnalyser:
         # Estimate TV iteratively
         for it in range(nb_iter):
+            print(f"Iteration {it}")
             # Create serialized accumulators for the list of models to process
             with warnings.catch_warnings():
                 warnings.simplefilter('ignore', RuntimeWarning)
@@ -306,6 +306,8 @@ class Xtractor(torch.nn.Module):
         self.feature_size = None
         self.norm_embedding = norm_embedding
+        print(f"Speaker number : {self.speaker_number}")
         if model_archi == "xvector":
+            if loss not in ["cce", 'aam']:
@@ -554,6 +556,7 @@ class Xtractor(torch.nn.Module):
             elif k.startswith('dropout'):
                 before_embedding_layers.append((k, torch.nn.Dropout(p=cfg["before_embedding"][k])))
+        self.embedding_size = input_size
         self.before_speaker_embedding = torch.nn.Sequential(OrderedDict(before_embedding_layers))
         self.before_speaker_embedding_weight_decay = cfg["before_embedding"]["weight_decay"]
@@ -718,9 +721,10 @@ def xtrain(speaker_number,
     if model_name is None and model_yaml in ["xvector", "rawnet2"]:
         # Initialize a first model
         if model_yaml == "xvector":
-            model = Xtractor(speaker_number, "xvector")
+            model = Xtractor(speaker_number, "xvector", loss=loss)
         elif model_yaml == "rawnet2":
             model = Xtractor(speaker_number, "rawnet2")
+        model_archi = model_yaml
     else:
         with open(model_yaml, 'r') as fh:
             model_archi = yaml.load(fh, Loader=yaml.FullLoader)
@@ -775,6 +779,7 @@ def xtrain(speaker_number,
                 param.requires_grad = False
     print(model)
+    embedding_size = model.embedding_size
     if torch.cuda.device_count() > 1 and multi_gpu:
         print("Let's use", torch.cuda.device_count(), "GPUs!")
@@ -911,7 +916,7 @@ def xtrain(speaker_number,
         val_acc, val_loss, val_eer, test_eer = compute_metrics(model,
                                                                validation_loader,
                                                                device,
-                                                               [validation_set.__len__(), 300],
+                                                               [validation_set.__len__(), embedding_size],
                                                                speaker_number,
                                                                model_archi)
@@ -937,12 +942,11 @@ def xtrain(speaker_number,
         val_acc, val_loss, val_eer, test_eer = compute_metrics(model,
                                                                validation_loader,
                                                                device,
-                                                               [validation_set.__len__(), 300],
+                                                               [validation_set.__len__(), embedding_size],
                                                                speaker_number,
                                                                model_archi)
         logging.critical(f"***{time.strftime('%H:%M:%S', time.localtime())} Training metrics - Cross validation accuracy = {val_acc} %, EER = {val_eer * 100} %")
         logging.critical(f"***{time.strftime('%H:%M:%S', time.localtime())} Training metrics - Test EER = {test_eer * 100} %")
         # Decrease learning rate according to the scheduler policy
         scheduler.step(val_loss)
@@ -1262,6 +1266,8 @@ def extract_embeddings_per_speaker(idmap_name,
             if data.shape[1] > 20000000:
                 data = data[..., :20000000]
             vec = model(data.to(device), is_eval=True)
+            if model.loss == "aam":
+                vec = vec[1]
             embeddings.stat1[idx, :] = vec.detach().cpu()
     return embeddings