Commit 76edfb09 authored by Anthony Larcher
Browse files

debug

parent e1f577e2
......@@ -239,50 +239,36 @@ class ArcMarginProduct(torch.nn.Module):
super(ArcMarginProduct, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.s = torch.tensor(s)
self.m = torch.tensor(m)
self.s = s
self.m = m
self.weight = Parameter(torch.FloatTensor(out_features, in_features))
torch.nn.init.xavier_uniform_(self.weight)
self.easy_margin = torch.tensor(easy_margin)
self.cos_m = torch.tensor(math.cos(m))
self.sin_m = torch.tensor(math.sin(m))
self.th = torch.tensor(math.cos(math.pi - m))
self.mm = torch.tensor(math.sin(math.pi - m) * m)
def forward(self, input, target):
    """Compute scaled ArcFace-style logits from a batch of embeddings.

    The whole computation runs with CUDA autocast disabled, as in the
    original implementation.

    :param input: batch of embeddings; it is L2-normalised and projected on
        the L2-normalised ``self.weight`` to obtain cos(theta)
    :param target: class index of each sample, or ``None`` to skip the margin
    :return: ``cosine * self.s`` when ``target`` is ``None``; otherwise the
        logits where the target class uses the margin-penalised value
        ``phi`` and every other class keeps its plain cosine, scaled by
        ``self.s``
    """
    with torch.cuda.amp.autocast(enabled=False):
        # cos(theta): cosine similarity between embeddings and class weights
        cosine = torch.nn.functional.linear(
            torch.nn.functional.normalize(input),
            torch.nn.functional.normalize(self.weight),
        )
        if target is None:
            return cosine * self.s

        # Rounding can push cos^2 slightly above 1; clamp so that sqrt
        # never receives a negative value and returns NaN.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        # phi = cos(theta + m)
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        # One-hot mask of the target classes, allocated with the dtype and
        # device of ``cosine`` instead of a hard-coded 'cuda' device.
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, target.view(-1, 1).long(), 1)

        # Use phi for the target class and the plain cosine elsewhere.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        return output * self.s
self.easy_margin = easy_margin
self.cos_m = math.cos(self.m)
self.sin_m = math.sin(self.m)
self.th = math.cos(math.pi - self.m)
self.mm = math.sin(math.pi - self.m) * self.m
def forward(self, input, target=None):
    """Compute scaled ArcFace-style logits from a batch of embeddings.

    :param input: batch of embeddings whose second dimension must equal
        ``self.in_features``; it is L2-normalised and projected on the
        L2-normalised ``self.weight`` to obtain cos(theta)
    :param target: class index of each sample, or ``None`` (the default) to
        return the scaled cosine scores without any margin
    :return: ``cosine * self.s`` when ``target`` is ``None``; otherwise the
        logits where the target class uses ``phi = cos(theta + m)`` (or its
        fallback) and every other class keeps its plain cosine, scaled by
        ``self.s``
    :raises ValueError: if the feature dimension of ``input`` differs from
        ``self.in_features`` or if ``input`` and ``target`` do not have the
        same number of samples
    """
    # Explicit exceptions instead of assert, which is stripped under -O.
    if input.size()[1] != self.in_features:
        raise ValueError(
            f"expected input with {self.in_features} features, got {input.size()[1]}"
        )

    # cos(theta)
    cosine = torch.nn.functional.linear(
        torch.nn.functional.normalize(input),
        torch.nn.functional.normalize(self.weight),
    )

    # The signature allows target=None: in that case there is no label to
    # apply the margin to, so only the scaled cosine scores are returned.
    if target is None:
        return cosine * self.s

    if input.size()[0] != target.size()[0]:
        raise ValueError(
            f"input has {input.size()[0]} samples but target has {target.size()[0]}"
        )

    # cos(theta + m); the clamp keeps the sqrt argument inside [0, 1]
    sine = torch.sqrt((1.0 - torch.mul(cosine, cosine)).clamp(0, 1))
    phi = cosine * self.cos_m - sine * self.sin_m
    if self.easy_margin:
        phi = torch.where(cosine > 0, phi, cosine)
    else:
        phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)

    # One-hot mask of the target classes; scatter_ needs an int64 index,
    # so labels stored with another integer dtype are cast first.
    one_hot = torch.zeros_like(cosine)
    one_hot.scatter_(1, target.view(-1, 1).long(), 1)

    # Use phi for the target class and the plain cosine elsewhere.
    output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
    return output * self.s
......@@ -419,10 +419,10 @@ class Xtractor(torch.nn.Module):
else:
self.loss = loss
self.feature_size = 80
self.activation = torch.nn.LeakyReLU(0.2)
self.preprocessor = MfccFrontEnd()
self.feature_size = self.preprocessor.n_mfcc
self.sequence_network = torch.nn.Sequential(OrderedDict([
("conv1", torch.nn.Conv1d(self.feature_size, 512, 5, dilation=1)),
......@@ -638,11 +638,6 @@ class Xtractor(torch.nn.Module):
else:
self.activation = torch.nn.ReLU()
if cfg["segmental"][list(cfg["segmental"].keys())[0]].startswith("conv2D"):
self.input_nbdim = 3
elif cfg["segmental"][list(cfg["segmental"].keys())[0]].startswith("conv"):
self.input_nbdim = 2
# Create sequential object for the first part of the network
segmental_layers = []
for k in cfg["segmental"].keys():
......@@ -786,14 +781,6 @@ class Xtractor(torch.nn.Module):
if self.preprocessor is not None:
x = self.preprocessor(x)
else:
with torch.no_grad():
with torch.cuda.amp.autocast(enabled=False):
x = self.PreEmphasis(x)
x = self.MFCC(x)
x = self.CMVN(x).unsqueeze(1)
x = self.sequence_network(x)
# Mean and Standard deviation pooling
......@@ -966,7 +953,10 @@ def xtrain(speaker_number,
logging.critical("model_parameters_count: {:d}".format(
sum(p.numel()
for p in model.parameters()
for p in model.sequence_network.parameters()
if p.requires_grad) + \
sum(p.numel()
for p in model.before_speaker_embedding.parameters()
if p.requires_grad)))
embedding_size = model.embedding_size
......@@ -1009,14 +999,6 @@ def xtrain(speaker_number,
output_format="pytorch",
windowed=True)
#training_set = SideSet(dataset_yaml,
# set_type="train",
# overlap=dataset_params['train']['overlap'],
# dataset_df=training_df,
# output_format="pytorch",
# )
validation_set = SideSet(dataset_yaml,
set_type="validation",
dataset_df=validation_df,
......@@ -1036,14 +1018,13 @@ def xtrain(speaker_number,
else:
batch_size = dataset_params["batch_size"]
print(f"Size of batches = {batch_size}")
training_loader = DataLoader(training_set,
batch_size=batch_size,
shuffle=True,
drop_last=True,
pin_memory=True,
num_workers=1,#num_thread,
num_workers=num_thread,
persistent_workers=True)
validation_loader = DataLoader(validation_set,
......@@ -1091,7 +1072,7 @@ def xtrain(speaker_number,
verbose=False)
if mixed_precision:
scaler = torch.cuda.amp.GradScaler()
scaler = GradScaler()
else:
scaler = None
......@@ -1099,19 +1080,8 @@ def xtrain(speaker_number,
best_accuracy_epoch = 1
best_eer = 100
curr_patience = patience
logging.critical("Compute EER before starting")
val_acc, val_loss, val_eer = cross_validation(model,
validation_loader,
device,
[validation_set.__len__(),
embedding_size],
mixed_precision)
test_eer = test_metrics(model, device, speaker_number, num_thread, mixed_precision)
logging.critical(f"***{time.strftime('%H:%M:%S', time.localtime())} Initial metrics - Cross validation accuracy = {val_acc} %, EER = {val_eer * 100} %")
logging.critical(f"***{time.strftime('%H:%M:%S', time.localtime())} Initial metrics - Test EER = {test_eer * 100} %")
test_eer = 100.
for epoch in range(1, epochs + 1):
# Process one epoch and return the current model
......@@ -1128,10 +1098,8 @@ def xtrain(speaker_number,
scaler=scaler,
clipping=clipping)
print("end of train epoch")
# Add the cross validation here
if math.fmod(epoch, 100) == 0:
if math.fmod(epoch, 10) == 0:
val_acc, val_loss, val_eer = cross_validation(model, validation_loader, device, [validation_set.__len__(), embedding_size], mixed_precision)
test_eer = test_metrics(model, device, speaker_number, num_thread, mixed_precision)
......@@ -1810,8 +1778,9 @@ def extract_embeddings(idmap_name,
# Process the data
with torch.no_grad():
for idx, (data, mod, seg, start, stop) in enumerate(tqdm.tqdm(dataloader, desc='xvector extraction', mininterval=1)):
for idx, (data, mod, seg, start, stop) in enumerate(tqdm.tqdm(dataloader,
desc='xvector extraction',
mininterval=1)):
if data.shape[1] > 20000000:
data = data[...,:20000000]
with torch.cuda.amp.autocast(enabled=mixed_precision):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment