Commit 74571bd8 authored by Anthony Larcher's avatar Anthony Larcher
Browse files

running sincnet

parent c8ad19c5
......@@ -170,6 +170,7 @@ if CUDA:
from .nnet import extract_embeddings
from .nnet import ResBlock
from .nnet import ResNet18
from .nnet import SincNet
else:
print("Don't import Torch")
......
......@@ -34,6 +34,7 @@ from .xsets import XvectorMultiDataset, XvectorDataset, StatDataset
from .xvector import Xtractor, xtrain, extract_idmap, extract_parallel, xtrain_asynchronous, extract_embeddings
from .res_net import ResBlock, ResNet18
from .rawnet import prepare_voxceleb1, Vox1Set, PreEmphasis
from .sincnet import SincNet
__author__ = "Anthony Larcher and Sylvain Meignier"
__copyright__ = "Copyright 2014-2020 Anthony Larcher and Sylvain Meignier"
......
......@@ -40,8 +40,8 @@ import torch
import torch.nn.functional as F
import torch.nn as nn
import math
from pyannote.core import SlidingWindow
from pyannote.audio.train.task import Task
#from pyannote.core import SlidingWindow
#from pyannote.audio.train.task import Task
class SincConv1d(nn.Module):
......@@ -233,6 +233,7 @@ class SincNet(nn.Module):
"""
'''
@staticmethod
def get_alignment(task: Task, **kwargs):
"""Get frame alignment"""
......@@ -279,6 +280,7 @@ class SincNet(nn.Module):
return SlidingWindow(
duration=receptive_field / sample_rate, step=jump / sample_rate, start=0.0
)
'''
def __init__(
self,
......@@ -435,7 +437,8 @@ class SincNet(nn.Module):
if self.dropout:
output = self.dropout_(output)
return output.transpose(1, 2)
#return output.transpose(1, 2)
return output
def dimension():
doc = "Output features dimension."
......
......@@ -43,6 +43,7 @@ from ..bosaris import IdMap
from ..statserver import StatServer
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from .sincnet import SincNet
__license__ = "LGPL"
__author__ = "Anthony Larcher"
......@@ -82,7 +83,7 @@ class Xtractor(torch.nn.Module):
"""
super(Xtractor, self).__init__()
self.speaker_number = speaker_number
self.feature_size = None
self.feature_size = None
if model_archi is None:
self.feature_size = 30
......@@ -127,9 +128,10 @@ class Xtractor(torch.nn.Module):
"""
Prepare Preprocessor
"""
self.preprocessor = None
if "preprocessor" in cfg:
if cfg['preprocessor']["type"] == "sincnet":
self.sincnet = SincNet(
self.preprocessor = SincNet(
waveform_normalize=cfg['preprocessor']["waveform_normalize"],
sample_rate=cfg['preprocessor']["sample_rate"],
min_low_hz=cfg['preprocessor']["min_low_hz"],
......@@ -142,7 +144,7 @@ class Xtractor(torch.nn.Module):
activation=cfg['preprocessor']["activation"],
dropout=cfg['preprocessor']["dropout"]
)
self.feature_size = self.sincnet_.dimension
self.feature_size = self.preprocessor.dimension
"""
Prepapre sequence network
......@@ -236,6 +238,9 @@ class Xtractor(torch.nn.Module):
:param x:
:return:
"""
if self.preprocessor is not None:
x = self.preprocessor(x)
x = self.sequence_network(x)
# Mean and Standard deviation pooling
......@@ -387,6 +392,8 @@ def train_epoch(model, epoch, training_loader, optimizer, log_interval, device):
accuracy = 0.0
for batch_idx, (data, target) in enumerate(training_loader):
if model.preprocessor is not None:
data = data[:, :, None]
target = target.squeeze()
optimizer.zero_grad()
output = model(data.to(device))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment