Commit 15e1f7b7 authored by Anthony Larcher's avatar Anthony Larcher
Browse files

new sidekit

parent 6373b7e1
......@@ -47,6 +47,8 @@ from .utils import customize_threshold
from .utils import keep_recurring_speakers
from .utils import remove_show_from_id
from sidekit.nnet.xvector import extract_embeddings_per_speaker
def bic_linear_segmentation(init_diar, cep, model_cfg):
"""
......@@ -228,71 +230,70 @@ def extract_vectors(current_diar, root_folder, model_cfg, show, model=None):
normalization=False)
elif model_cfg["model"]["type"] == "lium_xv":
"""
current_vec_per_cluster = current_vec_per_segment.mean_stat_per_model()
#current_vec_per_cluster.norm_stat1()
#current_vec_per_cluster= sidekit.nnet.xvector.extract_embeddings_per_speaker(idmap_name=current_im,
# model_filename=xtractor_name,
# data_root_name=f"{root_folder}/wav/",
# device=torch.device("cuda"),
# num_thread=5)
print(f"nombre de segments: {current_vec_per_segment.stat1.shape}, nombre de locuteurs: {current_vec_per_cluster.stat1.shape}")
#diar_seg=copy.deepcopy(diar)
#for i in range(len(diar_seg)):
# diar_seg[i]["cluster"]="tmp_"+str(i)
#current_im = diar_seg.id_map()
#current_im.start = current_im.start * 160
#current_im.stop = current_im.stop * 160
#if os.path.exists(f"{file_path}/{out_file_name}_seg.idmap.h5"):
# os.remove(f"{file_path}/{out_file_name}_seg.idmap.h5")
#current_im.write(f"{file_path}/{out_file_name}_seg.idmap.h5")
#current_vec_per_seg= sidekit.nnet.xvector.extract_embeddings_per_speaker(idmap_name=f"{file_path}/{out_file_name}_seg.idmap.h5",
# model_filename=f"{model_cfg['model_dir']}/best_xtractor.pt",
# data_root_name=f"{model_cfg['wav_dir']}",
# device=torch.device("cuda"),
# transform_pipeline=model_cfg["model"]["vectors"]["xvectors"]["transforms"],
# num_thread=5)
current_vec = current_vec_per_segment.mean_stat_per_model()
current_im = current_diar.id_map()
current_im.start = current_im.start * 160
current_im.stop = current_im.stop * 160
if os.path.exists(f"{model_cfg['tmp_dir']}/{show}.idmap.h5"):
os.remove(f"{model_cfg['tmp_dir']}/{show}.idmap.h5")
current_im.write(f"{model_cfg['tmp_dir']}/{show}.idmap.h5")
current_vec_per_cluster = extract_embeddings_per_speaker(
idmap_name=f"{model_cfg['tmp_dir']}/{show}.idmap.h5",
model_filename=f"{model_cfg['model_dir']}/best_xtractor.pt",
data_root_name=f"{model_cfg['wav_dir']}",
device=torch.device("cuda"),
transform_pipeline={},
num_thread=5)
diar_seg=copy.deepcopy(current_diar)
for i in range(len(diar_seg)):
diar_seg[i]["cluster"]="tmp_"+str(i)
current_im = diar_seg.id_map()
current_im.start = current_im.start * 160
current_im.stop = current_im.stop * 160
if os.path.exists(f"{model_cfg['tmp_dir']}/{show}_seg.idmap.h5"):
os.remove(f"{model_cfg['tmp_dir']}/{show}_seg.idmap.h5")
current_im.write(f"{model_cfg['tmp_dir']}/{show}_seg.idmap.h5")
current_vec_per_segment = extract_embeddings_per_speaker(
idmap_name=f"{model_cfg['tmp_dir']}/{show}_seg.idmap.h5",
model_filename=f"{model_cfg['model_dir']}/best_xtractor.pt",
data_root_name=f"{model_cfg['wav_dir']}",
device=torch.device("cuda"),
transform_pipeline={},
num_thread=5)
"""
current_im = current_diar.id_map()
current_im.start = current_im.start * 160
current_im.stop = current_im.stop * 160
if os.path.exists(f"{model_cfg['tmp_dir']}/{show}.idmap.h5"):
os.remove(f"{model_cfg['tmp_dir']}/{show}.idmap.h5")
current_im.write(f"{model_cfg['tmp_dir']}/{show}.idmap.h5")
current_vec_per_cluster= sidekit.nnet.xvector.extract_embeddings_per_speaker(idmap_name=f"{model_cfg['tmp_dir']}/{show}.idmap.h5",
model_filename=f"{model_cfg['model_dir']}/best_xtractor.pt",
# model_filename="../Baseline_LIUM_HAC/cfg/models/Evallies_xv/best_xtractor.pt",
data_root_name=f"{model_cfg['wav_dir']}",
device=torch.device("cuda"),
transform_pipeline={},
num_thread=5)
# current_vec_per_cluster= sidekit.nnet.xvector.extract_embeddings_per_speaker(idmap_name=current_im,
# # model_filename=model_cfg['tmp_dir'] + "model/best_xtractor.pt",
# model_filename="../Baseline_LIUM_HAC/cfg/models/Evallies_xv/best_xtractor.pt",
# data_root_name=f"{root_folder}/wav/",
# device=torch.device("cuda"),
# num_thread=5)
diar_seg=copy.deepcopy(current_diar)
for i in range(len(diar_seg)):
diar_seg[i]["cluster"]="tmp_"+str(i)
current_im = diar_seg.id_map()
current_im.start = current_im.start * 160
current_im.stop = current_im.stop * 160
if os.path.exists(f"{model_cfg['tmp_dir']}/{show}_seg.idmap.h5"):
os.remove(f"{model_cfg['tmp_dir']}/{show}_seg.idmap.h5")
current_im.write(f"{model_cfg['tmp_dir']}/{show}_seg.idmap.h5")
current_vec_per_segment= sidekit.nnet.xvector.extract_embeddings_per_speaker(idmap_name=f"{model_cfg['tmp_dir']}/{show}_seg.idmap.h5",
model_filename=f"{model_cfg['model_dir']}/best_xtractor.pt",
data_root_name=f"{model_cfg['wav_dir']}",
device=torch.device("cuda"),
transform_pipeline={},
num_thread=5)
#
current_im = current_diar.id_map()
# current_im.write(f"{file_path}/{show}.idmap.h5")
# Get X-vector extractor name
xtractor_name = model_cfg['tmp_dir'] + "model/best_xtractor.pt"
# Extract 1 x-vector per segment
current_vec_per_segment = sidekit.nnet.xvector.extract_embeddings(idmap_name=current_im,
model_filename=xtractor_name,
data_root_name=f"{root_folder}/wav/",
device=torch.device("cuda"),
file_extension="wav",
transform_pipeline={},
sliding_window=False,
sample_rate=16000,
mixed_precision=False)
# Extract 1 x-vector per speaker (cluster)
current_vec_per_cluster = sidekit.nnet.xvector.extract_embeddings_per_speaker(
idmap_name=current_im,
model_filename=xtractor_name,
data_root_name=f"{root_folder}/wav/",
device=torch.device("cuda"),
file_extension="wav",
transform_pipeline={},
sample_rate=16000,
mixed_precision=False,
num_thread=1)
return current_vec_per_cluster, current_vec_per_segment
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment