Commit 6ef96f8f authored by Anthony Larcher's avatar Anthony Larcher
Browse files

update

parent 9e60b8f2
......@@ -962,7 +962,7 @@ def apply_ideal_correction(bottomline_diar, ref, uem, der_track_show):
"""
hyp = s4d_to_allies(copy.deepcopy(bottomline_diar))
der, fa_rate, miss_rate, conf_rate, error, time, newspkmap = compute_der([ref], [hyp], [uem], collar = 0.250)
# der, fa_rate, miss_rate, conf_rate, time, newspkmap = compute_der(ref, hyp, uem, {}, 0.250)
# der, fa_rate, miss_rate, conf_rate, time, newspkmap = compute_der(ref, hyp, uem, {}, 0.250)
der_track_cs = {"time": time, "der_log": [der], "correction": ["initial"]}
removelist = []
......@@ -1271,6 +1271,9 @@ def allies_within_show_hal(model_cfg,
else:
scores_clusters, link_clusters, th_clusters = vec2link(model_cfg, vec_per_cluster, init_diar, model)
backup_vad_type = model_cfg["model"]["vad"]["type"]
backup_thr_h = model_cfg['first_seg']['thr_h']
model_cfg["model"]["vad"]["type"] = "reference"
model_cfg['first_seg']['thr_h']=model_cfg['within_show']['thr_h']
......@@ -1280,6 +1283,9 @@ def allies_within_show_hal(model_cfg,
init_diar,
data_folder)
model_cfg["model"]["vad"]["type"] = backup_vad_type
model_cfg['first_seg']['thr_h'] = backup_thr_h
bottomline_cluster_list = bottomline_diar.unique('cluster')
#######################################################################
......
......@@ -139,8 +139,6 @@ def vec2link_xv(model_cfg, xv_vec, current_diar):
wccn=None,
check_missing=False)
print(f"Size of scores : {scores.scoremat.shape}")
# Use 2 gaussian to shift the scores
#if scores.modelset.shape[0] > 2:
# th_w = customize_threshold(scores, th_w)
......@@ -365,12 +363,10 @@ def perform_second_seg(model,
# th_w = customize_threshold(scores, th_w)
# Run HAC clustering
print(f"Avant HAC : len(diar)= {len(initial_diar.unique('cluster'))}, min et max scores: {scores.scoremat.min()} et {scores.scoremat.max()}, th = {th_w}")
new_diar, cluster_dict, merge = s4d.clustering.hac_iv.hac_iv(initial_diar,
scores,
threshold=th_w,
method=hac_method)
print(f"Apres HAC : len(diar)= {len(new_diar.unique('cluster'))}, min et max scores: {scores.scoremat.min()} et {scores.scoremat.max()}, th = {th_w}")
# Update the model names of i-vector
# reverse the cluster dict:
......@@ -528,7 +524,6 @@ def lium_iv_initial_training(model_cfg, train_data):
if not os.path.isfile(f"{model_cfg['tmp_dir']}/model/TV.h5"):
if not os.path.isfile(f"{model_cfg['tmp_dir']}/model/tv_stat.h5"):
print("Compute statistics")
# Accumulate sufficient statistics for the training data
logging.critical("Create StatServer with {} segments".format(len(tv_idmap.leftids)) )
tv_stat = sidekit.StatServer(tv_idmap,
......@@ -748,10 +743,12 @@ def lium_xv_initial_training(model_cfg, train_data):
train_diar = train_diar.filter("duration", ">=", 100)
training_idmap = train_diar.id_map()
training_idmap.start = training_idmap.start * 160
training_idmap.stop = training_idmap.stop * 160
#training_idmap.start = training_idmap.start * 160
#training_idmap.stop = training_idmap.stop * 160
if not os.path.isfile(f"{model_cfg['tmp_dir']}/model/training_xv.h5"):
if model_cfg['model']['vectors']['xvectors']["transforms"] is None:
model_cfg['model']['vectors']['xvectors']["transforms"] = dict()
training_xv = sidekit.nnet.xvector.extract_embeddings(idmap_name=training_idmap,
model_filename=model_cfg['tmp_dir']+"/model/best_xtractor.pt",
data_root_name=train_data.root_folder + '/wav/',
......@@ -845,11 +842,11 @@ def allies_init_seg(model, model_cfg, show, data_folder, verbose=False):
init_diar[i]['cluster'] = "tmp_"+str(i)
# Start from VAD segmentation
else:
logger.info(f"\t* load initial segmentation from {model_cfg['model']['vad']['dir']}/{show}.mdtm")
init_diar = s4d.Diar.read_mdtm(f"{model_cfg['model']['vad']['dir']}/{show}.mdtm")
logger.info(f"\t* load initial segmentation from {model_cfg['model']['vad']['dir']}/{show}.rttm")
init_diar = s4d.Diar.read_rttm(f"{model_cfg['model']['vad']['dir']}/{show}.rttm")
init_diar.pad(15)
init_diar.pack(25)
init_diar.pad(25)
init_diar.pack(50)
# Run the first pass of segmentation
logger.info("\t* run 1st clustering")
......@@ -882,11 +879,11 @@ def allies_init_seg(model, model_cfg, show, data_folder, verbose=False):
current_vec = sidekit.StatServer(f"{first_seg_path}/{show}_{model_cfg['model']['vectors']['type']}v.h5")
current_vec_per_seg = sidekit.StatServer(f"{first_seg_path}/{show}_{model_cfg['model']['vectors']['type']}v_per_seg.h5")
current_diar, current_vec, current_vec_per_seg = perform_second_seg(model,
current_diar,
current_vec,
current_vec_per_seg,
model_cfg)
#current_diar, current_vec, current_vec_per_seg = perform_second_seg(model,
# current_diar,
# current_vec,
# current_vec_per_seg,
# model_cfg)
# re-extract x-vectors based on the final diarization to have correct vectors for the clusters
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment