Commit 7100adee authored by Anthony Larcher
Browse files

debug cross

parent 6a0211ff
...@@ -309,28 +309,32 @@ def cross_show_HAL(previous_vec, ...@@ -309,28 +309,32 @@ def cross_show_HAL(previous_vec,
# For each speaker in the current file # For each speaker in the current file
for ii in range(previous_vec_mean.modelset.shape[0], ll_vec.modelset.shape[0]): for ii in range(previous_vec_mean.modelset.shape[0], ll_vec.modelset.shape[0]):
print(f"Look for matching of current speaker {ii}") #print(f"Look for matching of current speaker {ii}")
question_number = 0 question_number = 0
# Get the current name of the speaker # Get the current name of the speaker
current_speaker_name = scores.modelset[ii] current_speaker_name = scores.modelset[ii]
#print(f"current speaker = {current_speaker_name}")
# get the scores obtained with all previous speakers and rank them # get the scores obtained with all previous speakers and rank them
sorted_idx = numpy.argsort(scores.scoremat[ii][:previous_vec_mean.modelset.shape[0]]) sorted_idx = numpy.argsort(scores.scoremat[ii][:previous_vec_mean.modelset.shape[0]])[::-1]
sorted_scores_current_speaker = scores.scoremat[ii, sorted_idx] sorted_scores_current_speaker = scores.scoremat[ii, sorted_idx]
#print(sorted_scores_current_speaker)
# If one score is above th_x AND that the corresponding previous speaker is not locked # If one score is above th_x AND that the corresponding previous speaker is not locked
for jj, previous_spk_idx in enumerate(sorted_idx): for jj, previous_spk_idx in enumerate(sorted_idx):
print(f"\tCompare to previous speaker jj") #print(f"\tCompare to previous speaker {jj}")
previous_spk_name = ll_vec.modelset[previous_spk_idx] previous_spk_name = ll_vec.modelset[previous_spk_idx]
#print(f"\tprevious speaker name is: {previous_spk_name}")
# There are scores higher than the threshold # There are scores higher than the threshold
if sorted_scores_current_speaker[previous_spk_idx] > th_x: if sorted_scores_current_speaker[previous_spk_idx] > th_x:
print(f"\t\tSome scores are above the threshold") #print(f"\t\tSome scores are above the threshold")
if not ll_vec.modelset[previous_spk_idx] in previous_locked_spk: if not ll_vec.modelset[previous_spk_idx] in previous_locked_spk:
# ---> link the speakers # ---> link the speakers
...@@ -339,22 +343,30 @@ def cross_show_HAL(previous_vec, ...@@ -339,22 +343,30 @@ def cross_show_HAL(previous_vec,
previous_locked_spk.append(ll_vec.modelset[previous_spk_idx]) previous_locked_spk.append(ll_vec.modelset[previous_spk_idx])
# move to next speaker # move to next speaker
break break
"""
# There are no more scores higher than the threshold # There are no more scores higher than the threshold
else: else:
if previous_spk_idx not in previous_locked_spk: if previous_spk_idx not in previous_locked_spk:
#print(f"\t\t\tSpeaker not locked")
# Get the time of the middle of the longest segment for the longest seg within_diar_id in within_diar # Get the time of the middle of the longest segment for the longest seg within_diar_id in within_diar
tmp_diar = copy.deepcopy(within_diar) tmp_diar = copy.deepcopy(within_diar)
tmp_diar.filter("cluster", "==", current_speaker_name).add_duration().sort(["duration"], reverse=True) tmp_diar = tmp_diar.filter("cluster", "==", current_speaker_name).add_duration()
tmp_diar.sort(["duration"], reverse=True)
show1 = tmp_diar[0]["show"] show1 = tmp_diar[0]["show"]
t1 = (tmp_diar[0]["stop"] - tmp_diar[0]["start"]) / 200. #print(f"\t\t{tmp_diar[0]}")
t1 = (tmp_diar[0]["stop"] + tmp_diar[0]["start"]) / 200.
# Get the time of the middle of the longest segment for previous_diar_id in previous_diar # Get the time of the middle of the longest segment for previous_diar_id in previous_diar
tmp_diar = copy.deepcopy(previous_diar) tmp_diar = copy.deepcopy(previous_diar)
tmp_diar.filter("cluster", "==", previous_spk_name).add_duration().sort(["duration"], reverse=True) tmp_diar = tmp_diar.filter("cluster", "==", previous_spk_name).add_duration()
tmp_diar.sort(["duration"], reverse=True)
show2 = tmp_diar[0]["show"] show2 = tmp_diar[0]["show"]
t2 = (tmp_diar[0]["stop"] - tmp_diar[0]["start"]) / 200. #print(f"\t\t{tmp_diar[0]}")
t2 = (tmp_diar[0]["stop"] + tmp_diar[0]["start"]) / 200.
# Ask the question to the user # Ask the question to the user
complete_hyp = copy.deepcopy(previous_diar) complete_hyp = copy.deepcopy(previous_diar)
...@@ -367,78 +379,14 @@ def cross_show_HAL(previous_vec, ...@@ -367,78 +379,14 @@ def cross_show_HAL(previous_vec,
question_number += 1 question_number += 1
if question_number > lim: if answer.answer:
linkage_speaker_dict[current_speaker_name] = ll_vec.modelset[previous_spk_idx]
previous_locked_spk.append(ll_vec.modelset[previous_spk_idx])
break break
"""
linkage_speaker_dict = {}
# For each speaker in the current diarization
for ii in range(previous_vec_mean.modelset.shape[0], ll_vec.modelset.shape[0]):
# Get the current name of the speaker
current_speaker_name = scores.modelset[ii]
if question_number > lim:
# get all scores of this model with previously known speakers break
#lst = scores.scoremat[i][0:previous_vec_mean.modelset.shape[0]] """
# Get a list of tuples (score, old_model_name)
#tmp = []
#for j in range(len(lst)):
# tmp.append((lst[j], scores.modelset[j]))
#tmp.sort()
# Trie les previous speakers du plus proche au plus lointain
sorted_idx = numpy.argsort(scores.scoremat[ii][:previous_vec_mean.modelset.shape[0]])
if scores.scoremat[ii, sorted_idx[0]] < th_x:
# Tant qu'on a pas posé "lim" questions, qu'il reste des locuteurs et qu'on n'a pas trouvé
keep_questioning = True
previous_spk_idx = 0
already_asked_questions = 0
already_questioned_speakers = []
while keep_questioning and already_asked_questions < lim and previous_spk_idx < previous_vec_mean.modelset.shape[0]:
# get the name of the previous speaker being investigated
previous_spk_name = scores.modelset[sorted_idx[previous_spk_idx]]
# Si le previous_spk n'a pas encore été vu dans les questions précédentes
if previous_spk_name not in already_questioned_speakers:
# Get the time of the middle of the longest segment for the longest seg within_diar_id in within_diar
tmp_diar = copy.deepcopy(within_diar)
tmp_diar.filter("cluster", "==", current_speaker_name).add_duration().sort(["duration"], reverse=True)
show1 = tmp_diar[0]["show"]
t1 = (tmp_diar[0]["stop"] - tmp_diar[0]["start"]) / 200.
# Get the time of the middle of the longest segment for previous_diar_id in previous_diar
tmp_diar = copy.deepcopy(previous_diar)
tmp_diar.filter("cluster", "==", previous_spk_name).add_duration().sort(["duration"], reverse=True)
show2 = tmp_diar[0]["show"]
t2 = (tmp_diar[0]["stop"] - tmp_diar[0]["start"]) / 200.
# Ask the question to the user
complete_hyp = copy.deepcopy(previous_diar)
complete_hyp.append_diar(within_diar)
message_to_user = MessageToUser(file_info,
s4d_to_allies(complete_hyp),
Request('same', t1, t2, archive_file_info[show2]))
keep_questioning, answer = user.validate(message_to_user)
# On incrémente le compteur de question
already_asked_questions += 1
# If the current speaker has to be linked with a previously seen one
if answer.answer :
print("ANSWER IS TRUE")
linkage_speaker_dict[current_speaker_name] = previous_spk_name
else:
already_questioned_speakers.append(previous_spk_name)
previous_spk_idx += 1
else:
previous_spk_idx += 1
"""
# concatenate previous_vec et within_vec # concatenate previous_vec et within_vec
new_previous_vec = concat_statservers(previous_vec_backup, within_vec_backup) new_previous_vec = concat_statservers(previous_vec_backup, within_vec_backup)
......
...@@ -84,8 +84,8 @@ def create_bottomline_clustering(model, model_cfg, show, current_diar, file_path ...@@ -84,8 +84,8 @@ def create_bottomline_clustering(model, model_cfg, show, current_diar, file_path
model_cfg, model_cfg,
"reference") "reference")
#print(f"cluster_{cluster} with {len(diar_per_cluster)} segs changed to " print(f"cluster_{cluster} with {len(diar_per_cluster)} segs changed to "
# f"{len(diar_per_cluster.unique('cluster'))} clusters") f"{len(diar_per_cluster.unique('cluster'))} clusters")
# Creat a diarization with bottom line clustering # Creat a diarization with bottom line clustering
for seg in diar_per_cluster: for seg in diar_per_cluster:
bottomline_diar.append(show=seg["show"], bottomline_diar.append(show=seg["show"],
...@@ -1051,6 +1051,8 @@ def run_active_learning_tree(link, ...@@ -1051,6 +1051,8 @@ def run_active_learning_tree(link,
stop_clustering_list = [] # a list of nodes that have gotten confirmation for clustering question stop_clustering_list = [] # a list of nodes that have gotten confirmation for clustering question
der, new_diar = check_der(init_diar, bottomline_cluster_list, temporary_link_list, ref, uem) der, new_diar = check_der(init_diar, bottomline_cluster_list, temporary_link_list, ref, uem)
print("Initial DER based on bottomline_diar and linkage : ", der)
der_track_show = {"der_log": [der], "correction": ["initial"]} der_track_show = {"der_log": [der], "correction": ["initial"]}
for node in links_to_check: for node in links_to_check:
...@@ -1296,6 +1298,9 @@ def allies_within_show_hal(model_cfg, ...@@ -1296,6 +1298,9 @@ def allies_within_show_hal(model_cfg,
hyp = s4d_to_allies(init_diar) hyp = s4d_to_allies(init_diar)
der_init, fa_rate, miss_rate, conf_rate, error, time, newspkmap = compute_der([ref], [hyp], [uem], collar = 0.250) der_init, fa_rate, miss_rate, conf_rate, error, time, newspkmap = compute_der([ref], [hyp], [uem], collar = 0.250)
print("Current DER (original) : ", der_init)
hyp = s4d_to_allies(bottomline_diar) hyp = s4d_to_allies(bottomline_diar)
der, fa_rate, miss_rate, conf_rate, error, time, newspkmap = compute_der([ref], [hyp], [uem], collar = 0.250) der, fa_rate, miss_rate, conf_rate, error, time, newspkmap = compute_der([ref], [hyp], [uem], collar = 0.250)
print("DER of bottomline_diar: ", der) print("DER of bottomline_diar: ", der)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment