Commit 5d1b0f89 authored by Anthony Larcher's avatar Anthony Larcher
Browse files

new cross show

parent aa835e1e
......@@ -184,6 +184,35 @@ def cross_show(model, model_cfg, speaker_dictionary, archive_diar, th_x, human_a
return new_previous_iv, new_previous_diar, within_diar
def rename_speakers(new_dictionary, current_diar, current_vec):
    """Apply a speaker-ID mapping to a diarization and its speaker vectors.

    Both inputs are modified in place: every segment's "cluster" label in
    *current_diar* and every entry of *current_vec.modelset* is replaced by
    its value in *new_dictionary*.  Raises KeyError if a speaker ID is
    missing from the mapping.

    :param new_dictionary: dict mapping old speaker IDs to new ones
    :param current_diar: diarization whose segments carry "cluster" labels
    :param current_vec: StatServer-like object with a ``modelset`` array
    :return: the (mutated) ``(current_diar, current_vec)`` pair
    """
    for position, segment in enumerate(current_diar):
        current_diar.segments[position]["cluster"] = new_dictionary[segment["cluster"]]
    # Rebuild the modelset with the translated speaker IDs.
    renamed = [new_dictionary[model] for model in current_vec.modelset]
    current_vec.modelset = numpy.array(renamed)
    return current_diar, current_vec
def reset_speaker_names(archive_diar, current_diar, current_vec):
    """Rename the speakers of the current file so they cannot collide with
    speakers already stored in the archive.

    Each speaker of *current_diar* is assigned the first free
    ``speaker_<n>`` identifier (n starts just past the archive's speaker
    count), and the mapping is applied to both the diarization and the
    speaker-vector set via :func:`rename_speakers`.

    :param archive_diar: diarization accumulated over previously processed shows
    :param current_diar: diarization of the show being processed
    :param current_vec: speaker vectors (StatServer-like) of the current show
    :return: the renamed ``(current_diar, current_vec)`` pair
    """
    # Build the set once: unique("cluster") was previously called twice, and
    # a set makes the collision probes O(1) instead of scanning a list.
    existing_ids = set(archive_diar.unique("cluster"))
    clean_names = dict()
    spk_idx = len(existing_ids) + 1
    for spk in current_diar.unique("cluster"):
        # Advance past any index already taken in the archive (or just assigned).
        while f"speaker_{spk_idx}" in existing_ids:
            spk_idx += 1
        clean_names[spk] = f"speaker_{spk_idx}"
        existing_ids.add(f"speaker_{spk_idx}")
    return rename_speakers(clean_names, current_diar, current_vec)
def allies_cross_show_clustering(show,
model,
model_cfg,
......@@ -228,6 +257,11 @@ def allies_cross_show_clustering(show,
# Process the first file
if not speaker_dictionary:
# Rename speakers to avoid duplicated IDs
current_diar, current_vec_per_speaker = reset_speaker_names(archive_diar,
current_diar,
current_vec_per_speaker)
# Initialize with the current diar
archive_diar.append_diar(current_diar)
......@@ -237,42 +271,57 @@ def allies_cross_show_clustering(show,
# Process other files
else:
current_diar, current_vec_per_speaker = reset_speaker_names(archive_diar,
current_diar,
current_vec_per_speaker)
# Get the matrix of archive_speakers x-vectors to compute the scores
spk_ids = []
shows = []
archive_models = []
archive_xv = numpy.empty((len(speaker_dictionary), model_cfg["model"]["vectors"]["size"]))
for idx, (k, v) in enumerate(speaker_dictionary.items):
a_xv = numpy.empty((len(speaker_dictionary), 256))
for idx, (k, v) in enumerate(speaker_dictionary.items()):
shows.append(k[0])
spk_ids.append(k[1])
archive_models.append(k[0] + "#" + k[1])
archive_xv[idx, :] = v
a_xv[idx, :] = v
archive_models = numpy.array(archive_models)
archive_xv = sidekit.StatServer()
archive_xv.modelset = archive_models
archive_xv.segset = archive_models
archive_xv.stat0 = numpy.ones(archive_xv.segset.shape)
archive_xv.stat1 = a_xv
# Compute the scores (for x-vector and cosine first)
# TODO: the same for i-vectors and PLDA
# Compute scores
ndx = sidekit.Ndx(models=archive_models, testsegs=current_vec_per_speaker.modelset)
ndx = sidekit.Ndx()
ndx.modelset = archive_models
ndx.segset = current_vec_per_speaker.modelset
ndx.trialmask=numpy.ones((len(ndx.modelset), len(ndx.segset)), dtype="bool")
scores = sidekit.iv_scoring.cosine_scoring(archive_xv, current_vec_per_speaker, ndx,
wccn=None,
check_missing=False)
clustering_dict = dict()
#############
# In case speakers from the past have been seen in several files, reduce the matrix to get a unique
# line per past-speaker
unique_past_speakers = numpy.array(set(spk_ids))
clustering_dict = dict()
unique_past_speakers = numpy.array(list(set(spk_ids)))
unique_past_speakers_scores = numpy.empty((len(unique_past_speakers), scores.scoremat.shape[1]))
for idx, spk in enumerate(unique_past_speakers):
unique_past_speakers_scores[idx, :] = scores.scoremat[numpy.array(spk_ids) == spk, :].max(axis=0)
for idx, current_spk in enumerate(current_vec_per_speaker.modelset):
if unique_past_speakers_scores[:, idx].max() > th_x and unique_past_speakers_scores[unique_past_speakers_scores[:, idx].argmax()[0], :].argmax()[0] == idx:
if unique_past_speakers_scores[:, idx].max() > th_x and unique_past_speakers_scores[unique_past_speakers_scores[:, idx].argmax(), :].argmax() == idx:
# if the current speaker is the closest of its own closest archived speaker: match!
clustering_dict[current_spk] = spk_ids[unique_past_speakers_scores[:, idx].argmax()[0]]
clustering_dict[current_spk] = spk_ids[unique_past_speakers_scores[:, idx].argmax()]
##############
# Display the number of speaker identified to old ones
......
......@@ -136,7 +136,7 @@ def vec2link_xv(model_cfg, xv_vec, current_diar):
# Use 2 gaussian to shift the scores
if scores.modelset.shape[0] > 2:
th_w = customize_threshiold(scores, th_w)
th_w = customize_threshold(scores, th_w)
scores.scoremat = 0.5 * (scores.scoremat + scores.scoremat.transpose())
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment