Commit f41e8a24 authored by Anthony Larcher's avatar Anthony Larcher
Browse files

cleaning cross show without HAL

parent bd58b919
......@@ -138,43 +138,26 @@ def check_dif_files(old_modelname,new_modelname,within_diar,prev_diar):
if ref_new[i][3] < center_new and ref_new[i][4]>center_new:
new = ref_new[i][1]
return old==new
return old == new
def cross_show(previous_iv,
previous_diar,
within_iv,
within_diar,
th_x,
lim,
reference_path=None,
do_hal=False):
def compute_distance_cross_show(previous_vec, previous_diar, within_vec):
"""
Here we compute the scores considering previous and current clusters and then modify the score matrix
to enable only clustering of previous and current clusters (no previous/previous and no current/current).
:param previous_iv:
:param previous_vec:
:param previous_diar:
:param within_iv:
:param within_diar:
:param th_x:
:param lim:
:param reference_path:
:param do_hal:
:param within_vec:
:return:
"""
within_iv_backup = copy.deepcopy(within_iv)
previous_iv_backup = copy.deepcopy(previous_iv)
# get the mean_per_model for previous and within
within_iv_mean = within_iv.mean_stat_per_model()
previous_iv_mean = previous_iv.mean_stat_per_model()
# merge the mean_per_model for previous and within
ll_iv_mean = concat_statservers(previous_iv_mean, within_iv_mean)
ll_vec = concat_statservers(previous_vec, within_vec)
# Compute the score matrix
ndx = sidekit.Ndx(models=ll_iv_mean.modelset, testsegs=ll_iv_mean.modelset)
scores = sidekit.iv_scoring.cosine_scoring(ll_iv_mean,
ll_iv_mean,
ndx = sidekit.Ndx(models=ll_vec.modelset, testsegs=ll_vec.modelset)
scores = sidekit.iv_scoring.cosine_scoring(ll_vec,
ll_vec,
ndx,
wccn=None,
check_missing=False,
......@@ -183,30 +166,74 @@ def cross_show(previous_iv,
scores.scoremat = -0.5 * (scores.scoremat + scores.scoremat.transpose())
# Constrain the scores to forbid any new clustering between previous shows
lowest_distance = numpy.min(scores.scoremat)
for iv_idx in range(previous_iv_mean.modelset.shape[0]):
for iv_jdx in range(previous_iv_mean.modelset.shape[0]):
if previous_iv_mean.modelset[iv_idx] == previous_iv_mean.modelset[iv_jdx]:
scores.scoremat[iv_idx, iv_jdx] = scores.scoremat[iv_idx, iv_jdx]
else:
scores.scoremat[iv_idx, iv_jdx] = numpy.inf
for vec_idx, mod in enumerate(previous_vec.modelset):
same_indices = numpy.argwhere(previous_vec.modelset != mod)
scores.scoremat[vec_idx, same_indices] = numpy.inf
#for iv_idx in range(previous_vec.modelset.shape[0]):
# for iv_jdx in range(previous_vec.modelset.shape[0]):
# if previous_vec.modelset[iv_idx] == previous_vec.modelset[iv_jdx]:
# scores.scoremat[iv_idx, iv_jdx] = scores.scoremat[iv_idx, iv_jdx]
# else:
# scores.scoremat[iv_idx, iv_jdx] = numpy.inf
# Add to keep the within show clustering
for iv_idx in range(previous_iv_mean.modelset.shape[0], ll_iv_mean.modelset.shape[0]):
for iv_jdx in range(previous_iv_mean.modelset.shape[0], ll_iv_mean.modelset.shape[0]):
if ll_iv_mean.modelset[iv_idx] == ll_iv_mean.modelset[iv_jdx]:
scores.scoremat[iv_idx, iv_jdx] = scores.scoremat[iv_idx, iv_jdx]
else:
scores.scoremat[iv_idx, iv_jdx] = numpy.inf
for ii in range(previous_vec.modelset.shape[0], ll_vec.modelset.shape[0]):
for jj in range(previous_vec.modelset.shape[0], ll_vec.modelset.shape[0]):
if not ll_vec.modelset[ii] == ll_vec.modelset[jj]:
scores.scoremat[ii, jj] = numpy.inf
modelset_seg_idx = dict()
for seg in previous_diar.segments:
modelset_seg_idx[seg['cluster']] = numpy.where(ll_iv_mean.modelset == seg['cluster'])[0]
modelset_seg_idx[seg['cluster']] = numpy.where(ll_vec.modelset == seg['cluster'])[0]
numpy.fill_diagonal(scores.scoremat, 0.0)
return ll_vec, scores
def cross_show(previous_vec,
previous_diar,
within_vec,
within_diar,
th_x,
lim,
user,
file_info,
uem,
ref,
human_in_the_loop=False):
"""
:param previous_vec:
:param previous_diar:
:param within_vec:
:param within_diar:
:param th_x:
:param lim:
:param user:
:param file_info:
:param uem:
:param ref:
:param human_in_the_loop:
:return:
"""
within_vec_backup = copy.deepcopy(within_vec)
previous_vec_backup = copy.deepcopy(previous_vec)
# get the mean_per_model for previous and within
within_vec_mean = within_vec.mean_stat_per_model()
previous_vec_mean = previous_vec.mean_stat_per_model()
"""
Compute distance matrix to perform HAC between previous and within cluster.
This matrix is normalized to enable/disable clustering between previous/previous
and within/within clusters
"""
ll_vec, scores = compute_distance_cross_show(previous_vec_mean, previous_diar, within_vec_mean)
"""
metadata = 0
if do_hal:
if human_in_the_loop:
tdict = {}
for i in range(previous_iv_mean.modelset.shape[0], ll_iv_mean.modelset.shape[0]):
insp_name = scores.modelset[i]
......@@ -240,8 +267,9 @@ def cross_show(previous_iv,
j += 1
else:
j += 1
"""
if not do_hal:
if not human_in_the_loop:
scores.scoremat += 1.
th_x += 1.
numpy.fill_diagonal(scores.scoremat, 0.0)
......@@ -251,20 +279,21 @@ def cross_show(previous_iv,
T = scipy.cluster.hierarchy.fcluster(Z, th_x, 'distance')
# Don't allow to modify the names of previously existing clusters
# Create a dictionary with old_model_name as key and new_luster as value
# Create a dictionary with old_model_name as key and new_cluster as value
cluster_dict = dict()
clusters_by_index = dict()
for ii in range(T.shape[0]):
if T[ii] not in clusters_by_index:
clusters_by_index[T[ii]] = ll_iv_mean.modelset[ii]
cluster_dict[ll_iv_mean.modelset[ii]] = clusters_by_index[T[ii]]
clusters_by_index[T[ii]] = ll_vec.modelset[ii]
cluster_dict[ll_vec.modelset[ii]] = clusters_by_index[T[ii]]
# concatenate previous_iv et within_iv
new_previous_iv = concat_statservers(previous_iv_backup, within_iv_backup)
# concatenate previous_vec and within_vec
new_previous_vec = concat_statservers(previous_vec_backup, within_vec_backup)
new_previous_diar = copy.deepcopy(previous_diar)
new_previous_diar.segments += within_diar.segments
if do_hal:
"""
if human_in_the_loop:
for ii, mod in enumerate(new_previous_iv.modelset):
if mod in list(tdict.keys()):
new_previous_iv.modelset[ii] = tdict[mod]
......@@ -274,11 +303,15 @@ def cross_show(previous_iv,
for ii, seg in enumerate(within_diar.segments):
if seg['cluster'] in list(tdict.keys()):
within_diar.segments[ii]['cluster'] = tdict[seg['cluster']]
return new_previous_vec, new_previous_diar, within_diar, metadata
else:
# Modify the model names for i-vectors
for ii, mod in enumerate(new_previous_iv.modelset):
new_previous_iv.modelset[ii] = cluster_dict[mod]
"""
if not human_in_the_loop:
# Modify the model names for vectors
for ii, mod in enumerate(new_previous_vec.modelset):
new_previous_vec.modelset[ii] = cluster_dict[mod]
for ii, seg in enumerate(new_previous_diar.segments):
new_previous_diar.segments[ii]['cluster'] = cluster_dict[seg['cluster']]
......@@ -286,39 +319,56 @@ def cross_show(previous_iv,
for ii, seg in enumerate(within_diar.segments):
within_diar.segments[ii]['cluster'] = cluster_dict[seg['cluster']]
return new_previous_iv, new_previous_diar, within_diar, metadata
return new_previous_vec, new_previous_diar, within_diar, None
def allies_cross_show_clustering(show_idx,
                                 archive_vectors,
                                 current_diar,
                                 current_vec,
                                 th_x,
                                 lim,
                                 user,
                                 file_info,
                                 uem,
                                 ref,
                                 hal=False):
    """
    Update the cross-show archive with the current show.

    For the first show (``show_idx == 0``) the archive is only initialised with
    the current vectors and diarization. For every other show, ``cross_show`` is
    called with the archived data and the current show, and the archive is
    replaced by what ``cross_show`` returns.

    :param show_idx: index of the show being processed, 0 for the first show
    :param archive_vectors: mapping updated in place; the keys ``"previous_vec"``
        and ``"previous_diar"`` are read (when ``show_idx != 0``) and written
    :param current_diar: diarization of the current show
    :param current_vec: vectors of the current show
    :param th_x: forwarded to ``cross_show`` (used there as clustering threshold)
    :param lim: forwarded unchanged to ``cross_show``
    :param user: forwarded unchanged to ``cross_show``
    :param file_info: forwarded unchanged to ``cross_show``
    :param uem: forwarded unchanged to ``cross_show``
    :param ref: forwarded unchanged to ``cross_show``
    :param hal: forwarded to ``cross_show`` as ``human_in_the_loop``
    :return: a tuple ``(archive_vectors, current_diar, metadata)``; ``metadata``
        is 0 for the first show, otherwise the value returned by ``cross_show``
    """
    if show_idx == 0:
        # First show: there is nothing to compare against, just seed the archive.
        # NOTE(review): the vectors are deep-copied but the diarization is stored
        # by reference, so later changes to current_diar are visible in the archive.
        archive_vectors["previous_vec"] = copy.deepcopy(current_vec)
        archive_vectors["previous_diar"] = current_diar
        metadata = 0
    else:
        # Keyword names must match the signature of cross_show
        # (previous_vec / within_vec / human_in_the_loop).
        previous_vec, previous_diar, current_diar, metadata = cross_show(
            previous_vec=archive_vectors["previous_vec"],
            previous_diar=archive_vectors["previous_diar"],
            within_vec=current_vec,
            within_diar=current_diar,
            th_x=th_x,
            lim=lim,
            user=user,
            file_info=file_info,
            uem=uem,
            ref=ref,
            human_in_the_loop=hal)
        archive_vectors["previous_vec"] = previous_vec
        archive_vectors["previous_diar"] = previous_diar

    return archive_vectors, current_diar, metadata
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment