Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Anthony Larcher
evALLIES
Commits
f41e8a24
Commit
f41e8a24
authored
Sep 01, 2021
by
Anthony Larcher
Browse files
cleaning cross show without HAL
parent
bd58b919
Changes
1
Hide whitespace changes
Inline
Side-by-side
lium_baseline/cross_show.py
View file @
f41e8a24
...
...
@@ -138,43 +138,26 @@ def check_dif_files(old_modelname,new_modelname,within_diar,prev_diar):
if
ref_new
[
i
][
3
]
<
center_new
and
ref_new
[
i
][
4
]
>
center_new
:
new
=
ref_new
[
i
][
1
]
return
old
==
new
return
old
==
new
def
cross_show
(
previous_iv
,
previous_diar
,
within_iv
,
within_diar
,
th_x
,
lim
,
reference_path
=
None
,
do_hal
=
False
):
def
compute_distance_cross_show
(
previous_vec
,
previous_diar
,
within_vec
):
"""
Here we compute the scores considering previous and current clusters and then modify the score matrix
to enable only clustering of previous and current clusters (no previous/previous and no current/current).
:param previous_
i
v:
:param previous_v
ec
:
:param previous_diar:
:param within_iv:
:param within_diar:
:param th_x:
:param lim:
:param reference_path:
:param do_hal:
:param within_vec:
:return:
"""
within_iv_backup
=
copy
.
deepcopy
(
within_iv
)
previous_iv_backup
=
copy
.
deepcopy
(
previous_iv
)
# get the mean_per_model for previous and within
within_iv_mean
=
within_iv
.
mean_stat_per_model
()
previous_iv_mean
=
previous_iv
.
mean_stat_per_model
()
# merge the mean_per_model for previous and within
ll_
iv_mean
=
concat_statservers
(
previous_
iv_mean
,
within_
iv_mean
)
ll_
vec
=
concat_statservers
(
previous_
vec
,
within_
vec
)
# Compute the score matrix
ndx
=
sidekit
.
Ndx
(
models
=
ll_
iv_mean
.
modelset
,
testsegs
=
ll_
iv_mean
.
modelset
)
scores
=
sidekit
.
iv_scoring
.
cosine_scoring
(
ll_
iv_mean
,
ll_
iv_mean
,
ndx
=
sidekit
.
Ndx
(
models
=
ll_
vec
.
modelset
,
testsegs
=
ll_
vec
.
modelset
)
scores
=
sidekit
.
iv_scoring
.
cosine_scoring
(
ll_
vec
,
ll_
vec
,
ndx
,
wccn
=
None
,
check_missing
=
False
,
...
...
@@ -183,30 +166,74 @@ def cross_show(previous_iv,
scores
.
scoremat
=
-
0.5
*
(
scores
.
scoremat
+
scores
.
scoremat
.
transpose
())
# Constrain the scores to forbid any new clustering between previous shows
lowest_distance
=
numpy
.
min
(
scores
.
scoremat
)
for
iv_idx
in
range
(
previous_iv_mean
.
modelset
.
shape
[
0
]):
for
iv_jdx
in
range
(
previous_iv_mean
.
modelset
.
shape
[
0
]):
if
previous_iv_mean
.
modelset
[
iv_idx
]
==
previous_iv_mean
.
modelset
[
iv_jdx
]:
scores
.
scoremat
[
iv_idx
,
iv_jdx
]
=
scores
.
scoremat
[
iv_idx
,
iv_jdx
]
else
:
scores
.
scoremat
[
iv_idx
,
iv_jdx
]
=
numpy
.
inf
for
vec_idx
,
mod
in
enumerate
(
previous_vec
.
modelset
):
same_indices
=
numpy
.
argwhere
(
previous_vec
.
modelset
!=
mod
)
scores
.
scoremat
[
vec_idx
,
same_indices
]
=
numpy
.
inf
#for iv_idx in range(previous_vec.modelset.shape[0]):
# for iv_jdx in range(previous_vec.modelset.shape[0]):
# if previous_vec.modelset[iv_idx] == previous_vec.modelset[iv_jdx]:
# scores.scoremat[iv_idx, iv_jdx] = scores.scoremat[iv_idx, iv_jdx]
# else:
# scores.scoremat[iv_idx, iv_jdx] = numpy.inf
# Add to keep the within show clustering
for
iv_idx
in
range
(
previous_iv_mean
.
modelset
.
shape
[
0
],
ll_iv_mean
.
modelset
.
shape
[
0
]):
for
iv_jdx
in
range
(
previous_iv_mean
.
modelset
.
shape
[
0
],
ll_iv_mean
.
modelset
.
shape
[
0
]):
if
ll_iv_mean
.
modelset
[
iv_idx
]
==
ll_iv_mean
.
modelset
[
iv_jdx
]:
scores
.
scoremat
[
iv_idx
,
iv_jdx
]
=
scores
.
scoremat
[
iv_idx
,
iv_jdx
]
else
:
scores
.
scoremat
[
iv_idx
,
iv_jdx
]
=
numpy
.
inf
for
ii
in
range
(
previous_vec
.
modelset
.
shape
[
0
],
ll_vec
.
modelset
.
shape
[
0
]):
for
jj
in
range
(
previous_vec
.
modelset
.
shape
[
0
],
ll_vec
.
modelset
.
shape
[
0
]):
if
not
ll_vec
.
modelset
[
ii
]
==
ll_vec
.
modelset
[
jj
]:
scores
.
scoremat
[
ii
,
jj
]
=
numpy
.
inf
modelset_seg_idx
=
dict
()
for
seg
in
previous_diar
.
segments
:
modelset_seg_idx
[
seg
[
'cluster'
]]
=
numpy
.
where
(
ll_
iv_mean
.
modelset
==
seg
[
'cluster'
])[
0
]
modelset_seg_idx
[
seg
[
'cluster'
]]
=
numpy
.
where
(
ll_
vec
.
modelset
==
seg
[
'cluster'
])[
0
]
numpy
.
fill_diagonal
(
scores
.
scoremat
,
0.0
)
return
ll_vec
,
scores
def
cross_show
(
previous_vec
,
previous_diar
,
within_vec
,
within_diar
,
th_x
,
lim
,
user
,
file_info
,
uem
,
ref
,
human_in_the_loop
=
False
):
"""
:param previous_vec:
:param previous_diar:
:param within_vec:
:param within_diar:
:param th_x:
:param lim:
:param user:
:param file_info:
:param uem:
:param ref:
:param human_in_the_loop:
:return:
"""
within_vec_backup
=
copy
.
deepcopy
(
within_vec
)
previous_vec_backup
=
copy
.
deepcopy
(
previous_vec
)
# get the mean_per_model for previous and within
within_vec_mean
=
within_vec
.
mean_stat_per_model
()
previous_vec_mean
=
previous_vec
.
mean_stat_per_model
()
"""
Compute distance matrix to perform HAC between previous and within cluster.
This matrix is normalized to enable/disable clustering between previous/previous
and within/within clusters
"""
ll_vec
,
scores
=
compute_distance_cross_show
(
previous_vec_mean
,
previous_diar
,
within_vec_mean
)
"""
metadata = 0
if
do_hal
:
if
human_in_the_loop
:
tdict = {}
for i in range(previous_iv_mean.modelset.shape[0], ll_iv_mean.modelset.shape[0]):
insp_name = scores.modelset[i]
...
...
@@ -240,8 +267,9 @@ def cross_show(previous_iv,
j += 1
else:
j += 1
"""
if
not
do_hal
:
if
not
human_in_the_loop
:
scores
.
scoremat
+=
1.
th_x
+=
1.
numpy
.
fill_diagonal
(
scores
.
scoremat
,
0.0
)
...
...
@@ -251,20 +279,21 @@ def cross_show(previous_iv,
T
=
scipy
.
cluster
.
hierarchy
.
fcluster
(
Z
,
th_x
,
'distance'
)
# Don't allow to modify the names of previously existing clusters
# Create a dictionary with old_model_name as key and new_luster as value
# Create a dictionary with old_model_name as key and new_
c
luster as value
cluster_dict
=
dict
()
clusters_by_index
=
dict
()
for
ii
in
range
(
T
.
shape
[
0
]):
if
T
[
ii
]
not
in
clusters_by_index
:
clusters_by_index
[
T
[
ii
]]
=
ll_
iv_mean
.
modelset
[
ii
]
cluster_dict
[
ll_
iv_mean
.
modelset
[
ii
]]
=
clusters_by_index
[
T
[
ii
]]
clusters_by_index
[
T
[
ii
]]
=
ll_
vec
.
modelset
[
ii
]
cluster_dict
[
ll_
vec
.
modelset
[
ii
]]
=
clusters_by_index
[
T
[
ii
]]
# concatenate previous_
i
v et within_
i
v
new_previous_
i
v
=
concat_statservers
(
previous_
i
v_backup
,
within_
i
v_backup
)
# concatenate previous_v
ec
et within_v
ec
new_previous_v
ec
=
concat_statservers
(
previous_v
ec
_backup
,
within_v
ec
_backup
)
new_previous_diar
=
copy
.
deepcopy
(
previous_diar
)
new_previous_diar
.
segments
+=
within_diar
.
segments
if
do_hal
:
"""
if human_in_the_loop:
for ii, mod in enumerate(new_previous_iv.modelset):
if mod in list(tdict.keys()):
new_previous_iv.modelset[ii] = tdict[mod]
...
...
@@ -274,11 +303,15 @@ def cross_show(previous_iv,
for ii, seg in enumerate(within_diar.segments):
if seg['cluster'] in list(tdict.keys()):
within_diar.segments[ii]['cluster'] = tdict[seg['cluster']]
return new_previous_vec, new_previous_diar, within_diar, metadata
else:
# Modify the model names for i-vectors
for
ii
,
mod
in
enumerate
(
new_previous_iv
.
modelset
):
new_previous_iv
.
modelset
[
ii
]
=
cluster_dict
[
mod
]
"""
if
not
human_in_the_loop
:
# Modify the model names for vectors
for
ii
,
mod
in
enumerate
(
new_previous_vec
.
modelset
):
new_previous_vec
.
modelset
[
ii
]
=
cluster_dict
[
mod
]
for
ii
,
seg
in
enumerate
(
new_previous_diar
.
segments
):
new_previous_diar
.
segments
[
ii
][
'cluster'
]
=
cluster_dict
[
seg
[
'cluster'
]]
...
...
@@ -286,39 +319,56 @@ def cross_show(previous_iv,
for
ii
,
seg
in
enumerate
(
within_diar
.
segments
):
within_diar
.
segments
[
ii
][
'cluster'
]
=
cluster_dict
[
seg
[
'cluster'
]]
return
new_previous_
i
v
,
new_previous_diar
,
within_diar
,
metadata
return
new_previous_v
ec
,
new_previous_diar
,
within_diar
,
None
def
allies_cross_show_clustering
(
show_idx
,
model
,
current_diar
,
current_vec
,
th_x
,
lim
,
reference_path
=
None
,
hal
=
False
):
def
allies_cross_show_clustering
(
show_idx
,
archive_vectors
,
current_diar
,
current_vec
,
th_x
,
lim
,
user
,
file_info
,
uem
,
ref
,
hal
=
False
):
"""
:param show_idx:
:param
model
:
:param
archive_vectors
:
:param current_diar:
:param current_vec:
:param th_x:
:param lim:
:param user:
:param file_info:
:param uem:
:param ref:
:param reference_path:
:param hal:
:return:
"""
if
show_idx
==
0
:
model
[
"previous_vec"
]
=
copy
.
deepcopy
(
current_vec
)
model
[
"previous_diar"
]
=
current_diar
archive_vectors
[
"previous_vec"
]
=
copy
.
deepcopy
(
current_vec
)
archive_vectors
[
"previous_diar"
]
=
current_diar
metadata
=
0
else
:
previous_vec
,
previous_diar
,
current_diar
,
metadata
=
cross_show
(
previous_iv
=
model
[
"previous_vec"
],
previous_diar
=
model
[
"previous_diar"
],
previous_vec
,
previous_diar
,
current_diar
,
metadata
=
cross_show
(
previous_iv
=
archive_vectors
[
"previous_vec"
],
previous_diar
=
archive_vectors
[
"previous_diar"
],
within_iv
=
current_vec
,
within_diar
=
current_diar
,
th_x
=
th_x
,
lim
=
lim
,
reference_path
=
reference_path
,
user
=
user
,
file_info
=
file_info
,
uem
=
uem
,
ref
=
ref
,
do_hal
=
hal
)
model
[
"previous_vec"
]
=
previous_vec
model
[
"previous_diar"
]
=
previous_diar
archive_vectors
[
"previous_vec"
]
=
previous_vec
archive_vectors
[
"previous_diar"
]
=
previous_diar
return
model
,
current_diar
,
metadata
return
archive_vectors
,
current_diar
,
metadata
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment