hac_iv.py 3.99 KB
Newer Older
Anthony Larcher's avatar
Anthony Larcher committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# -*- coding: utf-8 -*-
#
# This file is part of s4d.
#
# s4d is a python package for speaker diarization.
# Home page: http://www-lium.univ-lemans.fr/s4d/
#
# s4d is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the License,
# or (at your option) any later version.
#
# s4d is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with s4d.  If not, see <http://www.gnu.org/licenses/>.


"""
Anthony Larcher's avatar
Anthony Larcher committed
23
Copyright 2014-2020 Sylvain Meignier
Anthony Larcher's avatar
Anthony Larcher committed
24
25
"""

Sylvain Meignier's avatar
Sylvain Meignier committed
26
import copy
Anthony Larcher's avatar
Anthony Larcher committed
27
import logging
Sylvain Meignier's avatar
Sylvain Meignier committed
28
import numpy as np
Anthony Larcher's avatar
Anthony Larcher committed
29

Sylvain Meignier's avatar
Sylvain Meignier committed
30
31
from scipy.spatial.distance import squareform
from scipy.cluster import hierarchy as hac
Anthony Larcher's avatar
Anthony Larcher committed
32
from .hac_utils import *
Sylvain Meignier's avatar
Sylvain Meignier committed
33
34


Sylvain Meignier's avatar
Sylvain Meignier committed
35
36
37
38
def information(merge, nb_merge, i, j, value):
    """
    Record one merge step at the end of the ``merge`` history.

    :param merge: history container; only its ``append`` method is used
    :param nb_merge: index of the merge step
    :param i: first merged item
    :param j: second merged item
    :param value: value associated with the merge
    :return: None, ``merge`` is modified in place
    """
    record = [nb_merge, i, j, value]
    merge.append(record)


Sylvain Meignier's avatar
Sylvain Meignier committed
39
def hac_iv(diar, scores, method="complete", threshold=0.0):
    """
    Cluster the models of ``scores`` with scipy's hierarchical agglomerative
    clustering and replay the resulting merges on a copy of ``diar``.

    The scores are converted by ``scores2distance``, which returns the
    distances together with a distance threshold ``t``. The linkage rows are
    then applied in order for as long as the merge distance is strictly
    below ``t``.

    :param diar: diarization object; it is deep-copied and only the copy is
        modified (through its ``rename`` method)
    :param scores: score object exposing ``modelset`` (``tolist()`` is called
        on it); a deep copy is handed to ``scores2distance``
    :param method: linkage method forwarded to
        ``scipy.cluster.hierarchy.linkage``
    :param threshold: value forwarded to ``scores2distance``
    :return: a tuple ``(ldiar, cluster_dict, merge)`` where ``ldiar`` is the
        modified copy of ``diar``, ``cluster_dict`` maps each kept cluster
        name to the list of names merged into it, and ``merge`` is the list
        of ``[step, c0, c1, distance]`` records filled by ``information``
    """
    # Work on private copies: the caller's objects are never handed to the
    # helpers that may modify them.
    ldiar = copy.deepcopy(diar)
    distances, t = scores2distance(copy.deepcopy(scores), threshold)

    # squareform switches between the square and the condensed representation.
    # NOTE(review): linkage expects a condensed distance vector, so
    # scores2distance presumably returns a square matrix -- confirm.
    link = hac.linkage(squareform(distances), method=method)

    # key: name of a kept cluster, value: list of the names merged into it
    cluster_dict = {}
    merge = []
    # Names addressed by the linkage indices; one entry is appended per merge
    # so that indices of newly formed clusters resolve to a name as well.
    cluster_list = scores.modelset.tolist()

    for step in range(len(link)):
        value = link[step, 2]
        if not value < t:
            # Stop at the first merge that is not strictly below the threshold.
            break

        left = int(link[step, 0])
        right = int(link[step, 1])
        logging.debug('c0: {:d} c1: {:d} value: {:.4f}'.format(left, right,
                                                              value))

        # names of the two clusters merged at this step
        c0 = cluster_list[left]
        c1 = cluster_list[right]
        logging.debug(
            '\t c0: {} c1: {} value: {:.4f}'.format(c0, c1, value))
        information(merge, step, c0, c1, value)

        # c0 goes into c1, whether or not c1 already has members
        cluster_dict.setdefault(c1, []).append(c0)
        if c0 in cluster_dict:
            # c0 had members of its own: hand them over to c1, then drop c0
            cluster_dict[c1].extend(cluster_dict[c0])
            del cluster_dict[c0]

        # the cluster created by this merge carries the name c1
        cluster_list.append(c1)
        ldiar.rename('cluster', [c0], c1)

    return ldiar, cluster_dict, merge
Sylvain Meignier's avatar
Sylvain Meignier committed
86

Sylvain Meignier's avatar
stable    
Sylvain Meignier committed
87
# def hac_update_model(diarization, ivectors):
Sylvain Meignier's avatar
Sylvain Meignier committed
88
89


Sylvain Meignier's avatar
??    
Sylvain Meignier committed
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
def hac_iv_it(diar, model_iv, threshold=0.0):
    """
    Iteratively merge pairs of models for as long as the value returned by
    ``argmax`` is strictly above ``threshold`` and more than one model is
    left.

    Both ``model_iv`` and ``diar`` are deep-copied; only the copies are
    modified.

    :param diar: diarization object; its copy is attached to the copy of
        ``model_iv`` and renamed after each merge
    :param model_iv: model object exposing ``scores.scoremat``,
        ``scores.modelset`` and an ``update(i, j)`` method
    :param threshold: a merge is performed only while the selected value is
        strictly greater than this threshold
    :return: the modified copy of ``diar``
    """
    work = copy.deepcopy(model_iv)
    work.diar = copy.deepcopy(diar)

    def select_pair():
        # Put -inf on the diagonal (so a model is presumably never paired
        # with itself), count the models and ask argmax for the next pair.
        # ``work.scores`` is read again on every call on purpose: ``update``
        # may have changed it.
        np.fill_diagonal(work.scores.scoremat, -np.inf)
        count = work.scores.modelset.shape[0]
        return count, argmax(work.scores.scoremat, count)

    nb, (i, j, v) = select_pair()
    nb_merge = 0
    while v > threshold and nb > 1:
        nb_merge += 1
        models = work.scores.modelset
        name_i = models[i]
        name_j = models[j]
        logging.info('merge: %d c1: %s (%d) c2: %s (%d) dist: %f, size: %d',
                     nb_merge, name_i, i, name_j, j, v, models.shape[0])

        # NOTE(review): update(i, j) presumably folds model j into model i and
        # refreshes the scores -- confirm against the model class.
        work.update(i, j)
        work.diar.rename('cluster', [name_j], name_i)

        nb, (i, j, v) = select_pair()

    return work.diar