cc_iv.py 5.89 KB
Newer Older
Anthony Larcher's avatar
Anthony Larcher committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# -*- coding: utf-8 -*-
#
# This file is part of s4d.
#
# s4d is a python package for speaker diarization.
# Home page: http://www-lium.univ-lemans.fr/s4d/
#
# s4d is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the License,
# or (at your option) any later version.
#
# s4d is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with s4d.  If not, see <http://www.gnu.org/licenses/>.


"""
Copyright 2014-2020 Sylvain Meignier
"""
Sylvain Meignier's avatar
Sylvain Meignier committed
25
26
27
28

import copy
import logging
import numpy as np
Anthony Larcher's avatar
Anthony Larcher committed
29

Sylvain Meignier's avatar
Sylvain Meignier committed
30
from collections import namedtuple
Anthony Larcher's avatar
Anthony Larcher committed
31
32
from .hac_utils import scores2distance
from scipy.sparse import csgraph
Sylvain Meignier's avatar
Sylvain Meignier committed
33
34
from sidekit.bosaris.scores import Scores

Sylvain Meignier's avatar
stable    
Sylvain Meignier committed
35
# Record describing a generic connected component (one that has no star
# center): a type tag, the diarization restricted to the component and the
# matching sub-matrix of scores.
# The typename matches the bound name so that repr() is unambiguous and the
# type can be pickled by reference (the previous typename, 'ConnectedComponent',
# resolved to the ConnectedComponent class of this module instead).
ConnectedComponentTuple = namedtuple('ConnectedComponentTuple', ['type', 'diarization', 'scores'])
Sylvain Meignier's avatar
Sylvain Meignier committed
36
37


Sylvain Meignier's avatar
stable    
Sylvain Meignier committed
38
# Record describing a star graph (or an isolated vertex): same leading fields
# as a generic connected component, plus the name of the center and the
# within inertia (sum of the distances from the center to the members).
# The typename matches the bound name so that repr() is unambiguous and the
# type can be pickled by reference (the previous typename was
# 'ConnectedComponent').
StarGraphTuple = namedtuple('StarGraphTuple', ['type', 'diarization', 'scores',
                                               'center', 'within_inertia'])
Sylvain Meignier's avatar
Sylvain Meignier committed
40
41


Sylvain Meignier's avatar
stable    
Sylvain Meignier committed
42
class ConnectedComponent:
    """
    Split a diarization into the connected components of the graph obtained
    by thresholding the distances derived from a score matrix.

    Every component is classified as an isolated vertex ('star_graph_0'),
    a star graph ('star_graph_k') or a generic connected component ('cc').
    """

    def __init__(self, diar, scores, threshold):
        """
        :param diar: s4d diarization; a deep copy is stored in self.diar
        :param scores: score object exposing modelset, segset, scoremask and
            scoremat; the ith entry of modelset is the cluster (speaker) of
            row and column i. A deep copy is stored in self.scores
        :param threshold: threshold handed to scores2distance when the
            graph is built
        """
        # not filled by any method of this class, kept for compatibility
        self.cc = list()
        self.diar = copy.deepcopy(diar)
        self.scores = copy.deepcopy(scores)
        self.thr = threshold
        # number of connected components found by the last sub_graph() call
        self.cc_nb = -1
        # output: the star graphs, isolated vertices and other components
        self.cc_list = None
        # counters of the last sub_graph() call: star graphs, isolated
        # vertices and connected components without a center
        self.nb_sg = 0
        self.nb_sg0 = 0
        self.n = 0

    def _star_graph(self, lst, graph):
        """
        Verify if a star graph exists in the sub graph
        (http://en.wikipedia.org/wiki/Star_(graph_theory))

        A vertex is a valid center only when the sum of its distances to all
        the vertices of lst is finite; the valid vertex with the smallest sum
        wins and the first one is kept on ties.

        :param lst: list of vertices (row/column indices of graph)
        :param graph: the graph, a square array where np.inf marks a missing
            edge
        :return: the center and the within_inertia, (-1, np.inf) when no
            vertex is a valid center
        """
        within_inertia = np.inf
        center = -1
        for i in lst:
            s = np.sum(graph[i, lst])
            # strict comparison: an infinite sum can never become the center
            if within_inertia > s:
                within_inertia = s
                center = i
        return center, within_inertia

    @staticmethod
    def _group_vertices(cc_nb, cc_list):
        """
        Turn per-vertex component labels into one vertex list per component.

        :param cc_nb: number of components
        :param cc_list: cc_list[j] is the component label of vertex j
        :return: list of cc_nb lists of vertex indices, in increasing order
        """
        groups = [[] for _ in range(cc_nb)]
        for vertex, label in enumerate(cc_list):
            groups[label].append(vertex)
        return groups

    @staticmethod
    def _representative(vertices, center):
        """
        Return the vertex whose model name labels a merged component: the
        center when there is one, else the first vertex of the component.
        """
        return center if center >= 0 else vertices[0]

    def sub_graph(self, rename_cc=False):
        """
        find the 2 kind of sub-graphs:
         - isolated vertices and star graphs, already clustered: in the
           returned diarization their clusters are renamed to the center
         - other connected components, only renamed when rename_cc is set

        The result is also stored in self.cc_list; self.cc_nb and the
        counters self.nb_sg0, self.nb_sg and self.n describe this call only.

        :param rename_cc: if True, the clusters of a component without a
            center are renamed, in the returned diarization, to the model
            of the first vertex of that component
        :return: tuple (diarization, list of StarGraphTuple /
            ConnectedComponentTuple, number of isolated vertices, number of
            star graphs, number of other connected components)
        """
        logging.debug('threshold the distance matrix')
        distances, t = scores2distance(self.scores, self.thr)

        # Distances above the threshold are set to inf: scipy's dense graph
        # conversion ignores such entries, so they are not edges.
        graph = distances.copy()
        graph[distances > t] = np.inf

        logging.debug('get connected components')
        cc_nb, cc_list = csgraph.connected_components(graph, directed=False)
        self.cc_nb = cc_nb
        diar_out = copy.deepcopy(self.diar)

        # Start from a clean state so that calling sub_graph() again does not
        # add up the counters of the previous call.
        self.nb_sg0 = 0
        self.nb_sg = 0
        self.n = 0
        self.cc_list = list()

        # for each connected component, slst holds the indices of its vertices
        for slst in self._group_vertices(cc_nb, cc_list):
            # scores restricted to the vertices of the component
            scores = Scores()
            scores.modelset = np.copy(self.scores.modelset[slst])
            scores.segset = np.copy(self.scores.segset[slst])
            scores.scoremask = np.copy(self.scores.scoremask[slst, :][:, slst])
            scores.scoremat = np.copy(self.scores.scoremat[slst, :][:, slst])

            diar_cc = self.diar.filter('cluster', 'in', scores.modelset.tolist())
            if len(slst) == 1:  # isolated vertex
                cc = StarGraphTuple('star_graph_0', diar_cc, scores,
                                    self.scores.modelset[slst[0]], 0)
                self.nb_sg0 += 1
            else:
                center, within_inertia = self._star_graph(slst, graph)
                if center >= 0:  # Star Graph if the list contains a center
                    center_name = self.scores.modelset[center]
                    clusters = diar_cc.unique('cluster')
                    diar_out.rename('cluster', clusters, center_name)
                    diar_cc.rename('cluster', clusters, center_name)
                    cc = StarGraphTuple('star_graph_k', diar_cc, scores,
                                        center_name, within_inertia)
                    self.nb_sg += 1
                else:  # connected component without star graph
                    if rename_cc:
                        # center is -1 here: indexing modelset with it would
                        # pick the last model of the whole score set, so a
                        # vertex of this component is used instead.
                        name = self.scores.modelset[self._representative(slst, center)]
                        # only diar_out is renamed; diar_cc keeps its
                        # original cluster names
                        diar_out.rename('cluster', diar_cc.unique('cluster'), name)
                    cc = ConnectedComponentTuple('cc', diar_cc, scores)
                    self.n += 1

            self.cc_list.append(cc)

        logging.debug('-- stat CC %s -- nb_sg0: %s nb_sg: %s cc: %s nb: %s/%s',
                      self.thr, self.nb_sg0, self.nb_sg, self.n,
                      self.n + self.nb_sg0 + self.nb_sg, cc_nb)
        return diar_out, self.cc_list, self.nb_sg0, self.nb_sg, self.n
Sylvain Meignier's avatar
Sylvain Meignier committed
143
144
145
146


def connexted_component(diar, scores, threshold):
    """
    Convenience wrapper around ConnectedComponent.

    :param diar: diarization handed to ConnectedComponent
    :param scores: scores handed to ConnectedComponent
    :param threshold: threshold handed to ConnectedComponent
    :return: the result of ConnectedComponent.sub_graph() called with its
        default arguments
    """
    return ConnectedComponent(diar, scores, threshold).sub_graph()