Commit 5d25c5fa authored by Sylvain Meignier
diarization.egg-info/
s4d.egg-info/
*/__pycache__/
*.pyc
dist/
s4d
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false">
<file url="PROJECT" charset="UTF-8" />
</component>
</project>
<component name="InspectionProjectProfileManager">
<profile version="1.0" is_locked="false">
<option name="myName" value="Project Default" />
<option name="myLocal" value="false" />
<inspection_tool class="SpellCheckingInspection" enabled="true" level="TYPO" enabled_by_default="true">
<option name="processCode" value="false" />
<option name="processLiterals" value="false" />
<option name="processComments" value="true" />
</inspection_tool>
</profile>
</component>
<component name="InspectionProjectProfileManager">
<settings>
<option name="PROJECT_PROFILE" value="Project Default" />
<option name="USE_PROJECT_PROFILE" value="true" />
<version value="1.0" />
</settings>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.4.2 virtualenv at ~/pyenv3" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/../../i2rsv/.idea/i2rsv.iml" filepath="$PROJECT_DIR$/../../i2rsv/.idea/i2rsv.iml" />
<module fileurl="file://$PROJECT_DIR$/.idea/s4d.iml" filepath="$PROJECT_DIR$/.idea/s4d.iml" />
<module fileurl="file://$PROJECT_DIR$/../s4d_scripts/.idea/s4d_scripts.iml" filepath="$PROJECT_DIR$/../s4d_scripts/.idea/s4d_scripts.iml" />
<module fileurl="file://$USER_HOME$/diarization/selection_donnees/.idea/selection_donnees.iml" filepath="$USER_HOME$/diarization/selection_donnees/.idea/selection_donnees.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.5.0 virtualenv at ~/pyenv3" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="module" module-name="s4d_scripts" />
<orderEntry type="module" module-name="i2rsv" />
<orderEntry type="module" module-name="selection_donnees" />
</component>
</module>
<component name="DependencyValidationManager">
<state>
<option name="SKIP_IMPORT_STATEMENTS" value="false" />
</state>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
</component>
</project>
include *.md
Diarization lib
+++++++++++++++
INTRODUCTION
============
This GPL v3 package contains tools to build a speaker diarization system.
PREREQUISITES
=============
Diarization requires the following software installed for your platform:
1) Python__
__ http://www.python.org
2) NumPy__
__ http://www.numpy.org/
3) SciPy__
__ http://www.scipy.org/
4) Pandas__
__ http://www.pandas.org/
5) If you want to build the documentation: Sphinx__ 1.1.0 or newer
__ http://sphinx-doc.org/
INSTALLATION
============
python setup.py install
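
To check the installation, import the package and print its version (a
minimal sketch; the version string comes from ``s4d/__init__.py``)::

    import s4d
    print(s4d.__version__)  # "0.0.1" at the time of this commit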
__author__ = 'meignier'
import s4d.clustering.hac_bic
import s4d.clustering.hac_clr
import s4d.clustering.hac_utils
from s4d.graph import ClusteringGraph
from s4d.segmentation import sanity_check, bic_linear, div_gauss
from s4d.diar import Diar
from s4d.viterbi import Viterbi
__version__ = "0.0.1"
from s4d.clustering.hac_bic import GaussFull
from s4d.clustering.hac_bic import HAC_BIC
from s4d.clustering.hac_bic import bic_square_root
import logging
import numpy as np
class GaussFull:
"""
Class to model a speaker by a gaussian with full covariance
"""
def __init__(self, name, dim):
self.logger = logging.getLogger(__name__)
self.name = name
self.count = 0
self.dim = dim
self.stat1 = np.zeros(dim)
self.stat2 = np.zeros((dim, dim))
        self.cov_log_det = 0
        self.mu = None
        self.mu_dot = np.nan
        self.cov = None
        self.partial_bic = np.nan
def add(self, features):
"""
Accumulate statistics for *features*
:param features: numpy.ndarray
"""
self.count += features.shape[0] # add the number of features
self.stat1 += features.sum(axis=0)
self.stat2 += np.dot(features.T, features)
def _cov_log_det(self):
"""
Compute the log det of the covariance matrix
:return: float
"""
s, d = np.linalg.slogdet(self.cov)
return d
# cov_chol, lower = scipy.linalg.cho_factor(self.cov)
# return 2.0 * np.sum(np.log(np.diagonal(cov_chol)))
def compute(self):
"""
        Compute the mean and covariance from the accumulated statistics, the
        log det of the covariance and the partial BIC :math:`PBIC`.
:math:`PBIC_{x} = \\frac{n_x}{2} \\log|\\Sigma_x|`
"""
self.mu = self.stat1 / self.count
tmp = self.mu[:, np.newaxis]
self.mu_dot = np.dot(tmp, tmp.T)
self.cov = self.stat2 / self.count - self.mu_dot
self.cov_log_det = self._cov_log_det()
self.partial_bic = self.cov_log_det * 0.5 * self.count
@classmethod
def merge(cls, m1, m2):
"""
Merge two models *m1* and *m2*. Compute the new mean (*mu*),
covariance (*cov*) and PBIC *partial_bic*
:param m1: a GaussFull object
:param m2: a GaussFull object
:return: a GaussFull object
"""
m = GaussFull(m1.name, m1.dim)
m.count = m1.count + m2.count
m.stat1 = m1.stat1 + m2.stat1
m.stat2 = m1.stat2 + m2.stat2
m.mu = m.stat1 / m.count
tmp = m.mu[:, np.newaxis]
m.cov = m.stat2 / m.count - np.dot(tmp, tmp.T)
m.cov_log_det = m._cov_log_det()
m.partial_bic = m.cov_log_det * 0.5 * m.count
return m
@classmethod
def merge_partial_bic(cls, m1, m2):
"""
        Merge the statistic accumulators of two GaussFull objects and compute
        the partial BIC of the merged model.
        :param m1: a GaussFull object
        :param m2: a GaussFull object
        :return: the partial BIC, :math:`\\frac{n}{2} \\log|\\Sigma|`, of the merge
"""
try:
count = m1.count + m2.count
mu = ((m1.stat1 + m2.stat1) / count)[:, np.newaxis]
cov = (m1.stat2 + m2.stat2) / count - np.dot(mu, mu.T)
s, d = np.linalg.slogdet(cov)
# cov_chol, lower = scipy.linalg.cho_factor(cov, overwrite_a=True, check_finite=False)
# d = 2.0 * np.sum(np.log(np.diagonal(cov_chol)))
d *= 0.5 * count
return d
        except Exception:
            logging.warning('det problem, set to NaN: %s %s', m1.name, m2.name)
            return np.nan
@classmethod
def cst_bic(cls, dim, alpha):
"""
Compute the BIC constant:
:math:`cst = \\frac{1}{2} \\alpha \\left(d + \\frac{d(d+1)}{2}\\right)`
        where :math:`d` is the feature dimension (*dim*)
        and :math:`\\alpha` a threshold (*alpha*)
:param dim: the feature dimension
:param alpha: the threshold
:return: the constant
"""
return 0.5 * alpha * (dim + (0.5 * ((dim + 1) * dim)))
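# Minimal usage sketch (illustration only, not part of the library API):
# build two GaussFull models from synthetic features and evaluate the
# delta BIC merge criterion used by the HAC. With dim = 13 and alpha = 1.0,
# cst_bic = 0.5 * (13 + 0.5 * 14 * 13) = 52.0.
if __name__ == '__main__':
    np.random.seed(0)
    dim = 13
    m1 = GaussFull('spk1', dim)
    m1.add(np.random.randn(200, dim))        # 200 frames around mean 0
    m1.compute()
    m2 = GaussFull('spk2', dim)
    m2.add(np.random.randn(150, dim) + 2.0)  # 150 frames around mean 2
    m2.compute()
    cst = GaussFull.cst_bic(dim, 1.0)
    # delta BIC = PBIC_{1+2} - PBIC_1 - PBIC_2 - cst * log(n1 + n2)
    delta = (GaussFull.merge_partial_bic(m1, m2) - m1.partial_bic
             - m2.partial_bic - cst * np.log(m1.count + m2.count))
    print('delta BIC: %f (> 0 means keep the clusters separate)' % delta)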
__author__ = 'meignier'
import numpy as np
import logging
import copy
from s4d.clustering.hac_utils import argmin, roll, set_new_label
from s4d.clustering.hac_utils import bic_square_root
from s4d.clustering.gauss import GaussFull
class HAC_BIC:
"""
BIC Hierarchical Agglomerative Clustering (HAC) with gaussian models
The algorithm is based upon a hierarchical agglomerative clustering. The
initial set of clusters is composed of one segment per cluster. Each cluster
is modeled by a Gaussian with a full covariance matrix (see
    :class:`gauss.GaussFull`). The :math:`\\Delta BIC`
    measure is employed to select the candidate clusters to group as well as
    to stop the merging process. The two closest clusters :math:`i` and
    :math:`j` are merged at each iteration until :math:`\\Delta BIC_{i,j} > 0`.
    :math:`\\Delta BIC_{i,j} = PBIC_{i+j} - PBIC_{i} - PBIC_{j} - P`
    :math:`PBIC_{x} = \\frac{n_x}{2} \\log|\\Sigma_x|`
    :math:`cst = \\frac{1}{2} \\alpha \\left(d + \\frac{d(d+1)}{2}\\right)`
    :math:`P = cst \\times \\log(n_i+n_j)`
    where :math:`|\\Sigma_i|`, :math:`|\\Sigma_j|` and :math:`|\\Sigma_{i+j}|` are
    the determinants of the Gaussians associated with the clusters :math:`i`,
    :math:`j` and :math:`i+j`. :math:`\\alpha` is a parameter to set up. The
    penalty factor :math:`P` depends on :math:`d`, the dimension of the
    features, as well as on :math:`n_i` and :math:`n_j`, referring to the total
    length of cluster :math:`i` and cluster :math:`j` respectively.
"""
def __init__(self, features_server, table, alpha=1.0, sr=False):
self.fs = features_server
self.alpha = alpha
self.diar = copy.deepcopy(table)
self.models = []
self.merge = []
self.nb_merge = 0
self.sr = sr
self._init_train()
self._init_distance()
def _init_train(self):
"""
Train initial models
"""
        index = self.diar.make_index(['label', 'show'])
        for label in index:
            model = GaussFull(label, self.fs.dim())
            self.models.append(model)
            for show in index[label]:
                tmp, vad = self.fs.load(show)
                cep = tmp[0]
                self.dim = cep.shape[1]
                self.cst_bic = GaussFull.cst_bic(self.dim, self.alpha)
                for row in index[label][show]:
start = row['start']
stop = row['stop']
model.add(cep[start:stop])
for model in self.models:
model.compute()
def _init_distance(self):
""" Compute distance matrix
"""
nb = len(self.models)
self.dist = np.full((nb, nb), np.nan)
# for i in range(0, nb):
# mi = self.models[i]
for i, mi in enumerate(self.models):
#self.merge.append([])
# for j, mj in enumerate(self.models, start=i+1):
# logging.debug('i %d j %d n %d', i, j ,nb)
for j in range(i + 1, nb):
mj = self.models[j]
self.dist[i, j] = self.dist[j, i] = self._dist(mi, mj)
def _dist(self, mi, mj):
"""
Compute the BIC distance d(i,j)
:param mi: a GaussFull object
:param mj: a GaussFull object
:return: float
"""
if self.sr:
return GaussFull.merge_partial_bic(mi, mj) - \
mi.partial_bic - mj.partial_bic - bic_square_root(mi.count, mj.count, self.alpha, self.dim)
return GaussFull.merge_partial_bic(mi, mj) - \
mi.partial_bic - mj.partial_bic - self.cst_bic * np.log(mi.count + mj.count)
def _merge_model(self, mi, mj):
"""
        Merge two GaussFull objects
:param mi: a GaussFull object
:param mj: a GaussFull object
:return: a GaussFull object
"""
return GaussFull.merge(mi, mj)
def _update_dist(self, i):
"""
Update row and column i of the distance matrix
:param i: int
"""
nb = len(self.models)
mi = self.models[i]
for j in (x for x in range(nb) if x != i):
mj = self.models[j]
self.dist[i, j] = self.dist[j, i] = self._dist(mi, mj)
    def information(self, i, j, value):
        """Record the merge of models *i* and *j* with distance *value*."""
        self.merge.append([self.nb_merge, self.models[i].name, self.models[j].name, value])
# def link(self, thr):
# labels = list()
# idx = dict()
# qt = dict()
# k=0;
# while self.merge[k][0] < 0:
# name = self.merge[k][1]
# labels.append(name)
# idx[name] = k
# qt[name] = 1
# k += 1
#
# l = k
# link = np.zeros((l-1, 4))
#
# while k < len(self.merge):
# m = self.merge[k][0]
# name_i = self.merge[k][1]
# name_j = self.merge[k][2]
# v_ij = self.merge[k][3]
#
# qt[name_i] += 1
# link[m, 0] = idx[name_i]
# link[m, 1] = idx[name_j]
# link[m, 2] = v_ij
# link[m, 3] = qt[name_i]
# idx[name_i] = m+l
# idx[name_j] = -1
#
# k+= 1
#
# min = np.min(link[:,2])
# link[:,2] += -1*min
#
# return labels, link, thr-min
def perform(self, to_the_end=False):
"""
        Perform the HAC algorithm.
        :param to_the_end: if True, keep merging past the BIC stopping
            criterion until a single cluster remains
        :return: a Diar object, a deep copy taken at the BIC stopping point
"""
nb = len(self.models)
self.nb_merge = -1
for i in range(nb):
self.information(i, i, 0)
i, j, v = argmin(self.dist, nb)
self.nb_merge = 0
while v < 0.0 and nb > 1:
self.information(i, j, v)
self.nb_merge += 1
logging.debug('merge: %d c1: %s (%d) c2: %s (%d) dist: %f %d',
self.nb_merge, self.models[i].name, i,
self.models[j].name, j, v, nb)
# update merge
#self.merge[i].append(
# [self.nb_merge, self.models[i].name, self.models[j].name, v])
#self.merge[i] += self.merge[j]
#self.merge.pop(j)
self.diar.rename('label', [self.models[j].name], self.models[i].name)
# update model
self.models[i] = self._merge_model(self.models[i], self.models[j])
self.models.pop(j)
#nb = len(self.models)
# update distances
self.dist = roll(self.dist, j)
self._update_dist(i)
nb -= 1
i, j, v = argmin(self.dist, nb)
out_diar = copy.deepcopy(self.diar)
n = self.nb_merge
if to_the_end:
while nb > 1:
self.information(i, j, v)
self.nb_merge += 1
logging.debug('merge: %d c1: %s (%d) c2: %s (%d) dist: %f %d',
self.nb_merge, self.models[i].name, i,
self.models[j].name, j, v, nb)
self.diar.rename('label', [self.models[j].name], self.models[i].name)
# update model
self.models[i] = self._merge_model(self.models[i], self.models[j])
self.models.pop(j)
#nb = len(self.models)
# update distances
self.dist = roll(self.dist, j)
self._update_dist(i)
nb -= 1
i, j, v = argmin(self.dist, nb)
# make a map between old and new label
#return set_new_label(self.merge, self.diar, [x.name for x in self.models])
# labels_dict = dict()
# for i in range(0, len(self.merge)):
# if self.models[i].name not in labels_dict:
# labels_dict[self.models[i].name] = self.models[i].name
# for info in self.merge[i]:
# labels_dict[info[2]] = self.models[i].name
# for row in self.diar:
# name = row['label']
# name2 = labels_dict[name]
# row['label'] = name2
return out_diar
# def perform(self):
# """
# perform the HAC algorithm
# :return: a Diar object and a dictonary mapping the old labels to the
# new lables
# """
# nb = len(self.models)
# i, j, v = argmin(self.dist, nb)
# self.nb_merge = 0
# while v < 0.0 and nb > 1:
# self.nb_merge += 1
# logging.debug('merge: %d c1: %s (%d) c2: %s (%d) dist: %f',
# self.nb_merge, self.models[i].name, i,
# self.models[j].name, j, v)
# # update merge
# self.merge[i].append(
# [self.nb_merge, self.models[i].name, self.models[j].name, v])
# self.merge[i] += self.merge[j]
# self.merge.pop(j)
# # update model
# self.models[i] = self._merge_model(self.models[i], self.models[j])
# self.models.pop(j)
# nb = len(self.models)
# # update distances
# self.dist = roll(self.dist, j)
# self._update_dist(i)
# i, j, v = argmin(self.dist, nb)
# logging.debug('exit merge %d c1: %s (%d) c2: %s (%d) dist: %f',
# self.nb_merge, self.models[i].name, i,
# self.models[j].name, j, v)
#
# # make a map between old and new label
# #return set_new_label(self.merge, self.diar, [x.name for x in self.models])
#
# labels_dict = dict()
# for i in range(0, len(self.merge)):
# if self.models[i].name not in labels_dict:
# labels_dict[self.models[i].name] = self.models[i].name
# for info in self.merge[i]:
# labels_dict[info[2]] = self.models[i].name
# for row in self.diar:
# name = row['label']
# name2 = labels_dict[name]
# row['label'] = name2
#
# return self.diar, labels_dict
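# Minimal usage sketch (illustration only): assuming `fs` is a configured
# sidekit FeaturesServer and `init_diar` is a Diar holding an initial
# segmentation (both hypothetical, their construction is not shown here),
# a BIC clustering pass would look like:
#
#     hac = HAC_BIC(fs, init_diar, alpha=3.0, sr=False)
#     out_diar = hac.perform()
#
# hac.merge then holds [iteration, name_i, name_j, delta_bic] records, one
# per initial cluster and one per merge, to trace the clustering decisions.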
import numpy as np
import logging
import copy
from sidekit import Mixture, FeaturesServer
from s4d.clustering.hac_utils import set_new_label, argmin, roll
from s4d.diar import Diar
from sidekit.statserver import StatServer
class HAC_CLR:
"""
CLR Hierarchical Agglomerative Clustering (HAC) with GMM trained by MAP
"""
def __init__(self, features_server, diar, ubm, ce=False):
        assert isinstance(features_server, FeaturesServer), 'First parameter has to be a FeaturesServer'
        assert isinstance(diar, Diar), 'Second parameter has to be a Diar (segmentation container)'
assert isinstance(ubm, Mixture), '3rd parameter has to be a Mixture'
self.features_server = features_server
self.diar = copy.deepcopy(diar)
self.merge = []
self.nb_merge = 0
self.ubm = ubm
self.ce = ce
self.stat_speaker = None
self.stat_seg = None
#self.init_train()
#self._init_distance()
    def _get_cep(self, index, label):
        cep_list = list()
        for show in index[label]:
idx = self.diar.features_by_label(show)[label]
if len(idx) > 0:
tmp, vad = self.features_server.load(show)
cep_list.append(tmp[0][idx])
cep = np.concatenate(cep_list, axis=0)
return cep
def _ll(self, ubm, cep, mu=None, label='ubm'):
        # TODO: take only the top Gaussians into account
        lp = ubm.compute_log_posterior_probabilities(cep, mu=mu)
        # stable log-sum-exp over the mixture components; the direct
        # np.log(np.sum(np.exp(lp))) form can overflow for large log-posteriors
        pp_max = np.max(lp, axis=1)
        ll = pp_max + np.log(np.sum(np.exp((lp.transpose() - pp_max).transpose()),
                                    axis=1))
        not_finite = np.logical_not(np.isfinite(ll))
        cpt = np.count_nonzero(not_finite)
        ll[not_finite] = 1.0e-200
m = np.mean(ll)
        if cpt > 0:
            logging.info('model %s, nb frames with llk problem: %d/%d \t %f',
                         label, cpt, cep.shape[0], m)
return m
def initial_models(self, nb_threads=1):
# sort by show to minimize the reading of mfcc by the statServer
self.diar.sort(['show'])
# Compute statistics by segments
self.stat_seg = StatServer(self.diar.id_map())
self.stat_seg.accumulate_stat(self.ubm, self.features_server)
self.stat_speaker = self.stat_seg.adapt_mean_MAP_multisession(self.ubm)
def initial_distances(self, nb_threads=1):