Commit 87e0e020 authored by Anthony Larcher's avatar Anthony Larcher
Browse files

update anthony

parent 6c5c4a9e
# -*- coding: utf-8 -*-
"""
Copyright 2014-2016 Anthony Larcher
.. topic::sidekit
| This file is part of SIDEKIT.
|
| SIDEKIT is a python package for speaker verification.
| Home page: http://www-lium.univ-lemans.fr/sidekit/
|
| SIDEKIT is free software: you can redistribute it and/or modify
| it under the terms of the GNU Lesser General Public License as
| published by the Free Software Foundation, either version 3 of the License,
| or (at your option) any later version.
|
| SIDEKIT is distributed in the hope that it will be useful,
| but WITHOUT ANY WARRANTY; without even the implied warranty of
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
| GNU Lesser General Public License for more details.
|
| You should have received a copy of the GNU Lesser General Public License
| along with SIDEKIT. If not, see <http://www.gnu.org/licenses/>.
#
# This file is part of SIDEKIT.
#
# SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/
#
# SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/
#
# SIDEKIT is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the License,
# or (at your option) any later version.
#
# SIDEKIT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with SIDEKIT. If not, see <http://www.gnu.org/licenses/>.
"""
Copyright 2014-2016 Anthony Larcher and Sylvain Meignier
PARALLEL_MODULE = 'multiprocessing' # can be , threading, multiprocessing MPI is planned in the future
import sys
# Import libsvm
import logging
from ctypes import *
from ctypes.util import find_library
from os import path
from sidekit.sidekit_wrappers import *
# Import bosaris-like classes
from sidekit.bosaris import IdMap
from sidekit.bosaris import Ndx
from sidekit.bosaris import Key
from sidekit.bosaris import Scores
from sidekit.bosaris import DetPlot
from sidekit.bosaris import effective_prior
from sidekit.bosaris import fast_minDCF
# Import classes
from sidekit.features_server import FeaturesServer
from sidekit.mixture import Mixture
from sidekit.statserver import StatServer
import sidekit.frontend.io
import sidekit.frontend.vad
import sidekit.frontend.normfeat
import sidekit.frontend.features
# Import function libraries
from sidekit.sidekit_io import *
from sidekit.sv_utils import *
from sidekit.lid_utils import *
from sidekit.gmm_scoring import *
from sidekit.jfa_scoring import *
from sidekit.iv_scoring import *
from sidekit.theano_utils import *
:mod:`frontend` provides methods to process an audio signal in order to extract
useful parameters for speaker verification.
"""
from sidekit.frontend.io import write_pcm
from sidekit.frontend.io import read_pcm
from sidekit.frontend.io import pcmu2lin
from sidekit.frontend.io import read_sph
from sidekit.frontend.io import write_label
from sidekit.frontend.io import read_label
from sidekit.frontend.io import read_spro4
from sidekit.frontend.io import read_audio
from sidekit.frontend.io import write_spro4
from sidekit.frontend.io import read_htk
from sidekit.frontend.io import write_htk
from sidekit.frontend.vad import vad_energy
from sidekit.frontend.vad import vad_snr
from sidekit.frontend.vad import label_fusion
from sidekit.frontend.vad import speech_enhancement
from sidekit.frontend.normfeat import cms
from sidekit.frontend.normfeat import cmvn
from sidekit.frontend.normfeat import stg
from sidekit.frontend.normfeat import rasta_filt
from sidekit.frontend.features import compute_delta
from sidekit.frontend.features import framing
from sidekit.frontend.features import pre_emphasis
from sidekit.frontend.features import trfbank
from sidekit.frontend.features import mel_filter_bank
from sidekit.frontend.features import mfcc
from sidekit.frontend.features import pca_dct
from sidekit.frontend.features import shifted_delta_cepstral
# Module metadata.  The original assigned __author__ and __copyright__ twice
# with conflicting values (a merge leftover); only the final, winning values
# are kept here so the module attributes are unambiguous.
__author__ = "Anthony Larcher"
__copyright__ = "Copyright 2014-2016 Anthony Larcher"
__license__ = "LGPL"
__version__ = "1.0.4"
__maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production"
__docformat__ = 'reStructuredText'
# Attempt to load the libsvm shared library.  The copy bundled with SIDEKIT
# is tried first; on failure we fall back to a system-wide installation
# located through ctypes.util.find_library.
libsvm_loaded = False
try:
    # Use `path` (imported above via `from os import path`) consistently
    # instead of mixing it with `os.path`.
    dirname = path.join(path.dirname(path.abspath(__file__)), 'libsvm')
    if sys.platform == 'win32':
        libsvm = CDLL(path.join(dirname, r'libsvm.dll'))
    else:
        libsvm = CDLL(path.join(dirname, 'libsvm.so.2'))
    libsvm_loaded = True
except OSError:
    # CDLL raises OSError when the bundled library cannot be loaded
    # (bare `except:` replaced with the specific exception).
    # For unix the prefix 'lib' is not considered by find_library.
    if find_library('svm'):
        libsvm = CDLL(find_library('svm'))
        libsvm_loaded = True
    elif find_library('libsvm'):
        libsvm = CDLL(find_library('libsvm'))
        libsvm_loaded = True
    else:
        libsvm_loaded = False
        logging.warning('WARNING: libsvm is not installed, please refer to the' +
                        ' documentation if you intend to use SVM classifiers')

if libsvm_loaded:
    from sidekit.libsvm import *
    from sidekit.svm_scoring import *
    from sidekit.svm_training import *
# Public API of the top-level sidekit package.  The original listed
# "frontend" twice; the duplicate entry is removed.
__all__ = ["bosaris",
           "frontend",
           "libsvm",
           "sv_utils",
           "gmm_scoring",
           "svm_scoring",
           "svm_training",
           "iv_scoring",
           "sidekit_io",
           "mixture",
           "statserver",
           "features_server",
           "theano_utils"]
# __all__ = ["io",
# "vad",
# "normfeat",
# "features"
# ]
......@@ -34,8 +34,10 @@ This is the 'detplot' module
import numpy as np
import matplotlib
import os
if not "DISPLAY" in os.environ:
matplotlib.use('Agg')
matplotlib.use('PDF')
import matplotlib.pyplot as mpl
import scipy
from collections import namedtuple
......
......@@ -412,7 +412,12 @@ def framing(sig, win_size, win_shift=1, context=(0,0), pad='zeros'):
_win_size = win_size + sum(context)
shape = ((sig.shape[0] - win_size) / win_shift + 1, 1, _win_size, sig.shape[1])
strides = tuple(map(lambda x: x * dsize, [win_shift * sig.shape[1], 1, sig.shape[1], 1]))
return np.lib.stride_tricks.as_strided(np.lib.pad(sig, c, 'constant', constant_values=(0,)),
if pad == 'zeros':
return np.lib.stride_tricks.as_strided(np.lib.pad(sig, c, 'constant', constant_values=(0,)),
shape=shape,
strides=strides).squeeze()
elif pad == 'edge':
return np.lib.stride_tricks.as_strided(np.lib.pad(sig, c, 'edge'),
shape=shape,
strides=strides).squeeze()
......
......@@ -39,6 +39,11 @@ from scipy.io import wavfile
from scipy.signal import decimate
from sidekit.sidekit_io import *
try:
import h5py
h5py_loaded = True
except ImportError:
h5py_loaded = False
__author__ = "Anthony Larcher"
__copyright__ = "Copyright 2014-2016 Anthony Larcher"
......@@ -223,7 +228,7 @@ def read_sph(inputFileName, mode='p'):
inputFileName = "".join((inputFileName, '.sph'))
fid = open(inputFileName, 'rb')
else:
pass # TODO: RAISE an exception
raise Exception('Cannot find file {}'.format(inputFileName))
ffx[0] = inputFileName
elif not isinstance(inputFileName, str):
ffx = inputFileName
......@@ -645,6 +650,29 @@ def write_htk(features,
features = features.astype('>f')
features.tofile(fh)
def write_hdf5(feat, feat_type, label, outputFileName):
    """Write a feature matrix and optional VAD labels to an HDF5 file.

    :param feat: 2D ndarray of features, stored as float32 in a dataset
        named ``feat_type``
    :param feat_type: name of the HDF5 dataset to create for the features
    :param label: 1D ndarray of VAD labels, stored as int8 under "vad";
        pass None to skip writing labels
    :param outputFileName: name of the HDF5 file to write to (opened in
        append mode, so existing datasets are preserved)
    """
    with h5py.File(outputFileName, "a") as f:
        f.create_dataset(feat_type, data=feat.astype('float32'),
                         maxshape=(None, None),
                         compression="gzip",
                         fletcher32=True)
        if label is not None and "vad" not in f:
            # The original passed maxshape=(None), which is just None (not a
            # tuple) and silently created a fixed-size dataset; (None,) makes
            # the 1-D "vad" dataset actually resizable.
            f.create_dataset("vad", data=label.astype('int8'),
                             maxshape=(None,),
                             compression="gzip",
                             fletcher32=True)
def read_hdf5(inputFileName, feature_id, vad=True):
    """Read a feature matrix (and optionally VAD labels) from an HDF5 file.

    :param inputFileName: name of the HDF5 file to read from
    :param feature_id: name of the dataset holding the feature matrix
    :param vad: when True, also read the "vad" dataset as a boolean array

    :return: a tuple (feat, label) where label is None when vad is False

    :raise KeyError: if the requested dataset is absent from the file
    """
    with h5py.File(inputFileName, "r") as f:
        # Dataset[()] reads the whole dataset as a ndarray; the legacy
        # ``.value`` accessor used originally was removed in h5py 3.0.
        feat = f[feature_id][()]
        if vad:
            label = f["vad"][()].astype('bool').squeeze()
        else:
            label = None
        return feat, label
def read_htk(inputFileName,
labelFileName="",
......
......@@ -95,13 +95,20 @@ def gmm_scoring_singleThread(ubm, enroll, ndx, feature_server, scoreMat, segIdx=
llr = np.zeros(np.array(idx_enroll).shape)
for m in range(llr.shape[0]):
# Compute llk for the current model
lp = ubm.compute_log_posterior_probabilities(cep[0], enroll.stat1[idx_enroll[m], :])
if ubm.invcov.ndim == 2:
lp = ubm.compute_log_posterior_probabilities(cep[0], enroll.stat1[idx_enroll[m], :])
elif ubm.invcov.ndim == 3:
lp = ubm.compute_log_posterior_probabilities_full(cep[0], enroll.stat1[idx_enroll[m], :])
ppMax = np.max(lp, axis=1)
loglk = ppMax + np.log(np.sum(np.exp((lp.transpose() - ppMax).transpose()), axis=1))
llr[m] = loglk.mean()
# Compute and subtract llk for the ubm
lp = ubm.compute_log_posterior_probabilities(cep[0])
if ubm.invcov.ndim == 2:
#lp = ubm.compute_log_posterior_probabilities(cep[0])
lp = ubm.compute_log_posterior_probabilities(cep[0])
elif ubm.invcov.ndim == 3:
lp = ubm.compute_log_posterior_probabilities_full(cep[0])
ppMax = np.max(lp, axis=1)
loglk = ppMax \
+ np.log(np.sum(np.exp((lp.transpose() - ppMax).transpose()),
......
......@@ -112,12 +112,13 @@ class Mixture(object):
self.w = np.array([])
self.mu = np.array([])
self.invcov = np.array([])
self.invchol = np.array([])
self.cov_var_ctl = np.array([])
self.cst = np.array([])
self.det = np.array([])
self.name = name
self.A = 0
if mixtureFileName == '':
pass
elif mixtureFileFormat.lower() == 'pickle':
......@@ -133,7 +134,7 @@ class Mixture(object):
self.read_htk(mixtureFileName)
else:
raise Exception("Wrong mixtureFileFormat")
@accepts('Mixture', 'Mixture', debug=2)
def __add__(self, other):
"""Overide the sum for a mixture.
......@@ -146,6 +147,28 @@ class Mixture(object):
new_mixture.invcov = self.invcov + other.invcov
return new_mixture
def init_from_diag(self, diag_gmm):
    """Initialize this Mixture as a full-covariance copy of a diagonal GMM.

    Weights, normalization constants, determinants and means are taken from
    the input mixture; each diagonal inverse covariance row is expanded into
    a full (dim x dim) matrix and its Cholesky factor is precomputed.

    :param diag_gmm: a Mixture object with diagonal covariances
        (assumes invcov has shape (distrib_nb, dim) -- TODO confirm)
    """
    distrib_nb = diag_gmm.w.shape[0]
    dim = diag_gmm.mu.shape[1]
    self.w = diag_gmm.w
    self.cst = diag_gmm.cst
    self.det = diag_gmm.det
    self.mu = diag_gmm.mu
    self.invcov = np.empty((distrib_nb, dim, dim))
    self.invchol = np.empty((distrib_nb, dim, dim))
    for gg in range(distrib_nb):
        # Embed the diagonal inverse covariance into a full matrix and
        # cache its Cholesky factor for likelihood computation.
        self.invcov[gg] = np.diag(diag_gmm.invcov[gg,:])
        self.invchol[gg] = np.linalg.cholesky(self.invcov[gg])
    # NOTE(review): np.diag on a 2-D cov_var_ctl extracts its diagonal,
    # while on 1-D it builds a matrix -- confirm the intended input shape.
    self.cov_var_ctl = np.diag(diag_gmm.cov_var_ctl)
    self.name = diag_gmm.name
    self.A = np.zeros(self.cst.shape)  # we keep zero here as it is not used for full covariance distributions
def _serialize(self):
"""
Serialization is necessary to share the memory when running multiple processes
......@@ -206,7 +229,7 @@ class Mixture(object):
self.read_htk(inputFileName)
else:
raise Exception('Error: unknown extension')
def read_hdf5(self, mixtureFileName):
"""Read a Mixture in hdf5 format
......@@ -388,7 +411,7 @@ class Mixture(object):
f.create_dataset('/invcov', self.invcov.shape, "d", self.invcov,
compression="gzip",
fletcher32=True)
f.create_dataset('/cov_var_ctl', self.cov_var_ctl.shape, "d",
f.create_dataset('/cov_var_ctl', self.cov_var_ctl.shape, "d",
self.cov_var_ctl,
compression="gzip",
fletcher32=True)
......@@ -398,10 +421,10 @@ class Mixture(object):
f.create_dataset('/det', self.det.shape, "d", self.det,
compression="gzip",
fletcher32=True)
f.create_dataset('/A', self.A.shape, "d", self.A,
f.create_dataset('/A', np.array(self.A).shape, "d", np.array(self.A),
compression="gzip",
fletcher32=True)
f.close()
@check_path_existance
......@@ -452,15 +475,17 @@ class Mixture(object):
def _compute_all(self):
"""Compute determinant and constant values for each distribution"""
if self.invcov.ndim == 2:
self.det = 1.0 / np.prod(self.invcov, axis=1) # for Diagonal covariance only
elif self.invcov.ndim == 3:
if self.invcov.ndim == 2: # for Diagonal covariance only
self.det = 1.0 / np.prod(self.invcov, axis=1)
elif self.invcov.ndim == 3: # For full covariance dstributions
for gg in range(self.mu.shape[1]):
self.det[gg] = np.linalg.det(self.invcov[gg])[0] # a verifier
self.det[gg] = 1./np.linalg.det(self.invcov[gg])
self.cst = 1.0 / (np.sqrt(self.det) *
(2.0 * np.pi) ** (self.dim() / 2.0))
self.A = (np.square(self.mu) * self.invcov).sum(1) - 2.0 * (np.log(self.w) + np.log(self.cst))
self.cst = 1.0 / (np.sqrt(self.det) * (2.0 * np.pi) ** (self.dim() / 2.0))
if self.invcov.ndim == 2:
self.A = (np.square(self.mu) * self.invcov).sum(1) - 2.0 * (np.log(self.w) + np.log(self.cst))
elif self.invcov.ndim == 3:
self.A = 0
def validate(self):
"""Verify the format of the Mixture
......@@ -517,23 +542,13 @@ class Mixture(object):
"""
if cep.ndim == 1:
cep = cep[:, np.newaxis]
A = self.A
# ON NE GERE PAS ENCORE L'ADAPTATION MAP, JUSTE L'E.M.
#if mu is None:
# mu = self.mu
#else:
# # for MAP, Compute the data independent term
# A = (np.square(mu.reshape(self.mu.shape)) * self.invcov).sum(1) \
# - 2.0 * (np.log(self.w) + np.log(self.cst))
if mu is None:
mu = self.mu
tmp = (cep - mu[:,np.newaxis,:])
a = np.einsum('ijk,ikm->ijm', tmp, self.invchol)
lp = np.log(self.w[:, np.newaxis]) + np.log(self.cst[:,np.newaxis]) - 0.5 * (a * a).sum(-1)
# Compute the data independent term
B = np.dot(np.square(cep), self.invcov.T) \
- 2.0 * np.dot(cep, np.transpose(mu.reshape(self.mu.shape) * self.invcov))
# Compute the exponential term
lp = -0.5 * (B + A)
return lp
return lp.T
def compute_log_posterior_probabilities(self, cep, mu=None):
""" Compute log posterior probabilities for a set of feature frames.
......@@ -544,9 +559,9 @@ class Mixture(object):
:return: A ndarray of log-posterior probabilities corresponding to the
input feature set.
"""
"""
if cep.ndim == 1:
cep = cep[:, np.newaxis]
cep = cep[np.newaxis, :]
A = self.A
if mu is None:
mu = self.mu
......@@ -558,7 +573,7 @@ class Mixture(object):
# Compute the data independent term
B = np.dot(np.square(cep), self.invcov.T) \
- 2.0 * np.dot(cep, np.transpose(mu.reshape(self.mu.shape) * self.invcov))
# Compute the exponential term
lp = -0.5 * (B + A)
return lp
......@@ -610,32 +625,6 @@ class Mixture(object):
self._compute_all()
def _expectation_full(self, accum, cep):
    """Expectation step of the EM algorithm. Calculate the expected value
    of the log likelihood function, with respect to the conditional
    distribution.

    :param accum: a Mixture object to store the accumulated statistics
    :param cep: a set of input feature frames

    :return loglk: float, the log-likelihood computed over the input set of
        feature frames.
    """
    # Promote a single frame vector to a (frames, dim) matrix.
    if cep.ndim == 1:
        cep = cep[:, np.newaxis]
    lp = self.compute_log_posterior_probabilities_full(cep)
    pp, loglk = sum_log_probabilities(lp)
    # zero order statistics
    accum.w += pp.sum(0)
    # first order statistics
    accum.mu += np.dot(cep.T, pp).T
    # second order statistics
    # NOTE(review): this accumulates per-dimension E[x^2] (diagonal form)
    # even though this is the "full" expectation -- confirm that full
    # covariance second-order stats (outer products) are handled elsewhere.
    accum.invcov += np.dot(np.square(cep.T), pp).T  # version for diagonal covariance
    # return the log-likelihood
    return loglk
def _expectation(self, accum, cep):
"""Expectation step of the EM algorithm. Calculate the expected value
of the log likelihood function, with respect to the conditional
......@@ -649,15 +638,22 @@ class Mixture(object):
"""
if cep.ndim == 1:
cep = cep[:, np.newaxis]
lp = self.compute_log_posterior_probabilities(cep)
pp, loglk = sum_log_probabilities(lp)
if self.invcov.ndim == 2:
lp = self.compute_log_posterior_probabilities(cep)
elif self.invcov.ndim == 3:
lp = self.compute_log_posterior_probabilities_full(cep)
pp, loglk = sum_log_probabilities(lp)
# zero order statistics
accum.w += pp.sum(0)
# first order statistics
accum.mu += np.dot(cep.T, pp).T
# second order statistics
accum.invcov += np.dot(np.square(cep.T), pp).T # version for diagonal covariance
if self.invcov.ndim == 2:
accum.invcov += np.dot(np.square(cep.T), pp).T # version for diagonal covariance
elif self.invcov.ndim == 3:
tmp = np.einsum('ijk,ilk->ijl', cep[:,:,np.newaxis],cep[:,:,np.newaxis])
accum.invcov += np.einsum('ijk,im->mjk',tmp, pp)
# return the log-likelihood
return loglk
......@@ -690,9 +686,16 @@ class Mixture(object):
"""
self.w = accum.w / np.sum(accum.w)
self.mu = accum.mu / accum.w[:, np.newaxis]
cov = accum.invcov / accum.w[:, np.newaxis] - np.square(self.mu)
cov = self.varianceControl(cov, floor_cov, ceil_cov, self.cov_var_ctl)
self.invcov = 1.0 / cov
if self.invcov.ndim == 2:
cov = accum.invcov / accum.w[:, np.newaxis] - np.square(self.mu)
cov = self.varianceControl(cov, floor_cov, ceil_cov, self.cov_var_ctl)
self.invcov = 1.0 / cov
elif self.invcov.ndim == 3:
cov = accum.invcov / accum.w[:, np.newaxis, np.newaxis] - np.einsum('ijk,ilk->ijl', self.mu[:,:,np.newaxis],self.mu[:,:,np.newaxis])
# ADD VARIANCE CONTROL
for gg in range(self.w.shape[0]):
self.invcov[gg] = np.linalg.inv(cov[gg])
self.invchol[gg] = np.linalg.cholesky(self.invcov[gg])
self._compute_all()
def _init(self, cep):
......@@ -755,10 +758,10 @@ class Mixture(object):
logging.debug('Expectation')
# E step
self._expectation_list(stat_acc=accum,
feature_list=featureList,
self._expectation_list(stat_acc=accum,
feature_list=featureList,
feature_server=fs,
llk_acc=llk_acc,
llk_acc=llk_acc,
numThread=numThread)
llk.append(llk_acc[0] / np.sum(accum.w))
......@@ -822,7 +825,7 @@ class Mixture(object):
tmp = multiprocessing.Array(ctypes.c_double, llk_acc.size)
llk_acc = np.ctypeslib.as_array(tmp.get_obj())
llk_acc = llk_acc.reshape(sh)
# E step
# llk.append(self._expectation_parallel(accum, cep, numThread) / cep.shape[0])
# self._expectation(accum,cep)
......@@ -850,6 +853,11 @@ class Mixture(object):
self.name, len(cep))
return llk
def EM_full(self, cep, distrib_nb, iteration_min=3, iteration_max=10,
            llk_gain=0.01, do_init=True):
    # WARNING: this assumes the Mixture has already been initialized
    # from a diagonal mixture.  Not implemented yet.
    pass
def _init_uniform(self, cep, distrib_nb):
# Load data to initialize the mixture
......@@ -873,3 +881,73 @@ class Mixture(object):
self._compute_all()
def EM_convert_full(self, fs, featureList, distrib_nb,
iterations=2, numThread=1):
"""Expectation-Maximization estimation of the Mixture parameters.
:param fs: sidekit.FeaturesServer used to load data
:param featureList: list of feature files to train the GMM
:param distrib_nb: final number of distributions
:param iterations: list of iteration number for each step of the learning process
:param numThread: number of thread to launch for parallel computing
:param llk_gain: limit of the training gain. Stop the training when gain between two iterations is less than this value
:return llk: a list of log-likelihoods obtained after each iteration
"""
llk = []
# for N iterations:
for it in range(iterations):
logging.debug('EM convert full it: %d', it)
# initialize the accumulator
accum = copy.deepcopy(self)
for i in range(it):
accum._reset()
# serialize the accum
accum._serialize()
llk_acc = np.zeros(1)
sh = llk_acc.shape
with warnings.catch_warnings():
warnings.simplefilter('ignore', RuntimeWarning)
tmp = multiprocessing.Array(ctypes.c_double, llk_acc.size)
llk_acc = np.ctypeslib.as_array(tmp.get_obj())
llk_acc = llk_acc.reshape(sh)
logging.debug('Expectation')
# E step
self._expectation_list(stat_acc=accum,
feature_list=featureList,
feature_server=fs,
llk_acc=llk_acc,
numThread=numThread)
llk.append(llk_acc[0] / np.sum(accum.w))
# M step
logging.debug('Maximisation')
self._maximization(accum)
if i > 0:
# gain = llk[-1] - llk[-2]
# if gain < llk_gain:
# logging.debug(
# 'EM (break) distrib_nb: %d %i/%d gain: %f -- %s, %d',
# self.mu.shape[0], i + 1, it, gain, self.name,
# len(cep))
# break
# else:
# logging.debug(
# 'EM (continu) distrib_nb: %d %i/%d gain: %f -- %s, %d',
# self.mu.shape[0], i + 1, it, gain, self.name,
# len(cep))
# break
pass
else:
# logging.debug(