Commit 6c5c4a9e authored by Anthony Larcher's avatar Anthony Larcher
Browse files

Initial

parents
Pipeline #6 skipped
*.pyc
*.DS_Store
docs
# -*- coding: utf-8 -*-
"""
Copyright 2014-2016 Anthony Larcher
.. topic::sidekit
| This file is part of SIDEKIT.
|
| SIDEKIT is a python package for speaker verification.
| Home page: http://www-lium.univ-lemans.fr/sidekit/
|
| SIDEKIT is free software: you can redistribute it and/or modify
| it under the terms of the GNU Lesser General Public License as
| published by the Free Software Foundation, either version 3 of the License,
| or (at your option) any later version.
|
| SIDEKIT is distributed in the hope that it will be useful,
| but WITHOUT ANY WARRANTY; without even the implied warranty of
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
| GNU Lesser General Public License for more details.
|
| You should have received a copy of the GNU Lesser General Public License
| along with SIDEKIT. If not, see <http://www.gnu.org/licenses/>.
"""
PARALLEL_MODULE = 'multiprocessing'  # can be 'threading' or 'multiprocessing'; MPI is planned in the future
import sys
# Import libsvm
import logging
from ctypes import *
from ctypes.util import find_library
from os import path
from sidekit.sidekit_wrappers import *
# Import bosaris-like classes
from sidekit.bosaris import IdMap
from sidekit.bosaris import Ndx
from sidekit.bosaris import Key
from sidekit.bosaris import Scores
from sidekit.bosaris import DetPlot
from sidekit.bosaris import effective_prior
from sidekit.bosaris import fast_minDCF
# Import classes
from sidekit.features_server import FeaturesServer
from sidekit.mixture import Mixture
from sidekit.statserver import StatServer
import sidekit.frontend.io
import sidekit.frontend.vad
import sidekit.frontend.normfeat
import sidekit.frontend.features
# Import function libraries
from sidekit.sidekit_io import *
from sidekit.sv_utils import *
from sidekit.lid_utils import *
from sidekit.gmm_scoring import *
from sidekit.jfa_scoring import *
from sidekit.iv_scoring import *
from sidekit.theano_utils import *
# Package metadata (license, authorship, version and documentation format).
__license__ = "LGPL"
__author__ = "Anthony Larcher"
__copyright__ = "Copyright 2014-2016 Anthony Larcher"
__version__ = "1.0.4"
__maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production"
__docformat__ = 'reStructuredText'
# Try to load the libsvm shared library: first the copy bundled in the
# 'libsvm' directory next to this file, then any system-wide installation.
libsvm_loaded = False
try:
    # Only ``path`` is imported explicitly from ``os`` in this file, so use it
    # directly instead of relying on ``os`` leaking in through a wildcard
    # import (a NameError here used to be hidden by a bare ``except``).
    dirname = path.join(path.dirname(path.abspath(__file__)), 'libsvm')
    _libsvm_name = 'libsvm.dll' if sys.platform == 'win32' else 'libsvm.so.2'
    libsvm = CDLL(path.join(dirname, _libsvm_name))
    libsvm_loaded = True
except Exception:
    # The bundled library could not be loaded: fall back on the system one.
    # For unix the prefix 'lib' is not considered.
    for _candidate in ('svm', 'libsvm'):
        _found = find_library(_candidate)
        if _found:
            libsvm = CDLL(_found)
            libsvm_loaded = True
            break
    else:
        logging.warning('WARNING: libsvm is not installed, please refer to the' +
                        ' documentation if you intend to use SVM classifiers')

# SVM related modules are only exposed when the library is available.
if libsvm_loaded:
    from sidekit.libsvm import *
    from sidekit.svm_scoring import *
    from sidekit.svm_training import *
# Names exported by ``from sidekit import *`` (each entry listed once).
__all__ = ["bosaris",
           "frontend",
           "libsvm",
           "sv_utils",
           "gmm_scoring",
           "svm_scoring",
           "svm_training",
           "iv_scoring",
           "sidekit_io",
           "mixture",
           "statserver",
           "features_server",
           "theano_utils"]
License agreement
Agnitio Labs
Non-Commercial Use Only
_____________________________________________________________________
This AGNITIO Labs License Agreement, including all exhibits ("AGN-LA") is a
legal agreement between you and AGNITIO S. L. (“AGNITIO” or “we”) for the
software or data identified above, which may include source code, and any
associated materials, text or speech files, associated media and "online" or
electronic documentation and any updates we provide in our discretion
(together, the "Software").
By installing, copying, or otherwise using this Software, you agree to be bound
by the terms of this AGN-LA. If you do not agree, do not install copy or use
the Software. The Software is protected by copyright and other intellectual
property laws and is licensed, not sold.
SCOPE OF RIGHTS:
You may use, copy, reproduce, and distribute this Software for any
non-commercial purpose, subject to the restrictions in this AGN-LA. Some
purposes which can be non-commercial are teaching, academic research, public
demonstrations and personal experimentation. You may also distribute this
Software with books or other teaching materials, or publish the Software on
websites, that are intended to teach the use of the Software for academic or
other non-commercial purposes.
You may not use or distribute this Software or any derivative works in any form
for commercial purposes. Examples of commercial purposes would be running
business operations, licensing, leasing, or selling the Software, distributing
the Software for use with commercial products, using the Software in the
creation or use of commercial products or any other activity which purpose is
to procure a commercial gain to you or others.
If the Software includes source code or data, you may create derivative works
of such portions of the Software and distribute the modified Software for
non-commercial purposes, as provided herein.
If you distribute the Software or any derivative works of the Software, you
will distribute them under the same terms and conditions as in this license,
and you will not grant other rights to the Software or derivative works that
are different from those provided by this AGN-LA.
If you have created derivative works of the Software, and distribute such
derivative works, you will cause the modified files to carry prominent notices
so that recipients know that they are not receiving the original Software. Such
notices must state: (i) that you have changed the Software; and (ii) the date
of any changes.
In return, we simply require that you agree:
1. That you will not remove any copyright or other notices from the Software.
2. That if any of the Software is in binary format, you will not attempt to
modify such portions of the Software, or to reverse engineer or decompile them,
except and only to the extent authorized by applicable law.
3. That AGNITIO is granted back, without any restrictions or limitations, a
non-exclusive, perpetual, irrevocable, royalty-free, assignable and
sub-licensable license, to reproduce, publicly perform or display, install,
use, modify, post, distribute, make and have made, sell and transfer your
modifications to and/or derivative works of the Software source code or data,
for any purpose.
4. That any feedback about the Software provided by you to us is voluntarily
given, and AGNITIO shall be free to use the feedback as it sees fit without
obligation or restriction of any kind, even if the feedback is designated by
you as confidential.
5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS,
IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF
MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST
INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR
NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF
YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON
WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS.
6. THAT NEITHER AGNITIO NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR
ANY DAMAGES RELATED TO THE SOFTWARE OR THIS AGN-LA, INCLUDING DIRECT, INDIRECT,
SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW
PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS
LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE
WORKS.
7. That we have no duty of reasonable care or lack of negligence, and we are
not obligated to (and will not) provide technical support for the Software.
8. That if you breach this AGN-LA or if you sue anyone over patents that you
think may apply to or read on the Software or anyone's use of the Software,
this AGN-LA (and your license and rights obtained herein) terminate
automatically. Upon any such termination, you shall destroy all of your copies
of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this
AGN-LA shall survive any termination of this AGN-LA.
9. That the patent rights, if any, granted to you in this AGN-LA only apply to
the Software, not to any derivative works you make.
10. That the Software may be subject to Europe export jurisdiction at the time
it is licensed to you, and it may be subject to additional export or import
laws in other places. You agree to comply with all such laws and regulations
that may apply to the Software after delivery of the software to you.
11. That all rights not expressly granted to you in this AGN-LA are reserved.
12. That this AGN-LA shall be construed and controlled by the laws of the
Kingdom of Spain, without regard to conflicts of law. If any provision of this
AGN-LA shall be deemed unenforceable or contrary to law, the rest of this
AGN-LA shall remain in full effect and interpreted in an enforceable manner
that most nearly captures the intent of the original language.
Copyright (c) AGNITIO. All rights reserved.
# -*- coding: utf-8 -*-
"""
This package is a translation of a part of the BOSARIS toolkit.
The authors thank Niko Brummer and Agnitio for allowing them to
translate this code and provide the community with efficient structures
and tools.
The BOSARIS Toolkit is a collection of functions and classes in Matlab
that can be used to calibrate, fuse and plot scores from speaker recognition
(or other fields in which scores are used to test the hypothesis that two
samples are from the same source) trials involving a model and a test segment.
The toolkit was written at the BOSARIS2010 workshop which took place at the
University of Technology in Brno, Czech Republic from 5 July to 6 August 2010.
See the User Guide (available on the toolkit website) for a discussion of the
theory behind the toolkit and descriptions of some of the algorithms used.
The BOSARIS toolkit in MATLAB can be downloaded from `the website
<https://sites.google.com/site/bosaristoolkit/>`_.
"""
from sidekit.bosaris.idmap import IdMap
from sidekit.bosaris.ndx import Ndx
from sidekit.bosaris.plotwindow import PlotWindow
from sidekit.bosaris.key import Key
from sidekit.bosaris.scores import Scores
from sidekit.bosaris.detplot import DetPlot
from sidekit.bosaris.detplot import effective_prior
from sidekit.bosaris.detplot import fast_minDCF
# Package metadata (authorship, status, documentation format and credits).
__author__ = "Anthony Larcher"
__maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production"
__docformat__ = 'reStructuredText'
__credits__ = ["Niko Brummer", "Edward de Villiers"]
This diff is collapsed.
# -*- coding: utf-8 -*-
# This package is a translation of a part of the BOSARIS toolkit.
# The authors thank Niko Brummer and Agnitio for allowing them to
# translate this code and provide the community with efficient structures
# and tools.
#
# The BOSARIS Toolkit is a collection of functions and classes in Matlab
# that can be used to calibrate, fuse and plot scores from speaker recognition
# (or other fields in which scores are used to test the hypothesis that two
# samples are from the same source) trials involving a model and a test segment.
# The toolkit was written at the BOSARIS2010 workshop which took place at the
# University of Technology in Brno, Czech Republic from 5 July to 6 August 2010.
# See the User Guide (available on the toolkit website) for a discussion of the
# theory behind the toolkit and descriptions of some of the algorithms used.
#
# The BOSARIS toolkit in MATLAB can be downloaded from `the website
# <https://sites.google.com/site/bosaristoolkit/>`_.
"""
This is the 'idmap' module
"""
import os.path
import sys
import numpy as np
import pickle
import gzip
import logging
import copy
from sidekit.sidekit_wrappers import check_path_existance
# h5py is optional: record whether it could be imported so that the HDF5
# code paths can be guarded at run time.
h5py_loaded = True
try:
    import h5py
except ImportError:
    h5py_loaded = False
# Module metadata (authorship, status, documentation format and credits).
__author__ = "Anthony Larcher"
__maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production"
__docformat__ = 'reStructuredText'
__credits__ = ["Niko Brummer", "Edward de Villiers"]
class IdMap:
"""A class that stores a map between identifiers (strings). One
list is called 'leftids' and the other 'rightids'. The class
provides methods that convert a sequence of left ids to a
sequence of right ids and vice versa. If 'leftids' or 'rightids'
contains duplicates then all occurrences are used as the index
when mapping.
:attr leftids: a list of classes in a ndarray
:attr rightids: a list of segments in a ndarray
:attr start: index of the first frame of the segment
:attr stop: index of the last frame of the segment
"""
def __init__(self, idmapFileName='', idmapFileFormat='hdf5'):
"""Initialize an IdMap object
:param idmapFileName: name of a file to load. Default is ''.
:param idmapFileFormat: format of the file to load. Can be:
- 'pickle'
- 'hdf5' (default)
- 'txt'
In case the idmapFileName is empty, initialize an empty IdMap object.
"""
self.leftids = np.empty(0, dtype="|O")
self.rightids = np.empty(0, dtype="|O")
self.start = np.empty(0, dtype="|O")
self.stop = np.empty(0, dtype="|O")
if idmapFileName == '':
pass
elif idmapFileFormat.lower() == 'pickle':
self.read_pickle(idmapFileName)
elif idmapFileFormat.lower() in ['hdf5', 'h5']:
if h5py_loaded:
self.read_hdf5(idmapFileName)
else:
raise Exception('h5py is not installed, chose another' + ' format to load your IdMap')
elif idmapFileFormat.lower() == 'txt':
self.read_txt(idmapFileName)
else:
raise Exception('Wrong output format, must be pickle, hdf5 or txt')
@check_path_existance
def save(self, outputFileName):
    """Write the IdMap to a file whose format is chosen from its extension.

    The extension (case-insensitive) selects the writer: '.p' for pickle,
    '.txt' for text and '.hdf5' or '.h5' for HDF5.

    :param outputFileName: name of the file to write to

    :raise Exception: if the extension is not supported, or if HDF5 is
        requested while h5py is not installed.
    """
    suffix = os.path.splitext(outputFileName)[1]
    kind = suffix[1:].lower()

    if kind == 'p':
        self.save_pickle(outputFileName)
        return
    if kind == 'txt':
        self.save_txt(outputFileName)
        return
    if kind not in ('hdf5', 'h5'):
        raise Exception('Wrong output format, must be pickle, hdf5 or txt')

    # HDF5 requested: only possible when the optional h5py import succeeded.
    if not h5py_loaded:
        raise Exception('h5py is not installed, chose another'
                        ' format to load your IdMap')
    self.save_hdf5(outputFileName)
@check_path_existance
def save_hdf5(self, outpuFileName):
    """Save IdMap in HDF5 format.

    Four datasets are written: "leftids" and "rightids" (as byte strings),
    and "start" and "stop" (as 32-bit integers). Boundaries that become NaN
    when converted to float (such as None) are stored as -1.

    :param outpuFileName: name of the file to write to

    :raise AssertionError: if the IdMap does not pass validate()
    """
    assert self.validate(), "Error: wrong IdMap format"
    with h5py.File(outpuFileName, "w") as f:
        f.create_dataset("leftids", data=self.leftids.astype('S'),
                         maxshape=(None,),
                         compression="gzip",
                         fletcher32=True)
        f.create_dataset("rightids", data=self.rightids.astype('S'),
                         maxshape=(None,),
                         compression="gzip",
                         fletcher32=True)

        # WRITE START and STOP
        for dataset_name, boundaries in (("start", self.start),
                                         ("stop", self.stop)):
            # Work on a copy so the in-memory IdMap keeps its missing values.
            values = copy.deepcopy(boundaries)
            values[np.isnan(boundaries.astype('float'))] = -1
            # int32 rather than int8: frame indices easily exceed 127 and
            # would silently wrap around in an 8-bit integer.
            values = values.astype('int32', copy=False)
            f.create_dataset(dataset_name, data=values,
                             maxshape=(None,),
                             compression="gzip",
                             fletcher32=True)
@check_path_existance
def save_pickle(self, outputFileName):
    """Pickle the whole IdMap object into a gzip-compressed file.

    :param outputFileName: name of the file to write to
    """
    with gzip.open(outputFileName, "wb") as stream:
        pickle.Pickler(stream).dump(self)
@check_path_existance
def save_txt(self, outputFileName):
    """Saves the Id_Map to a text file.

    One line is written per entry, with the left id, the right id, the
    start and the stop separated by single spaces. Fields that are None or
    an empty string are omitted from the line.

    :param outputFileName: name of the output text file
    """
    with open(outputFileName, 'w') as outputFile:
        for fields in zip(self.leftids, self.rightids,
                          self.start, self.stop):
            # Skip only missing values: a boundary equal to 0 is valid and
            # must be written. str() allows numeric boundaries to be joined.
            kept = [str(field) for field in fields
                    if field is not None
                    and not (isinstance(field, str) and field == '')]
            outputFile.write(' '.join(kept) + '\n')
def map_left_to_right(self, leftidlist):
    """Maps an array of ids to a new array of ids using the given map.
    The input ids are matched against the leftids of the map and the
    output ids are taken from the corresponding rightids of the map.

    Beware: if leftids are not unique in the IdMap, only the last value
    corresponding is kept

    :param leftidlist: an array of strings to be matched against the
        leftids of the idmap. The rightids corresponding to these
        leftids will be returned. Ids that appear several times are
        mapped each time they appear.

    :return: an array of strings that are the mappings of the
        strings in leftidlist. Ids that are not in the map are skipped
        and a warning giving their number is logged.
    """
    # Later duplicates overwrite earlier ones, hence "last value is kept".
    lookup = dict(zip(self.leftids, self.rightids))

    # Collect the results in a list first: the number of outputs depends on
    # the input list (duplicates included), not on the number of unique ids.
    mapped = [lookup[left] for left in leftidlist if left in lookup]

    lostIds = len({left for left in leftidlist if left not in lookup})
    if lostIds:
        logging.warning('{} ids could not be mapped'.format(lostIds))

    rightids = np.empty(len(mapped), '|O')
    for idx, value in enumerate(mapped):
        rightids[idx] = value
    return rightids
def map_right_to_left(self, rightidlist):
    """Maps an array of ids to a new array of ids using the given map.
    The input ids are matched against the rightids of the map and the
    output ids are taken from the corresponding leftids of the map.

    Beware: if rightids are not unique in the IdMap, only the last value
    corresponding is kept

    :param rightidlist: An array of strings to be matched against the
        rightids of the idmap. The leftids corresponding to these
        rightids will be returned. Ids that appear several times are
        mapped each time they appear.

    :return: an array of strings that are the mappings of the
        strings in rightidlist. Ids that are not in the map are skipped
        and a warning giving their number is logged.
    """
    # Later duplicates overwrite earlier ones, hence "last value is kept".
    lookup = dict(zip(self.rightids, self.leftids))

    # Collect the results in a list first: the number of outputs depends on
    # the input list (duplicates included), not on the number of unique ids.
    mapped = [lookup[right] for right in rightidlist if right in lookup]

    lostIds = len({right for right in rightidlist if right not in lookup})
    if lostIds:
        logging.warning('{} ids could not be mapped'.format(lostIds))

    leftids = np.empty(len(mapped), '|O')
    for idx, value in enumerate(mapped):
        leftids[idx] = value
    return leftids
def filter_on_left(self, idlist, keep):
"""Removes some of the information in an idmap. Depending on the
value of 'keep', the idlist indicates the strings to retain or
the strings to discard.
:param idlist: an array of strings which will be compared with
the leftids of the current.
:param keep: A boolean indicating whether idlist contains the ids to
keep or to discard.
:return: a filtered version of the current IdMap.
"""
# get the list of ids to keep
if keep:
keepids = np.unique(idlist)
else:
keepids = np.setdiff1d(self.leftids, idlist)
keep_idx = np.in1d(self.leftids, keepids)
out_idmap = IdMap()
out_idmap.leftids = self.leftids[keep_idx]
out_idmap.rightids = self.rightids[keep_idx]
out_idmap.start = self.start[keep_idx]
out_idmap.stop = self.stop[keep_idx]
return out_idmap
def filter_on_right(self, idlist, keep):
"""Removes some of the information in an idmap. Depending on the
value of 'keep', the idlist indicates the strings to retain or
the strings to discard.
:param idlist: an array of strings which will be compared with
the rightids of the current IdMap.
:param keep: a boolean indicating whether idlist contains the ids to
keep or to discard.
:return: a filtered version of the current IdMap.
"""
# get the list of ids to keep
if keep:
keepids = np.unique(idlist)
else:
keepids = np.setdiff1d(self.rightids, idlist)
keep_idx = np.in1d(self.rightids, keepids)
out_idmap = IdMap()
out_idmap.leftids = self.leftids[keep_idx]
out_idmap.rightids = self.rightids[keep_idx]
out_idmap.start = self.start[keep_idx]
out_idmap.stop = self.stop[keep_idx]
return out_idmap
def validate(self, warn=False):
    """Checks that an object of type Id_Map obeys certain rules that
    must always be true: leftids, rightids, start and stop all have the
    same shape and are one-dimensional.

    :param warn: boolean. If True, print a warning if strings are
        duplicated in either left or right array

    :return: a boolean value indicating whether the object is valid.
    """
    same_shape = (self.leftids.shape
                  == self.rightids.shape
                  == self.start.shape
                  == self.stop.shape)
    # Use the logical ``and``: with the bitwise ``&`` the expression was
    # parsed as ``(same_shape & ndim) == 1`` and accepted e.g. 3-D arrays.
    ok = same_shape and self.leftids.ndim == 1

    if warn and (self.leftids.shape != np.unique(self.leftids).shape):
        logging.warning('The left id list contains duplicate identifiers')
    if warn and (self.rightids.shape != np.unique(self.rightids).shape):
        logging.warning('The right id list contains duplicate identifiers')
    return ok
def set(self, left, right):
    """Replace the ids of the IdMap and reset its segment boundaries.

    :param left: new value of the leftids attribute
    :param right: new value of the rightids attribute; start and stop are
        re-created as object arrays of the same shape
    """
    self.leftids, self.rightids = left, right
    shape = self.rightids.shape
    self.start = np.empty(shape, dtype='|O')
    self.stop = np.empty(shape, dtype='|O')
def read(self, inputFileName):
    """Read an IdMap object from a file. The format of the file to read
    from is determined by the extension of the filename.
    This extension can be '.p' for pickle format,
    '.txt' for text format and '.hdf5' or '.h5' for HDF5 format.

    :param inputFileName: name of the file to read from

    :raise Exception: if the extension is not supported, or if an HDF5
        file is given while h5py is not installed.
    """
    extension = os.path.splitext(inputFileName)[1][1:].lower()
    if extension == 'p':
        self.read_pickle(inputFileName)
    elif extension in ['hdf5', 'h5']:
        # Fail loudly instead of silently leaving the IdMap untouched when
        # the optional h5py dependency is missing.
        if not h5py_loaded:
            raise Exception('h5py is not installed, chose another' +
                            ' format to load your IdMap')
        self.read_hdf5(inputFileName)
    elif extension == 'txt':
        self.read_txt(inputFileName)
    else:
        raise Exception('Wrong input format, must be pickle, hdf5 or txt')
def read_hdf5(self, inputFileName):
"""Read IdMap in hdf5 format.
:param inputFileName: name of the file to read from
"""
with h5py.File(inputFileName, "r") as f:
self.leftids = f.get("leftids").value
self.rightids = f.get("rightids").value
# if running python 3, need a conversion to unicode