Commit 289b16fe authored by Anthony Larcher's avatar Anthony Larcher
Browse files

svm

parent 390870ca
......@@ -28,8 +28,12 @@ Copyright 2014-2016 Anthony Larcher and Sylvain Meignier
useful parameters for speaker verification.
"""
import logging
import numpy
import os
import sys
from ctypes import *
from ctypes.util import find_library
PARALLEL_MODULE = 'multiprocessing'  # can be threading or multiprocessing; MPI is planned in the future
PARAM_TYPE = numpy.float32
......@@ -107,6 +111,33 @@ except ImportError:
from sidekit.sv_utils import clean_stat_server
libsvm_loaded = False
try:
dirname = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'libsvm')
if sys.platform == 'win32':
libsvm = CDLL(os.path.join(dirname, r'libsvm.dll'))
libsvm_loaded = True
else:
libsvm = CDLL(os.path.join(dirname, 'libsvm.so.2'))
libsvm_loaded = True
except:
# For unix the prefix 'lib' is not considered.
if find_library('svm'):
libsvm = CDLL(find_library('svm'))
libsvm_loaded = True
elif find_library('libsvm'):
libsvm = CDLL(find_library('libsvm'))
libsvm_loaded = True
else:
libsvm_loaded = False
logging.warning('WARNING: libsvm is not installed, please refer to the' +
' documentation if you intend to use SVM classifiers')
if libsvm_loaded:
from sidekit.libsvm import *
from sidekit.svm_scoring import *
from sidekit.svm_training import *
__author__ = "Anthony Larcher and Sylvain Meignier"
......
......@@ -906,7 +906,7 @@ def read_htk(input_file_name,
fid.seek(0, 0)  # go back to the beginning of the file
if flen > 14 + by * nf: # if file too long
dt = 2 # change type to LPRFEC
hd[5] = 1 # set compressed flag
hd[4] = 1 # set compressed flag
nf += 4 # frame count doesn't include
# compression constants in this case
......@@ -919,7 +919,7 @@ def read_htk(input_file_name,
if dt == 5:
d /= 32767 # scale IREFC
else:
if hd[5]: # compressed data - first read scales
if hd[4]: # compressed data - first read scales
nf -= 4 # frame count includes compression constants
n_col = int(by / 2)
scales = numpy.asarray(struct.unpack(">" + "f" * n_col, fid.read(4 * n_col)))
......
No preview for this file type
......@@ -10,7 +10,7 @@ import os
import pickle
sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path
from sidekit.libsvm.svm import svm_node, svm_problem, svm_parameter, svm_model, toPyModel
from sidekit.libsvm.svm import *
def save_svm(svm_file_name, w, b):
"""
......@@ -156,7 +156,7 @@ def svm_train(arg1, arg2=None, arg3=None):
assert isinstance(arg2, (list, tuple))
y, x, options = arg1, arg2, arg3
param = svm_parameter(options)
prob = svm_problem(y, x, isKernel=(param.kernel_type == PRECOMPUTED))
prob = svm_problem(y, x, isKernel=(param.kernel_type == 'PRECOMPUTED'))
elif isinstance(arg1, svm_problem):
prob = arg1
if isinstance(arg2, svm_parameter):
......@@ -166,7 +166,7 @@ def svm_train(arg1, arg2=None, arg3=None):
if prob is None or param is None:
raise TypeError("Wrong types for the arguments")
if param.kernel_type == PRECOMPUTED:
if param.kernel_type == 'PRECOMPUTED':
for xi in prob.x_space:
idx, val = xi[0].index, xi[0].value
if xi[0].index != 0:
......@@ -194,7 +194,8 @@ def svm_train(arg1, arg2=None, arg3=None):
print("Cross Validation Accuracy = %g%%" % ACC)
return ACC
else:
m = sidekit.libsvm.svm_train(prob, param)
#m = sidekit.libsvm.svm_train(prob, param)
m = libsvm.svm_train(prob, param)
m = toPyModel(m)
# If prob is destroyed, data including SVs pointed by m can remain.
......@@ -253,7 +254,7 @@ def svm_predict(y, x, m, options=""):
prob_estimates = (c_double * nr_class)()
for xi in x:
xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == 'PRECOMPUTED'))
label = libsvm.svm_predict_probability(m, xi, prob_estimates)
values = prob_estimates[:nr_class]
pred_labels += [label]
......@@ -267,7 +268,7 @@ def svm_predict(y, x, m, options=""):
nr_classifier = nr_class*(nr_class-1)//2
dec_values = (c_double * nr_classifier)()
for xi in x:
xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == 'PRECOMPUTED'))
label = sidekit.libsvm.svm_predict_values(m, xi, dec_values)
if nr_class == 1:
values = [1]
......
......@@ -632,7 +632,7 @@ class FForwardNetwork(object):
# If not done yet, compute mean and standard deviation on all training data
if 0 in [len(self.params["input_mean"]), len(self.params["input_std"])]:
if True:
if False:
self.log.info("Compute mean and standard deviation from the training features")
feature_nb, self.params["input_mean"], self.params["input_std"] = mean_std_many(features_server,
feature_size,
......@@ -644,9 +644,11 @@ class FForwardNetwork(object):
else:
self.log.info("Load input mean and standard deviation from file")
ms = numpy.load("input_mean_std.npz")
self.params["input_mean"] = ms["input_mean"]
self.params["input_std"] = ms["input_std"]
#ms = numpy.load("input_mean_std.npz")
#self.params["input_mean"] = ms["input_mean"]
#self.params["input_std"] = ms["input_std"]
self.params["input_mean"] = numpy.zeros(360)
self.params["input_std"] = numpy.ones(360)
# Train the model and get the parameters
self.params = self._train_acoustic(numpy.inf,
......@@ -876,7 +878,8 @@ class FForwardNetwork(object):
# Save in HDF5 format, labels are saved if they don't exist in the output file
with h5py.File(output_file_structure.format(show), "a") as h5f:
vad = label if show + "vad" in h5f else numpy.ones(bnf.shape[0], dtype='bool')
#vad = label if show + "vad" in h5f else numpy.ones(bnf.shape[0], dtype='bool')
vad = None if show + "vad" in h5f else label
bnf_mean = bnf[vad, :].mean(axis=0)
bnf_std = bnf[vad, :].std(axis=0)
sidekit.frontend.io.write_hdf5(show, h5f,
......
......@@ -483,7 +483,8 @@ class StatServer:
:return: a list of segments belonging to the model
"""
return self.segset[self.modelset == mod_id, :]
#return self.segset[self.modelset == mod_id, :]
return self.segset[self.modelset == mod_id]
def get_model_segments_by_index(self, mod_idx):
"""Return the list of segments belonging to model number modIDX
......@@ -1747,4 +1748,4 @@ class StatServer:
statserver.stat0 = h5f[prefix+"stat0"].value[idx, :]
statserver.stat1 = h5f[prefix+"stat1"].value[idx, :]
return statserver
\ No newline at end of file
return statserver
......@@ -27,10 +27,12 @@ Copyright 2014-2016 Anthony Larcher
:mod:`svm_scoring` provides functions to perform speaker verification
by using Support Vector Machines.
"""
import ctypes
import os
import sys
import numpy
import threading
#import threading
import multiprocessing
import logging
import sidekit.sv_utils
from sidekit.bosaris import Ndx
......@@ -58,7 +60,7 @@ def svm_scoring_singleThread(svm_dir, test_sv, ndx, score, seg_idx=None):
:param score: Scores object to fill
:param seg_idx: list of segments to classify. Classify all if the list is empty.
"""
assert os.path.isdir(svm_dir), 'First parameter should be a directory'
#assert os.path.isdir(svm_dir), 'First parameter should be a directory'
assert isinstance(test_sv, StatServer), 'Second parameter should be a StatServer'
assert isinstance(ndx, Ndx), 'Third parameter should be an Ndx'
......@@ -69,7 +71,9 @@ def svm_scoring_singleThread(svm_dir, test_sv, ndx, score, seg_idx=None):
Msvm = numpy.zeros((ndx.modelset.shape[0], test_sv.stat1.shape[1]))
bsvm = numpy.zeros(ndx.modelset.shape[0])
for m in range(ndx.modelset.shape[0]):
svm_file_name = os.path.join(svm_dir, ndx.modelset[m] + '.svm')
#svm_file_name = os.path.join(svm_dir, ndx.modelset[m] + '.svm')
svm_file_name = svm_dir.format(ndx.modelset[m])
print("load file : {}".format(svm_file_name))
w, b = sidekit.sv_utils.read_svm(svm_file_name)
Msvm[m, :] = w
bsvm[m] = b
......@@ -103,7 +107,7 @@ def svm_scoring(svm_dir, test_sv, ndx, num_thread=1):
:return: a Score object.
"""
# Remove missing models and test segments
existing_models, model_idx = sidekit.sv_utils.check_file_list(ndx.modelset, svm_dir, '.svm')
existing_models, model_idx = sidekit.sv_utils.check_file_list(ndx.modelset, svm_dir)
clean_ndx = ndx.filter(existing_models, test_sv.segset, True)
score = Scores()
......@@ -112,16 +116,22 @@ def svm_scoring(svm_dir, test_sv, ndx, num_thread=1):
score.segset = clean_ndx.segset
score.scoremask = clean_ndx.trialmask
tmp = multiprocessing.Array(ctypes.c_double, score.scoremat.size)
score.scoremat = numpy.ctypeslib.as_array(tmp.get_obj())
score.scoremat = score.scoremat.reshape(score.modelset.shape[0], score.segset.shape[0])
# Split the list of segment to process for multi-threading
los = numpy.array_split(numpy.arange(clean_ndx.segset.shape[0]), num_thread)
jobs = []
for idx in los:
p = threading.Thread(target=svm_scoring_singleThread,
p = multiprocessing.Process(target=svm_scoring_singleThread,
args=(svm_dir, test_sv, ndx, score, idx))
jobs.append(p)
p.start()
for p in jobs:
p.join()
return score
......@@ -31,7 +31,7 @@ import numpy
import os
import logging
from sidekit.libsvm.svmutil import svm_problem, svm_parameter, svm_train
import threading
import multiprocessing
import sidekit.sv_utils
......@@ -121,10 +121,11 @@ def svm_training(svmDir, background_sv, enroll_sv, num_thread=1):
# Process each sub-list of models in a separate thread
jobs = []
for idx, models in enumerate(listOfModels):
p = threading.Thread(target=svm_training_singleThread,
args=(K, msn, bsn, svmDir, background_sv, models, enroll_sv))
p = multiprocessing.Process(target=svm_training_singleThread,
args=(K, msn, bsn, svmDir, background_sv, models, enroll_sv))
jobs.append(p)
p.start()
for p in jobs:
p.join()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment