Commit f81cab1b by Anthony Larcher

doc

parent 846b53ec
 ... ... @@ -47,6 +47,7 @@ from sidekit.bosaris import Key from sidekit.bosaris import Scores from sidekit.bosaris import DetPlot from sidekit.bosaris import effective_prior from sidekit.bosaris import logit_effective_prior from sidekit.bosaris import fast_minDCF # Import classes ... ...
 ... ... @@ -26,6 +26,7 @@ from sidekit.bosaris.key import Key from sidekit.bosaris.scores import Scores from sidekit.bosaris.detplot import DetPlot from sidekit.bosaris.detplot import effective_prior from sidekit.bosaris.detplot import logit_effective_prior from sidekit.bosaris.detplot import fast_minDCF ... ...
 ... ... @@ -109,6 +109,28 @@ def effective_prior(Ptar, cmiss, cfa): p = Ptar * cmiss / (Ptar * cmiss + (1 - Ptar) * cfa) return p def logit_effective_prior(Ptar, cmiss, cfa): """This function adjusts a given prior probability of target p_targ, to incorporate the effects of a cost of miss, cmiss, and a cost of false-alarm, cfa. In particular note: EFFECTIVE_PRIOR(EFFECTIVE_PRIOR(p,cmiss,cfa),1,1) = EFFECTIVE_PRIOR(p,cfa,cmiss) The effective prior for the NIST SRE detection cost function, with p_targ = 0.01, cmiss = 10, cfa = 1 is therefore: EFFECTIVE_PRIOR(0.01,10,1) = 0.0917 :param Ptar: is the probability of a target trial :param cmiss: is the cost of a miss :param cfa: is the cost of a false alarm :return: a prior """ p = Ptar * cmiss / (Ptar * cmiss + (1 - Ptar) * cfa) return __logit__(p) def __probit__(p): """Map from [0,1] to [-inf,inf] as used to make DET out of a ROC ... ...
 ... ... @@ -216,10 +216,8 @@ class Ndx: with open(input_filename, 'r') as fid: lines = [l.rstrip().split() for l in fid] models = numpy.array([], '|O') models.resize(len(lines)) testsegs = numpy.array([], '|O') testsegs.resize(len(lines)) models = numpy.empty(len(lines), '|O') testsegs = numpy.empty(len(lines), '|O') for ii in range(len(lines)): models[ii] = lines[ii][0] testsegs[ii] = lines[ii][1] ... ...
 ... ... @@ -45,11 +45,14 @@ ... ... @@ -301,8 +304,8 @@ feat = feat[label] label = label[label] return feat, label return feat, label def _mask(self, cep): """ Keep only the MFCC index present in the filter list ... ... @@ -411,8 +414,8 @@ else: context_label = None return context_feat, context_label return context_feat, context_label
[docs] def get_traps(self, feat, start=None, stop=None, label=None): """ Compute TRAP parameters. The input frames are concatenated to add their left and right context, ... ... @@ -451,8 +454,8 @@ return numpy.dot( context_feat.reshape(-1, hamming_dct.shape[0]), hamming_dct ).reshape(context_feat.shape[0], -1), context_label
[docs] def load(self, show, channel=0, input_feature_filename=None, label=None, start=None, stop=None): """ Depending on the setting of the FeaturesServer, can either: ... ... @@ -505,8 +508,8 @@ channel=channel, label=label, start=start, stop=stop) return self.previous_load
[docs] def get_features(self, show, channel=0, input_feature_filename=None, label=None, start=None, stop=None): """ Get the datasets from a single HDF5 file ... ... @@ -585,7 +588,6 @@ label = h5f.get("/".join((show, "vad"))).value.astype('bool').squeeze()[start:stop] else: label = numpy.ones(feat.shape[0], dtype='bool') # Pad the segment if needed feat = numpy.pad(feat, ((pad_begining, pad_end), (0, 0)), mode='edge') label = numpy.pad(label, ((pad_begining, pad_end)), mode='edge') ... ... @@ -598,8 +600,8 @@ else: feat, label = self.post_processing(feat, label) return feat, label
[docs] def get_tandem_features(self, show, channel=0, label=None, start=None, stop=None): """ Read acoustic parameters from multiple HDF5 files (from disk or extracted by FeaturesExtractor objects). ... ... @@ -628,8 +630,8 @@ label = numpy.ones(feat.shape[0], dtype='bool') # Apply the final post-processing on the concatenated features return self.post_processing(features, label)
[docs] def mean_std(self, show, channel=0, start=None, stop=None): """ Compute the mean and standard deviation vectors for a segment of acoustic features ... ... @@ -665,7 +667,7 @@
 ... ... @@ -45,11 +45,14 @@ ... ... @@ -117,9 +120,9 @@ :return: the equivalence on the mel scale. """ return 1127.01048 * numpy.log(f / 700.0 + 1) return 1127.01048 * numpy.log(f / 700.0 + 1)
[docs]def mel2hz(m): """Convert an array of mel values to Hz. ... ... @@ -127,9 +130,9 @@ :return: the equivalent values in Hertz. """ return (numpy.exp(m / 1127.01048) - 1) * 700.0
[docs]def compute_delta(features, win=3, method='filter', ... ... @@ -163,9 +166,9 @@ for i in range(features.shape[1]): delta[:, i] = numpy.convolve(features[:, i], filt) return delta[win:-win, :]
[docs]def pca_dct(cep, left_ctx=12, right_ctx=12, p=None): """Apply DCT PCA as in [McLaren 2015] paper: Mitchell McLaren and Yun Lei, 'Improved Speaker Recognition ... ... @@ -191,9 +194,9 @@ if p is None: p = numpy.eye(dct_temp.shape[0] * cep.shape[1], dtype=PARAM_TYPE) return (numpy.dot(ceps.reshape(-1, dct_temp.shape[0]), dct_temp).reshape(ceps.shape[0], -1)).dot(p)
[docs]def shifted_delta_cepstral(cep, d=1, p=3, k=7): """ Compute the Shifted-Delta-Cepstral features for language identification ... ... @@ -221,9 +224,9 @@ for ff in range(len(cep)): sdc[ff, :] = delta[idx, :].reshape(1, -1) idx = numpy.roll(idx, 1) return numpy.hstack((cep, sdc))
[docs]def trfbank(fs, nfft, lowfreq, maxfreq, nlinfilt, nlogfilt, midfreq=1000): """Compute triangular filterbank for cepstral coefficient computation. ... ... @@ -304,8 +307,8 @@ fbank[i][lid] = left_slope * (n_frequences[lid] - low) fbank[i][rid[:-1]] = right_slope * (hi - n_frequences[rid[:-1]]) return fbank, frequences
[docs]def mel_filter_bank(fs, nfft, lowfreq, maxfreq, widest_nlogfilt, widest_lowfreq, widest_maxfreq,): """Compute triangular filterbank for cepstral coefficient computation. ... ... @@ -357,9 +360,9 @@ fbank[i][lid] = left_slope * (nfreqs[lid] - low) fbank[i][rid[:-1]] = right_slope * (hi - nfreqs[rid[:-1]]) return fbank, sub_band_freqs
[docs]def mfcc(input_sig, lowfreq=100, maxfreq=8000, nlinfilt=0, nlogfilt=24, ... ... @@ -455,9 +458,9 @@ lst.append(None) del mspec return lst
[docs]def framing(sig, win_size, win_shift=1, context=(0, 0), pad='zeros'): """ :param sig: input signal, can be mono or multi dimensional ... ... @@ -481,9 +484,9 @@ elif pad == 'edge': return numpy.lib.stride_tricks.as_strided(numpy.lib.pad(sig, c, 'edge'), shape=shape, strides=strides).squeeze()
[docs]def dct_basis(nbasis, length): """ :param nbasis: number of CT coefficients to keep ... ... @@ -513,7 +516,7 @@
This diff is collapsed.
 ... ... @@ -45,11 +45,14 @@ ... ... @@ -91,7 +94,7 @@ useful parameters for speaker verification. """ import numpy import pandas #import pandas import scipy.stats as stats from scipy.signal import lfilter ... ... @@ -134,9 +137,9 @@ for i in range(y.shape[0]): y[i, 4:] = lfilter(numerator, denominator, x[i, 4:], axis=-1, zi=zf[i, :])[0] return y.T return y.T
[docs]def cms(features, label=None, global_mean=None): """Performs cepstral mean subtraction ... ... @@ -156,9 +159,9 @@ mu = global_mean else: mu = numpy.mean(features[label, :], axis=0) features -= mu
[docs]def cmvn(features, label=None, global_mean=None, global_std=None): """Performs mean and variance normalization ... ... @@ -183,10 +186,10 @@ mu = numpy.mean(features[label, :], axis=0) stdev = numpy.std(features[label, :], axis=0) features -= mu features /= stdev
[docs]def stg(features, label=None, win=301): """Performs feature warping on a sliding window ... ... @@ -248,9 +251,9 @@ # wrapFeatures = np.copy(features) if add_a_feature: stg_features = stg_features[:-1] features[label, :] = stg_features
[docs]def cep_sliding_norm(features, win=301, label=None, center=True, reduce=False): """ Performs a cepstral mean subtraction and standard deviation normalization ... ... @@ -274,7 +277,7 @@ else: d_win = win // 2 df = pandas.DataFrame(features[label, :]) #df = pandas.DataFrame(features[label, :]) r = df.rolling(window=win, center=True) mean = r.mean().values std = r.std().values ... ... @@ -311,7 +314,7 @@
