Commit 3467349f authored by Sulfyderz's avatar Sulfyderz
Browse files

[Error Update]:Adding an initialization to tutorials/tuto_1_iv_model.ipynb.

parent 6dd3f052
......@@ -15,11 +15,12 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"%matplotlib inline\n",
"\n",
"from s4d.diar import Diar\n",
"from s4d.utils import *\n",
"\n",
......@@ -347,7 +348,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.6.6"
}
},
"nbformat": 4,
......
%% Cell type:markdown id: tags:
Train model for Diarization
====
This script trains UBM, TV and PLDA models for a diarization system.
Initialization
---
%% Cell type:code id: tags:
``` python
# Imports for the model-training tutorial: s4d diarization helpers,
# sidekit model classes and HDF5 I/O utilities.
import logging
%matplotlib inline
from s4d.diar import Diar
from s4d.utils import *
from sidekit import Mixture, FactorAnalyser, StatServer, IdMap
import numpy
import logging
import re
import sidekit
from sidekit.sidekit_io import *
# Optional: use SortedDict when sortedcontainers is installed.
# NOTE(review): this shadows the builtin `dict` for the rest of the
# notebook — every later `dict()` becomes a SortedDict; confirm intended.
try:
    from sortedcontainers import SortedDict as dict
except ImportError:
    pass
```
%% Cell type:code id: tags:
``` python
# Runtime configuration: logging, thread count, and the location of every
# input/output file used by the training steps below.
init_logging(level=logging.INFO)
num_thread = 4

# Input audio pattern and UBM training material.
audio_dir = '../data/train/{}.wav'
ubm_seg_fn = './data/seg/ubm_ester.seg'
nb_gauss = 1024
mfcc_ubm_fn = './data/mfcc/ubm.h5'
ubm_idmap_fn = './data/mfcc/ubm_idmap.txt'
ubm_fn = './data/model/ester_ubm_{}.h5'.format(nb_gauss)

# Total Variability (TV) training inputs/outputs.
tv_seg_fn = './data/seg/train.tv.seg'
rank_tv = 300
it_max_tv = 10
mfcc_tv_fn = './data/mfcc/tv.h5'
tv_idmap_fn = './data/mfcc/tv_idmap.h5'
tv_stat_fn = './data/model/tv.stat.h5'
tv_fn = './data/model/tv_{}.h5'.format(rank_tv)

# PLDA training inputs/outputs.
plda_seg_fn = './data/seg/train.plda.seg'
rank_plda = 150
it_max_plda = 10
mfcc_plda_fn = './data/mfcc/norm_plda.h5'
plda_idmap_fn = './data/mfcc/plda_idmap.h5'
plda_fn = './data/model/plda_{}_{}.h5'.format(rank_tv, rank_plda)

# Normalisation statistics and the final merged model file.
norm_stat_fn = './data/model/norm.stat.h5'
norm_fn = './data/model/norm.h5'
norm_iv_fn = './data/model/norm.iv.h5'
matrices_fn = './data/model/matrices.h5'
model_fn = './data/model/ester_model_{}_{}_{}.h5'.format(nb_gauss, rank_tv, rank_plda)
```
%% Cell type:markdown id: tags:
Step 1: UBM
---
Extract MFCC for the UBM
%% Cell type:code id: tags:
``` python
# Step 1a: extract 'sid' MFCC features for every UBM training segment and
# save the resulting speaker id-map for the EM training cell below.
logging.info('Computing MFCC for UBM')
diar_ubm = Diar.read_seg(ubm_seg_fn, normalize_cluster=True)
fe = get_feature_extractor(audio_dir, 'sid')
ubm_idmap = fe.save_multispeakers(diar_ubm.id_map(), output_feature_filename=mfcc_ubm_fn, keep_all=False)
ubm_idmap.write_txt(ubm_idmap_fn)
```
%% Cell type:markdown id: tags:
Train the UBM by EM
%% Cell type:code id: tags:
``` python
# Step 1b: train the UBM by EM with Gaussian splitting up to nb_gauss
# components, then store it under the 'ubm/' prefix of the model file.
ubm_idmap = IdMap.read_txt(ubm_idmap_fn)
fs = get_feature_server(mfcc_ubm_fn, 'sid')
spk_lst = ubm_idmap.rightids
ubm = Mixture()
# One EM iteration count per split stage (13 stages -> 2**13 > 1024 comps).
ubm.EM_split(fs, spk_lst, nb_gauss,
             iterations=(1, 2, 2, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8), num_thread=num_thread,
             llk_gain=0.01)
ubm.write(ubm_fn, prefix='ubm/')
```
%% Cell type:markdown id: tags:
Step 2: TV
---
Extract MFCC for TV
%% Cell type:code id: tags:
``` python
# Step 2a: extract MFCC features for the Total Variability training data
# and save its id-map (HDF5 this time, unlike the UBM's txt id-map).
logging.info('Computing MFCC for TV')
diar_tv = Diar.read_seg(tv_seg_fn, normalize_cluster=True)
fe = get_feature_extractor(audio_dir, 'sid')
tv_idmap = fe.save_multispeakers(diar_tv.id_map(), output_feature_filename=mfcc_tv_fn, keep_all=False)
tv_idmap.write(tv_idmap_fn)
```
%% Cell type:markdown id: tags:
Train a Total Variability model using the FactorAnalyser class
%% Cell type:code id: tags:
``` python
# Step 2b: accumulate sufficient statistics over the TV data and train the
# Total Variability matrix with a FactorAnalyser; persist [F, mean, Sigma].
tv_idmap = IdMap.read(tv_idmap_fn)
ubm = Mixture()
ubm.read(ubm_fn, prefix='ubm/')
fs = get_feature_server(mfcc_tv_fn, 'sid')
# Copy right ids onto left ids — presumably so each session counts as its
# own class for TV training; TODO confirm against sidekit conventions.
tv_idmap.leftids = numpy.copy(tv_idmap.rightids)
tv_stat = StatServer(tv_idmap, ubm.get_distrib_nb(), ubm.dim())
tv_stat.accumulate_stat(ubm=ubm, feature_server=fs, seg_indices=range(tv_stat.segset.shape[0]), num_thread=num_thread)
tv_stat.write(tv_stat_fn)
fa = FactorAnalyser()
fa.total_variability(tv_stat_fn, ubm, rank_tv, nb_iter=it_max_tv, batch_size=1000, num_thread=num_thread)
write_tv_hdf5([fa.F, fa.mean, fa.Sigma], tv_fn)
```
%% Cell type:markdown id: tags:
Step 3: PLDA
---
Extract the MFCC for the PLDA
%% Cell type:code id: tags:
``` python
# Step 3a: extract MFCC features for the PLDA training data and save its
# id-map.
logging.info('Computing MFCC for PLDA')
diar_plda = Diar.read_seg(plda_seg_fn, normalize_cluster=True)
fe = get_feature_extractor(audio_dir, 'sid')
plda_idmap = fe.save_multispeakers(diar_plda.id_map(), output_feature_filename=mfcc_plda_fn, keep_all=False)
plda_idmap.write(plda_idmap_fn)
```
%% Cell type:markdown id: tags:
Accumulate statistics
%% Cell type:code id: tags:
``` python
# Step 3b: accumulate statistics for the PLDA/normalisation data using the
# trained UBM; the TV matrices are loaded here for the next cell.
plda_idmap = IdMap.read(plda_idmap_fn)
ubm = Mixture()
ubm.read(ubm_fn, prefix='ubm/')
tv, tv_mean, tv_sigma = read_tv_hdf5(tv_fn)
fs = get_feature_server(mfcc_plda_fn, 'sid')
plda_norm_stat = StatServer(plda_idmap, ubm.get_distrib_nb(), ubm.dim())
plda_norm_stat.accumulate_stat(ubm=ubm, feature_server=fs,
                               seg_indices=range(plda_norm_stat.segset.shape[0]), num_thread=num_thread)
plda_norm_stat.write(norm_stat_fn)
```
%% Cell type:markdown id: tags:
Extract i-vectors and compute norm
%% Cell type:code id: tags:
``` python
# Step 3c: extract i-vectors with the trained TV model, estimate spherical
# normalisation statistics, save them, and normalise the i-vectors in place.
fa = FactorAnalyser(F=tv, mean=tv_mean, Sigma=tv_sigma)
norm_iv = fa.extract_ivectors(ubm, norm_stat_fn, num_thread=num_thread)
norm_iv.write(norm_iv_fn)
# One iteration of spherical norm ('sphNorm'); only the first mean/cov pair
# is applied below.
norm_mean, norm_cov = norm_iv.estimate_spectral_norm_stat1(1, 'sphNorm')
write_norm_hdf5([norm_mean, norm_cov], norm_fn)
norm_iv.spectral_norm_stat1(norm_mean[:1], norm_cov[:1])
```
%% Cell type:markdown id: tags:
Train the PLDA model
%% Cell type:code id: tags:
``` python
# Step 3d: train the PLDA model on the normalised i-vectors and save it.
# The zeros matrix is presumably an empty placeholder factor expected by
# write_plda_hdf5 — TODO confirm against sidekit_io.
fa = FactorAnalyser()
fa.plda(norm_iv, rank_plda, nb_iter=it_max_plda)
write_plda_hdf5([fa.mean, fa.F, numpy.zeros((rank_tv, 0)), fa.Sigma], plda_fn)
```
%% Cell type:markdown id: tags:
Step 4: Compute additional data (optional)
---
Adding matrices for additional scoring methods:
* Mahalanobis matrix
* Lower Cholesky decomposition of the WCCN matrix
* Within- and Between-class Covariance matrices
%% Cell type:code id: tags:
``` python
# Step 4 (optional): derive additional scoring matrices from the normalised
# i-vectors and write them all into a single HDF5 file.
iv = StatServer(norm_iv_fn)
matrix_dict = {}
logging.info('compute mahalanobis_matrix')
mahalanobis_matrix = iv.get_mahalanobis_matrix_stat1()
matrix_dict['mahalanobis_matrix'] = mahalanobis_matrix
logging.info('compute wccn_choleski')
wccn_choleski = iv.get_wccn_choleski_stat1()
matrix_dict['wccn_choleski'] = wccn_choleski
logging.info('compute two_covariance')
# Within/between-class covariances are stored under a common HDF5 group.
within_covariance = iv.get_within_covariance_stat1()
matrix_dict['two_covariance/within_covariance'] = within_covariance
between_covariance = iv.get_between_covariance_stat1()
matrix_dict['two_covariance/between_covariance'] = between_covariance
write_dict_hdf5(matrix_dict, matrices_fn)
```
%% Cell type:markdown id: tags:
Step 5: Merge in one model
---
%% Cell type:code id: tags:
``` python
# Step 5: merge every trained component into one model file, copying each
# top-level HDF5 group; intermediate files that were not produced are
# skipped silently.
# NOTE(review): neither h5py nor os is imported explicitly in this
# notebook — presumably provided by the star imports above; confirm.
with h5py.File(model_fn, 'w') as model:
    for fn in [ubm_fn, tv_fn, norm_fn, plda_fn, matrices_fn]:
        if not os.path.exists(fn):
            continue
        with h5py.File(fn, 'r') as fh:
            for group in fh:
                logging.info(group)
                fh.copy(group, model)
```
......
......@@ -44,19 +44,13 @@
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Import theano\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Can not use cuDNN on context None: Disabled by dnn.enabled flag\n",
"Mapped name None to device cuda: GeForce GTX TITAN X (0000:03:00.0)\n"
"/Users/Sulfyderz/Desktop/Doctorat/Tools/Environments/miniconda/Python3/lib/python3.6/site-packages/sidekit/bosaris/detplot.py:39: UserWarning: matplotlib.pyplot as already been imported, this call will have no effect.\n",
" matplotlib.use('PDF')\n",
"WARNING:root:WARNNG: libsvm is not installed, please refer to the documentation if you intend to use SVM classifiers\n"
]
}
],
......@@ -567,7 +561,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.6.6"
}
},
"nbformat": 4,
......
%% Cell type:markdown id: tags:
Diarization for ASR
===================
This script performs a BIC diarization (usually for ASR decoding)
The proposed diarization system was inspired by the
system [1] which won the RT'04 fall evaluation
and the ESTER1 evaluation. It was developed during the ESTER2
evaluation campaign for the transcription with the goal of minimizing
word error rate.
Automatic transcription requires accurate segment boundaries. Segment
boundaries have to be set within non-informative zones such as filler
words.
Speaker diarization needs to produce homogeneous speech segments;
however, purity and coverage of the speaker clusters are the main
objectives here. Errors such as having two distinct clusters (i.e.,
detected speakers) corresponding to the same real speaker, or
conversely, merging segments of two real speakers into only one cluster,
get heavier penalty in the NIST time-based diarization metric than
misplaced boundaries.
The system is composed of acoustic BIC segmentation followed with BIC
hierarchical clustering. Viterbi decoding is performed to adjust the
segment boundaries.
Music and jingle regions are not removed but a speech activity
diarization can be loaded beforehand to segment and cluster the show.
Optionally, long segments are cut to be shorter than 20 seconds.
[1] C. Barras, X. Zhu, S. Meignier, and J. L. Gauvain, “Multistage speaker diarization of broadcast news,” IEEE Transactions on Audio, Speech, and Language Processing, vol. 14, no. 5, pp. 1505–1512, Sep. 2006.
%% Cell type:code id: tags:
``` python
%matplotlib inline
__license__ = "LGPL"
__author__ = "Sylvain Meignier"
__copyright__ = "Copyright 2015-2016 Sylvain Meignier"
__license__ = "LGPL"
__maintainer__ = "Sylvain Meignier"
__email__ = "sidekit@univ-lemans.fr"
__status__ = "Production"
__docformat__ = 'reStructuredText'
import argparse
import logging
import matplotlib
import copy
import os
from matplotlib import pyplot as plot
from s4d.utils import *
from s4d.diar import Diar
from s4d import viterbi, segmentation
from s4d.clustering import hac_bic
from sidekit.sidekit_io import init_logging
from s4d.gui.dendrogram import plot_dendrogram
```
%%%% Output: stream
Import theano
%%%% Output: stream
Can not use cuDNN on context None: Disabled by dnn.enabled flag
Mapped name None to device cuda: GeForce GTX TITAN X (0000:03:00.0)
/Users/Sulfyderz/Desktop/Doctorat/Tools/Environments/miniconda/Python3/lib/python3.6/site-packages/sidekit/bosaris/detplot.py:39: UserWarning: matplotlib.pyplot as already been imported, this call will have no effect.
matplotlib.use('PDF')
WARNING:root:WARNNG: libsvm is not installed, please refer to the documentation if you intend to use SVM classifiers
%% Cell type:markdown id: tags:
BIC diarization
===============
Arguments, variables and logger
-------------------------------
Set the logger
%% Cell type:code id: tags:
``` python
# Configure logging verbosity for the whole notebook.
loglevel = logging.INFO
init_logging(level=loglevel)
```
%% Cell type:markdown id: tags:
Set the input audio or mfcc file and the speech activity detection file (optional).
%% Cell type:code id: tags:
``` python
# Input locations for the show under analysis: the audio file and an
# optional speech-activity-detection (SAD) segmentation.
data_dir = 'data'
show = '20041008_1800_1830_INFO_DGA'
input_show = os.path.join(data_dir, 'audio', '{}.wav'.format(show))
input_sad = os.path.join(data_dir, 'sad', '{}.sad.seg'.format(show))
# Uncomment to ignore the SAD file and start from a single full segment.
#input_sad = None
```
%% Cell type:markdown id: tags:
Size of left or right windows (step 2)
%% Cell type:code id: tags:
``` python
win_size=250
```
%% Cell type:markdown id: tags:
Threshold for:
* Linear segmentation (step 3)
* BIC HAC (step 4)
* Viterbi (step 5)
%% Cell type:code id: tags:
``` python
# Decision thresholds:
#   thr_l   - linear BIC segmentation (step 4)
#   thr_h   - BIC hierarchical clustering (step 5)
#   thr_vit - Viterbi re-segmentation penalty (step 5 bis)
thr_l = 2
thr_h = 3
thr_vit = -250
```
%% Cell type:markdown id: tags:
If ``save_all`` is ``True`` then all produced diarizations are saved
%% Cell type:code id: tags:
``` python
save_all = True
```
%% Cell type:markdown id: tags:
Prepare various variables
%% Cell type:code id: tags:
``` python
# Working directory for this show's outputs; create it if needed.
wdir = os.path.join('out', show)
# exist_ok avoids the race between the existence check and the creation
# (the original checked os.path.exists first).
os.makedirs(wdir, exist_ok=True)
```
%% Cell type:markdown id: tags:
Step 1: MFCC
-------------
Extract and load the MFCC
%% Cell type:code id: tags:
``` python
# Step 1: extract the 'basic' MFCC features for the show.  When save_all
# is set the features are first written to HDF5 and served from that file;
# otherwise they are computed straight from the audio.
logging.info('Make MFCC')
if save_all:
    fe = get_feature_extractor(input_show, type_feature_extractor='basic')
    mfcc_filename = os.path.join(wdir, show + '.mfcc.h5')
    fe.save(show, output_feature_filename=mfcc_filename)
    fs = get_feature_server(mfcc_filename, feature_server_type='basic')
else:
    fs = get_feature_server(input_show, feature_server_type='basic')
# cep: the feature matrix used by every following step.
cep, _ = fs.load(show)
```
%%%% Output: stream
2018-06-11 10:46:17,143 - INFO - Make MFCC
2018-06-11 10:46:17,144 - INFO - data/audio ## 20041008_1800_1830_INFO_DGA ## .wav
2018-06-11 10:46:17,145 - INFO - --------------------
2018-06-11 10:46:17,145 - INFO - show: empty keep_all_features: True
audio_filename_structure: data/audio/20041008_1800_1830_INFO_DGA.wav
feature_filename_structure: {}
pre-emphasis: 0.97
lower_frequency: 133.3333 higher_frequency: 6855.4976
sampling_frequency: 16000
filter bank: 40 filters of type log
ceps_number: 13
window_size: 0.025 shift: 0.01
vad: None snr: None
2018-06-11 10:46:17,146 - INFO - --------------------
2018-06-11 10:46:17,147 - INFO - show: empty
input_feature_filename: empty
feature_filename_structure: {}
Post processing options:
mask: None
feat_norm: None
dct_pca: False, dct_pca_config: (12, 12, None)
sdc: False, sdc_config: (1, 3, 7)
delta: False, double_delta: False, delta_filter: [ 0.25 0.5 0.25 0. -0.25 -0.5 -0.25]
rasta: False
keep_all_features: True
2018-06-11 10:46:21,768 - INFO - process part : 0.000000 1822.912125 1822.912125
2018-06-11 10:46:24,513 - INFO - no vad
2018-06-11 10:46:24,518 - INFO - !! size of signal cep: 0.000050 len 13 type size 4
2018-06-11 10:46:24,602 - INFO - [ True True True ..., True True True]
%% Cell type:code id: tags:
``` python
cep.shape
```
%%%% Output: execute_result
(182289, 14)
%% Cell type:markdown id: tags:
Step 2: Initialization
------
The initial diarization is loaded from a speech activity detection
diarization (SAD) or a segment is created from the first to the last
MFCC feature.
%% Cell type:code id: tags:
``` python
# Step 2: initial diarization — loaded from the SAD file when available,
# otherwise a single segment covering all features is created.
logging.info('Check initial segmentation')
if input_sad is not None:
    init_diar = Diar.read_seg(input_sad)
    # pack(50): presumably merges/pads segments within a 50-frame gap —
    # TODO confirm against Diar.pack.
    init_diar.pack(50)
else:
    init_diar = segmentation.init_seg(cep, show)
if save_all:
    init_filename = os.path.join(wdir, show + '.i.seg')
    Diar.write_seg(init_filename, init_diar)
```
%%%% Output: stream
2018-06-11 10:46:30,818 - INFO - Check initial segmentation
%% Cell type:markdown id: tags:
Step 3: Gaussian Divergence segmentation
----------------------------------------
First segmentation: Segment each segment of ``init_diar`` using the
Gaussian Divergence method
%% Cell type:code id: tags:
``` python
# Step 3: split each initial segment with the Gaussian Divergence method
# using windows of win_size frames.
logging.info('Gaussian Divergence segmentation')
seg_diar = segmentation.segmentation(cep, init_diar, win_size)
if save_all:
    seg_filename = os.path.join(wdir, show + '.s.seg')
    Diar.write_seg(seg_filename, seg_diar)
```
%%%% Output: stream
2018-06-11 10:46:30,828 - INFO - Gaussian Divergence segmentation
%% Cell type:markdown id: tags:
Step 4: linear BIC segmentation
-------------------------------
This segmentation over the signal fuses consecutive segments of the same
speaker from the start to the end of the record. The measure employs the
$\Delta BIC$ based on Bayesian Information Criterion , using full
covariance Gaussians (see class ``gauss.GaussFull``).
%% Cell type:code id: tags:
``` python
# Step 4: linear BIC segmentation — fuse consecutive same-speaker segments
# from start to end using delta-BIC with threshold thr_l.
logging.info('Linear BIC, alpha: %f', thr_l)
bicl_diar = segmentation.bic_linear(cep, seg_diar, thr_l, sr=False)
if save_all:
    bicl_filename = os.path.join(wdir, show + '.l.seg')
    Diar.write_seg(bicl_filename, bicl_diar)
```
%%%% Output: stream
2018-06-11 10:46:31,246 - INFO - Linear BIC, alpha: 2.000000
%% Cell type:markdown id: tags:
Step 5: BIC HAC
---------------
Perform a BIC HAC
%% Cell type:code id: tags:
``` python
# Step 5: BIC hierarchical agglomerative clustering with threshold thr_h,
# then plot the merge dendrogram.
logging.info('BIC HAC, alpha: %f', thr_h)
bic = hac_bic.HAC_BIC(cep, bicl_diar, thr_h, sr=False)
bich_diar = bic.perform(to_the_end=True)
if save_all:
    bichac_filename = os.path.join(wdir, show + '.h.seg')
    Diar.write_seg(bichac_filename, bich_diar)
link, data = plot_dendrogram(bic.merge, 0, size=(25,6), log=True)
```
%%%% Output: stream
2018-06-11 10:46:31,374 - INFO - BIC HAC, alpha: 3.000000
%%%% Output: display_data
%% Cell type:markdown id: tags:
Step 5: re-segmentation
-----------------------
Viterbi decoding
* HMM is trained: one GMM per speaker; each GMM has 8 components with a diagonal covariance matrix. Only a penalty between states
is fixed.
* Emission is computed: likelihood for each feature
* a Viterbi decoding is performed
%% Cell type:code id: tags:
``` python
# Step 5 bis: Viterbi re-segmentation to refine segment boundaries, with
# inter-speaker transition penalty thr_vit.
logging.info('Viterbi decoding, penalties: %f', thr_vit)
vit_diar = viterbi.viterbi_decoding(cep, bich_diar, thr_vit)
if save_all:
    vit_filename = os.path.join(wdir, show + '.d.seg')
    Diar.write_seg(vit_filename, vit_diar)
```
%%%% Output: stream
2018-06-11 10:46:32,506 - INFO - Viterbi decoding, penalties: -250.000000
%% Cell type:markdown id: tags:
Compute the diarization error rate
----------------------------------
%% Cell type:code id: tags:
``` python
# Score the final diarization: compute the DER against the ESTER1 reference
# within the evaluation UEM, print a summary table, and plot the
# hypothesis/reference differences.
from s4d import scoring
from tabulate import tabulate
from s4d.gui.viewer import PlotDiar
from s4d.gui.viewer_utils import *
ref = Diar.read_mdtm(os.path.join(data_dir, 'seg', 'ester1.tst.mdtm'))
uem = Diar.read_uem(os.path.join(data_dir, 'seg', 'ester1.tst.uem'))
# Index reference and UEM by show name for direct lookup.
uem_show = uem.make_index(['show'])
ref_show = ref.make_index(['show'])
der = scoring.compute_der(vit_diar, ref_show[show], uem=uem_show[show], collar=25, no_overlap=False)
tab = scoring.get_header()
tab += der.get_table(show, time=False)
print(tabulate(tab, tablefmt='psql', floatfmt='.2f', headers='firstrow'))
# Visualise matched hypothesis vs reference segments.
diff_diar = diar_diff(vit_diar, ref_show[show], match=True)
p = PlotDiar(diff_diar, size=(25, 6))
p.draw()
```
%%%% Output: stream
2018-06-11 10:46:41,697 - INFO - append collar
%%%% Output: stream
+-----------------------------+--------+------+--------+-------+------+--------+--------+-----------+
| show | type | fa | miss | sns | fa | miss | conf | speaker |
|-----------------------------+--------+------+--------+-------+------+--------+--------+-----------|
| 20041008_1800_1830_INFO_DGA | rate | 0.19 | 0.45 | 0.64 | 0.19 | 1.01 | 12.12 | 13.33 |
+-----------------------------+--------+------+--------+-------+------+--------+--------+-----------+
uem from ref
100
%%%% Output: display_data
......
......@@ -17,25 +17,9 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Import theano\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Can not use cuDNN on context None: Disabled by dnn.enabled flag\n",
"Mapped name None to device cuda: GeForce GTX TITAN X (0000:03:00.0)\n"
]
}
],
"outputs": [],
"source": [
"%matplotlib inline\n",
"\n",
......@@ -61,7 +45,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
......@@ -73,7 +57,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
......@@ -342,7 +326,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.6.6"
}
},
"nbformat": 4,
......
%% Cell type:markdown id: tags:
i-vector clustering with PLDA scoring
===
This script demonstrates the use of several clustering algorithms using PLDA scoring and i-vectors. The algorithms proposed are:
- Integer Linear Programming (ILP) IV
- HAC IV
- Connected Components (CC) IV
- Combination of CC and HAC, and CC and ILP
It takes as input the segments generated by the second tutorial (BIC-HAC) and uses the model learned in the first.
%% Cell type:code id: tags:
``` python
# Imports for the i-vector clustering tutorial: s4d clustering algorithms
# (ILP, HAC, connected components) plus sidekit I/O and bosaris containers.
%matplotlib inline
from s4d.diar import Diar
from s4d.utils import *
from s4d import scoring
from s4d.model_iv import ModelIV
from s4d.clustering.ilp_iv import ilp_iv
from s4d.clustering.hac_iv import hac_iv
from s4d.clustering.cc_iv import connexted_component
from sidekit.sidekit_io import *
from sidekit.bosaris import IdMap, Scores
import matplotlib.pyplot as plt
import logging
import numpy
import copy
import sys
import os
```
%%%% Output: stream
Import theano
%%%% Output: stream
Can not use cuDNN on context None: Disabled by dnn.enabled flag
Mapped name None to device cuda: GeForce GTX TITAN X (0000:03:00.0)
%% Cell type:code id: tags:
``` python
# Configure logging and locate the model trained in the first tutorial.
init_logging(level=logging.INFO)
data_dir = 'data'
model_fn = os.path.join(data_dir, 'model', 'ester_model.h5')
```
%% Cell type:code id: tags:
``` python
# Per-show paths: input audio, the BIC diarization produced by tutorial 2
# (.d.seg), and the outputs (MFCC, id-map, PLDA score matrix).
show = '20041008_1800_1830_INFO_DGA'
audio_fn = os.path.join(data_dir, 'audio', show + '.wav')
out_dir = os.path.join('out', show)
mfcc_fn = os.path.join(out_dir, show + '.test_mfcc.h5')
bic_fn = os.path.join(out_dir, show + '.d.seg')
idmap_fn = os.path.join(out_dir, show + '.idmap.h5')
score_fn = os.path.join(out_dir, show + '.score_plda.h5')
diar_bic = Diar.read_seg(bic_fn)
```
%% Cell type:markdown id: tags:
Extracting MFCC
===
%% Cell type:code id: tags:
``` python
# Extract 'sid' MFCC features for every cluster of the BIC diarization and
# save the associated id-map.
fe = get_feature_extractor(audio_fn, type_feature_extractor='sid')
idmap_bic = fe.save_multispeakers(diar_bic.id_map(), output_feature_filename=mfcc_fn, keep_all=False)
idmap_bic.write(idmap_fn)
```
%% Cell type:markdown id: tags:
PLDA scoring
===
Train a PLDA model for the show and compute the distance matrix
%% Cell type:code id: tags:
``` python
# Train the show's i-vector model and compute the PLDA score matrix
# between clusters.
model_iv = ModelIV(model_fn)
# NOTE(review): other cells use IdMap.read(fn); confirm IdMap(fn) also
# loads from file rather than creating an empty map.
idmap_bic = IdMap(idmap_fn)
fs = get_feature_server(mfcc_fn, 'sid')
model_iv.train(fs, idmap_bic)
distance = model_iv.score_plda_slow()
distance.write(score_fn)
```
%%%% Output: stream
load: data/model/ester_model.h5
%%%% Output: stream
2018-06-20 17:11:30,775 - INFO - out/20041008_1800_1830_INFO_DGA ## 20041008_1800_1830_INFO_DGA ## .test_mfcc.h5
2018-06-20 17:11:30,776 - INFO - feature extractor --> None
2018-06-20 17:11:30,777 - INFO - --------------------
2018-06-20 17:11:30,778 - INFO - None
2018-06-20 17:11:30,779 - INFO - --------------------
2018-06-20 17:11:30,780 - INFO - show: empty
input_feature_filename: empty
feature_filename_structure: out/20041008_1800_1830_INFO_DGA/20041008_1800_1830_INFO_DGA.test_mfcc.h5
Post processing options:
mask: None
feat_norm: cmvn_sliding
dct_pca: False, dct_pca_config: (12, 12, None)
sdc: False, sdc_config: (1, 3, 7)
delta: True, double_delta: True, delta_filter: [ 0.25 0.5 0.25 0. -0.25 -0.5 -0.25]
rasta: False
keep_all_features: True
2018-06-20 17:11:31,020 - INFO - 20041008_1800_1830_INFO_DGA/S0 start: 0 stop: 4450
2018-06-20 17:11:32,990 - INFO - 20041008_1800_1830_INFO_DGA/S8 start: 0 stop: 25320
2018-06-20 17:11:38,165 - INFO - 20041008_1800_1830_INFO_DGA/S11 start: 0 stop: 4458
2018-06-20 17:11:38,864 - INFO - 20041008_1800_1830_INFO_DGA/S30 start: 0 stop: 2754
2018-06-20 17:11:39,307 - INFO - 20041008_1800_1830_INFO_DGA/S45 start: 0 stop: 4620
2018-06-20 17:11:40,048 - INFO - 20041008_1800_1830_INFO_DGA/S66 start: 0 stop: 4506
2018-06-20 17:11:40,440 - INFO - 20041008_1800_1830_INFO_DGA/S142 start: 0 stop: 1251
2018-06-20 17:11:41,097 - INFO - 20041008_1800_1830_INFO_DGA/S83 start: 0 stop: 15633
2018-06-20 17:11:44,250 - INFO - 20041008_1800_1830_INFO_DGA/S86 start: 0 stop: 1696
2018-06-20 17:11:44,715 - INFO - 20041008_1800_1830_INFO_DGA/S89 start: 0 stop: 5770
2018-06-20 17:11:46,077 - INFO - 20041008_1800_1830_INFO_DGA/S100 start: 0 stop: 4984
2018-06-20 17:11:47,253 - INFO - 20041008_1800_1830_INFO_DGA/S106 start: 0 stop: 6084
2018-06-20 17:11:48,819 - INFO - 20041008_1800_1830_INFO_DGA/S123 start: 0 stop: 10635
2018-06-20 17:11:50,960 - INFO - 20041008_1800_1830_INFO_DGA/S145 start: 0 stop: 545
2018-06-20 17:11:51,071 - INFO - 20041008_1800_1830_INFO_DGA/S146 start: 0 stop: 1438
2018-06-20 17:11:51,215 - INFO - 20041008_1800_1830_INFO_DGA/S148 start: 0 stop: 287
2018-06-20 17:11:51,289 - INFO - 20041008_1800_1830_INFO_DGA/S150 start: 0 stop: 1126
2018-06-20 17:11:51,697 - INFO - 20041008_1800_1830_INFO_DGA/S153 start: 0 stop: 8490
2018-06-20 17:11:54,865 - INFO - 20041008_1800_1830_INFO_DGA/S99 start: 0 stop: 42013
2018-06-20 17:12:03,409 - INFO - 20041008_1800_1830_INFO_DGA/S251 start: 0 stop: 5969
2018-06-20 17:12:04,979 - INFO - 20041008_1800_1830_INFO_DGA/S263 start: 0 stop: 10479
2018-06-20 17:12:07,069 - INFO - 20041008_1800_1830_INFO_DGA/S266 start: 0 stop: 1816
2018-06-20 17:12:07,687 - INFO - 20041008_1800_1830_INFO_DGA/S284 start: 0 stop: 12472
%% Cell type:markdown id: tags:
Run the algorithms
===
The different algorithms are run using a variable threshold $t$, producing a segmentation file for each value of $t$.
%% Cell type:code id: tags:
``` python
# Sweep threshold t and run every clustering algorithm, writing one .seg
# file per algorithm and threshold value.
ilp_diar_fn = os.path.join(out_dir, show + '.ilp.{:.2f}.seg')
hac_diar_fn = os.path.join(out_dir, show + '.hac.{:.2f}.seg')
cc_diar_fn = os.path.join(out_dir, show + '.cc.{:.2f}.seg')
cc_ilp_diar_fn = os.path.join(out_dir, show + '.cc+ilp.{:.2f}.seg')
cc_hac_diar_fn = os.path.join(out_dir, show + '.cc+hac.{:.2f}_{:.2f}.seg')
t_min = -30
t_max = 80
t_step = 10
logging.info("Threshold t from {} to {} with a step of {}.".format(t_min, t_max-t_step, t_step))
for t in range(t_min, t_max, t_step):
    logging.info("t = {}".format(t))
    # Counters accumulated from connexted_component (reset each t; values
    # are not reported further in this notebook).
    sum_sg0 = sum_sg = sum_cc = 0
    scores = Scores(scores_file_name=score_fn)
    # ILP clustering over the full show.
    diar_iv, _ = ilp_iv(diar_bic, scores, threshold=t)
    Diar.write_seg(ilp_diar_fn.format(t), diar_iv)
    # HAC clustering over the full show.
    diar_iv, _, _ = hac_iv(diar_bic, scores, threshold=t)
    Diar.write_seg(hac_diar_fn.format(t), diar_iv)
    # Connected-components decomposition.
    diar_iv, cc_list, nb_sg0, nb_sg, nb_cc = connexted_component(diar_bic, scores, threshold=t)
    Diar.write_seg(cc_diar_fn.format(t), diar_iv)
    sum_sg0 += nb_sg0
    sum_sg += nb_sg
    sum_cc += nb_cc
    # CC + ILP: keep non-'cc' parts as-is, run ILP inside each component.
    diar_out = Diar()
    for cc in copy.deepcopy(cc_list):
        if cc.type != 'cc':
            diar_out.append_diar(cc.diarization)
    for cc in cc_list:
        if cc.type == 'cc':
            diar_iv, _ = ilp_iv(cc.diarization, cc.scores, threshold=t)
            diar_out.append_diar(diar_iv)
    Diar.write_seg(cc_ilp_diar_fn.format(t), diar_out)
    # CC + HAC: same idea, but HAC inside components for every second
    # threshold t2 (deepcopy keeps each run independent).
    diar_start = Diar()
    for cc in cc_list:
        if cc.type != 'cc':
            diar_start.append_diar(cc.diarization)
    for t2 in range(t_min, t_max, t_step):
        diar_out = Diar()
        diar_out.append_diar(diar_start)
        for cc in copy.deepcopy(cc_list):
            if cc.type == 'cc':
                diar_hac_iv, _, __ = hac_iv(cc.diarization, cc.scores, threshold=t2)
                diar_out.append_diar(diar_hac_iv)
        Diar.write_seg(cc_hac_diar_fn.format(t, t2), diar_out)
```
%%%% Output: stream
2018-06-20 17:12:16,631 - INFO - Threshold t from -30 to 70 with a step of 10.
2018-06-20 17:12:16,636 - INFO - t = -30
2018-06-20 17:12:16,908 - INFO - t = -20
2018-06-20 17:12:17,045 - INFO - t = -10
2018-06-20 17:12:17,158 - INFO - t = 0
2018-06-20 17:12:17,273 - INFO - t = 10
2018-06-20 17:12:17,391 - INFO - t = 20
2018-06-20 17:12:17,508 - INFO - t = 30
2018-06-20 17:12:17,626 - INFO - t = 40
2018-06-20 17:12:17,743 - INFO - t = 50
2018-06-20 17:12:17,869 - INFO - t = 60
2018-06-20 17:12:17,986 - INFO - t = 70
%% Cell type:markdown id: tags:
Compute the DER
===
Compute the DER for this show for each threshold using ILP
%% Cell type:code id: tags:
``` python
# Compute the DER of the ILP result for every swept threshold, against the
# ESTER1 reference inside the evaluation UEM.
ref = Diar.read_mdtm(os.path.join(data_dir, 'seg', 'ester1.tst.mdtm'))
uem = Diar.read_uem(os.path.join(data_dir, 'seg', 'ester1.tst.uem'))
uems = uem.make_index(['show'])
refs = ref.make_index(['show'])
for t in range(t_min, t_max, t_step):
    hyp = Diar.read_seg(ilp_diar_fn.format(t))
    res = scoring.compute_der(hyp, refs[show], uem=uems[show], collar=25, no_overlap=False)
    print(ilp_diar_fn.format(t), res.get_der())
```
%%%% Output: stream
out/20041008_1800_1830_INFO_DGA/20041008_1800_1830_INFO_DGA.ilp.-30.00.seg 32.0336835124
out/20041008_1800_1830_INFO_DGA/20041008_1800_1830_INFO_DGA.ilp.-20.00.seg 17.6346925965
out/20041008_1800_1830_INFO_DGA/20041008_1800_1830_INFO_DGA.ilp.-10.00.seg 7.94298437815
out/20041008_1800_1830_INFO_DGA/20041008_1800_1830_INFO_DGA.ilp.0.00.seg 2.60739284403
out/20041008_1800_1830_INFO_DGA/20041008_1800_1830_INFO_DGA.ilp.10.00.seg 2.60739284403
out/20041008_1800_1830_INFO_DGA/20041008_1800_1830_INFO_DGA.ilp.20.00.seg 2.60739284403
out/20041008_1800_1830_INFO_DGA/20041008_1800_1830_INFO_DGA.ilp.30.00.seg 2.60739284403
out/20041008_1800_1830_INFO_DGA/20041008_1800_1830_INFO_DGA.ilp.40.00.seg 2.60739284403
out/20041008_1800_1830_INFO_DGA/20041008_1800_1830_INFO_DGA.ilp.50.00.seg 5.0164035389
out/20041008_1800_1830_INFO_DGA/20041008_1800_1830_INFO_DGA.ilp.60.00.seg 5.0164035389
out/20041008_1800_1830_INFO_DGA/20041008_1800_1830_INFO_DGA.ilp.70.00.seg 5.0164035389
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment