Commit 99e43094 authored by Florent Desnous
parents 28cedd13 33800b29
......@@ -3,4 +3,4 @@ s4d.egg-info/
*/__pycache__/
*.pyc
dist/
tutorials/
......@@ -221,9 +221,10 @@ def automatonAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,diarFinal__clus
## tolerance: In centiseconds
## diarFinal__clusterToDeleteAccordingToDiarRef: List of clusters to delete in the diarFinal only
## modeNoGap: Whether to drop the segment actions (i.e. createSegment & deleteSegment)
## mergeStrat_BiggestCluster: Whether we merge in temporal order or merge the biggest cluster first for a given reference segment
def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=False,mergeStrat_BiggestCluster=False,diarFinal__clusterToDeleteAccordingToDiarRef=list()):
assert isinstance(diarHyp,Diar) and isinstance(diarRef,Diar) and isinstance(mergeStrat_BiggestCluster,bool) and isinstance(modeNoGap,bool) and (diarUem is None or isinstance(diarUem,Diar)) and isinstance(tolerance,numbers.Number) and isinstance(diarFinal__clusterToDeleteAccordingToDiarRef,list)
## modeNoGap__mergeStrat_BiggestCluster: Whether we merge in temporal order or merge the biggest cluster first for a given reference segment (only used when modeNoGap is False)
## deleteBoundarySameConsecutiveSpk: Whether to delete the boundary between two consecutive segments with the same speaker
def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=False,modeNoGap__mergeStrat_BiggestCluster=False,diarFinal__clusterToDeleteAccordingToDiarRef=list(),deleteBoundarySameConsecutiveSpk=False):
assert isinstance(diarHyp,Diar) and isinstance(diarRef,Diar) and isinstance(modeNoGap__mergeStrat_BiggestCluster,bool) and isinstance(modeNoGap,bool) and (diarUem is None or isinstance(diarUem,Diar)) and isinstance(tolerance,numbers.Number) and isinstance(diarFinal__clusterToDeleteAccordingToDiarRef,list) and isinstance(deleteBoundarySameConsecutiveSpk,bool)
for u in diarFinal__clusterToDeleteAccordingToDiarRef:
assert isinstance(u,str)
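For context, a minimal calling sketch for the updated signature (hypothetical seg-file paths; the return value is outside this excerpt):

``` python
from s4d.diar import Diar

# Hypothetical inputs: reference and hypothesis diarizations of one show.
diar_ref = Diar.read_seg('ref.seg', normalize_cluster=True)
diar_hyp = Diar.read_seg('hyp.seg', normalize_cluster=True)

# Replay the hypothesis against the reference with a 25-centisecond
# tolerance, logging boundary merges even between two consecutive
# segments of the same speaker.
result = automatonSegmentation(diar_hyp, diar_ref,
                               tolerance=25,
                               modeNoGap=False,
                               deleteBoundarySameConsecutiveSpk=True)
```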
......@@ -288,7 +289,10 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
showname=diarRef.unique('show')[0]
diarRef.sort()
diarHyp.sort()
tolerance=abs(tolerance)
if not strictBoundary:
diarRef.pack()
diarHyp.pack()
assert len(diarOverlapArea(diarRef))==0, "Error: the diarRef parameter has overlapping segments.\nReason: No overlapping segments are allowed.\nSolution: Please separate them.\n"
assert len(diarOverlapArea(diarHyp))==0, "Error: the diarHyp parameter has overlapping segments.\nReason: No overlapping segments are allowed.\nSolution: Please separate them.\n"
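The pack() calls above rely on Diar merging contiguous segments that share a cluster; a rough illustration (a sketch, assuming the Diar.append keyword style used elsewhere in s4d):

``` python
from s4d.diar import Diar

d = Diar()
d.append(show='show1', cluster='spk1', start=0, stop=100)
d.append(show='show1', cluster='spk1', start=100, stop=250)
d.pack()  # the two contiguous 'spk1' segments collapse into one 0-250 segment
```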
......@@ -527,7 +531,7 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
valueBoundaryStart=copy.deepcopy(y['stop'])
if valueBoundaryStart is None:
valueBoundaryStart=valueRef['start']
if mergeStrat_BiggestCluster == True:
if modeNoGap__mergeStrat_BiggestCluster == True:
# Gets the cluster with the most present frames
dictHypRefSegmentDuration=dict()
for y in listHypRefSegment:
......@@ -542,10 +546,10 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
if cls['start']>y['start']:
cls=y
clusterName=cls['cluster']
# Moves the boundaries
# Precondition: listHypRefSegment is sorted in ascending order on start and must not overtake the values valueRef['start'] and valueRef['stop']
if modeNoGap == False:
for idx,z in enumerate(listHypRefSegment):
# Moves the boundaries
# Precondition: listHypRefSegment is sorted in ascending order on start and must not overtake the values valueRef['start'] and valueRef['stop']
nearStop=valueRef['stop']
if idx==0:
boundStop=z['stop']
......@@ -592,22 +596,21 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
elif tolerance!=0 and y['start']>=(valueRef['start']-tolerance):
listHypRefSegment.append(y)
# Replaces the segments which are not in the correct cluster
replaced=False
for y in listHypRefSegment:
if y['cluster']!=clusterName:
replaced=True
yTmp=copy.deepcopy(y)
yTmp['cluster']=clusterName
if modeNoGap == False:
if modeNoGap == False:
replaced=False
for y in listHypRefSegment:
if y['cluster']!=clusterName:
replaced=True
yTmp=copy.deepcopy(y)
yTmp['cluster']=clusterName
actionsSegmentationSegmentDelete.append(copy.deepcopy(y))
actionsIncrementalSegmentationSegmentDeleteTurn.append(copy.deepcopy(y))
valueTmp=dropSegment(y,valueTmp)
if modeNoGap == False:
valueTmp=dropSegment(y,valueTmp)
actionsSegmentationSegmentCreate.append(copy.deepcopy(Segment([valueRef['show'],yTmp['cluster'],yTmp['cluster_type'],yTmp['start'],yTmp['stop']],['show','cluster','cluster_type','start','stop'])))
actionsIncrementalSegmentationSegmentCreateTurn.append(copy.deepcopy(Segment([valueRef['show'],yTmp['cluster'],yTmp['cluster_type'],yTmp['start'],yTmp['stop']],['show','cluster','cluster_type','start','stop'])))
valueTmp.append_seg(yTmp)
if replaced:
valueTmp.sort()
valueTmp.append_seg(yTmp)
if replaced:
valueTmp.sort()
# Merges among them if > 1
if len(listHypRefSegment)>1:
# Gets the new segments, modified by the previous steps
......@@ -618,17 +621,27 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
listTmp.append(y)
elif tolerance!=0 and y['start']>=(valueRef['start']-tolerance):
listTmp.append(y)
actionsSegmentationBoundaryMerge.append(copy.deepcopy([listTmp[0],listTmp[1]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([listTmp[0],listTmp[1]]))
newSegment,valueTmp=mergeSegment(listTmp[0],listTmp[1],valueTmp)
if not (not deleteBoundarySameConsecutiveSpk and listTmp[0]['cluster']==listTmp[1]['cluster']):
actionsSegmentationBoundaryMerge.append(copy.deepcopy([listTmp[0],listTmp[1]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([listTmp[0],listTmp[1]]))
if modeNoGap == True and listTmp[0]['cluster']!=listTmp[1]['cluster']:
listTmp[1]['cluster']=listTmp[0]['cluster']
newSegment,valueTmp=mergeSegment(listTmp[0],listTmp[1],valueTmp)
else:
newSegment=listTmp[1]
for y in range(2,len(listTmp)):
if modeNoGap == True:
if not (Segment.intersection(newSegment,listTmp[y]) is not None or newSegment["stop"]==listTmp[y]["start"] or newSegment["start"]==listTmp[y]["stop"]):
logging.error("Cannot have absence of a segment in Transcriber mode.")
raise Exception("Absence of a segment.")
actionsSegmentationBoundaryMerge.append(copy.deepcopy([newSegment,listTmp[y]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([newSegment,listTmp[y]]))
newSegment,valueTmp=mergeSegment(newSegment,listTmp[y],valueTmp)
if not (not deleteBoundarySameConsecutiveSpk and newSegment['cluster']==listTmp[y]['cluster']):
actionsSegmentationBoundaryMerge.append(copy.deepcopy([newSegment,listTmp[y]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([newSegment,listTmp[y]]))
if modeNoGap == True and newSegment['cluster']!=listTmp[y]['cluster']:
listTmp[y]['cluster']=newSegment['cluster']
newSegment,valueTmp=mergeSegment(newSegment,listTmp[y],valueTmp)
else:
newSegment=listTmp[y]
# Updates diarHyp
diarHyp=valueTmp
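The double-negated gate above, not (not deleteBoundarySameConsecutiveSpk and ...), is easier to read after De Morgan; an equivalent predicate, as a sketch:

``` python
def boundary_merge_enabled(delete_boundary_same_spk, left, right):
    # Merge (and log the action) unless the two segments share a cluster
    # and same-speaker boundary deletion is disabled.
    return delete_boundary_same_spk or left['cluster'] != right['cluster']
```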
......@@ -709,8 +722,10 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
## tolerance: In centiseconds
## diarFinal__clusterToDeleteAccordingToDiarRef: List of clusters to delete in the diarFinal only
## modeNoGap: Whether to drop the segment actions (i.e. createSegment & deleteSegment)
def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=False,diarFinal__clusterToDeleteAccordingToDiarRef=list()):
assert isinstance(diarHyp,Diar) and isinstance(diarRef,Diar) and isinstance(modeNoGap,bool) and (diarUem is None or isinstance(diarUem,Diar)) and isinstance(tolerance,numbers.Number) and isinstance(diarFinal__clusterToDeleteAccordingToDiarRef,list)
## deleteBoundarySameConsecutiveSpk: Whether to delete the boundary between two consecutive segments with the same speaker
## deleteBoundaryMergeCluster: Whether the "delete a boundary" action may merge two consecutive segments with different cluster names (the merged segment takes the name of the left/first segment)
def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=False,diarFinal__clusterToDeleteAccordingToDiarRef=list(),deleteBoundarySameConsecutiveSpk=False,deleteBoundaryMergeCluster=False):
assert isinstance(diarHyp,Diar) and isinstance(diarRef,Diar) and isinstance(modeNoGap,bool) and (diarUem is None or isinstance(diarUem,Diar)) and isinstance(tolerance,numbers.Number) and isinstance(diarFinal__clusterToDeleteAccordingToDiarRef,list) and isinstance(deleteBoundarySameConsecutiveSpk,bool) and isinstance(deleteBoundaryMergeCluster,bool)
for u in diarFinal__clusterToDeleteAccordingToDiarRef:
assert isinstance(u,str)
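A minimal calling sketch for this variant (reusing the hypothetical diar_ref and diar_hyp from the earlier sketch; the return value is outside this excerpt):

``` python
# Allow boundary deletions to merge two differently named clusters;
# the merged segment keeps the name of the left/first segment.
result = automatonSegmentationAssignment(diar_hyp, diar_ref,
                                         tolerance=25,
                                         deleteBoundarySameConsecutiveSpk=False,
                                         deleteBoundaryMergeCluster=True)
```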
......@@ -1056,13 +1071,11 @@ def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,mod
actionsIncrementalAssignmentCreateTurn.append(copy.deepcopy([valueRef['cluster'],z['cluster'],copy.deepcopy(z)]))
else:
if z['cluster'] == dictionary[valueRef['cluster']]:
if (modeNoGap == True and idx==0) or (modeNoGap == False):
actionsAssignmentNothing.append(copy.deepcopy(z))
actionsIncrementalAssignmentNothingTurn.append(copy.deepcopy(z))
actionsAssignmentNothing.append(copy.deepcopy(z))
actionsIncrementalAssignmentNothingTurn.append(copy.deepcopy(z))
else:
if (modeNoGap == True and idx==0) or (modeNoGap == False):
actionsAssignmentChange.append(copy.deepcopy([dictionary[valueRef['cluster']],z]))
actionsIncrementalAssignmentChangeTurn.append(copy.deepcopy([dictionary[valueRef['cluster']],z]))
actionsAssignmentChange.append(copy.deepcopy([dictionary[valueRef['cluster']],z]))
actionsIncrementalAssignmentChangeTurn.append(copy.deepcopy([dictionary[valueRef['cluster']],z]))
applyChange=True
if applyChange:
# Updates the diar for the merges afterward
......@@ -1071,6 +1084,8 @@ def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,mod
valueTmp=dropSegment(z,valueTmp)
valueTmp.append_seg(segmentTmp)
valueTmp.sort()
if deleteBoundaryMergeCluster:
break
if not perfectBoundary:
# Gets the new segments, modified by the previous steps
listHypRefSegment=list()
......@@ -1145,22 +1160,30 @@ def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,mod
listTmp.append(y)
elif tolerance!=0 and y['start']>=(valueRef['start']-tolerance):
listTmp.append(y)
if modeNoGap == True:
clusterSelected=listTmp[0]['cluster']
actionsSegmentationBoundaryMerge.append(copy.deepcopy([listTmp[0],listTmp[1]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([listTmp[0],listTmp[1]]))
if modeNoGap == True:
listTmp[1]['cluster']=clusterSelected
newSegment,valueTmp=mergeSegment(listTmp[0],listTmp[1],valueTmp)
if not (not deleteBoundarySameConsecutiveSpk and listTmp[0]['cluster']==listTmp[1]['cluster']):
actionsSegmentationBoundaryMerge.append(copy.deepcopy([listTmp[0],listTmp[1]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([listTmp[0],listTmp[1]]))
if modeNoGap == True and listTmp[0]['cluster']!=listTmp[1]['cluster']:
listTmp[1]['cluster']=listTmp[0]['cluster']
newSegment,valueTmp=mergeSegment(listTmp[0],listTmp[1],valueTmp)
else:
newSegment=listTmp[1]
for y in range(2,len(listTmp)):
if modeNoGap == True:
listTmp[y]['cluster']=clusterSelected
if not (Segment.intersection(newSegment,listTmp[y]) is not None or newSegment["stop"]==listTmp[y]["start"] or newSegment["start"]==listTmp[y]["stop"]):
logging.error("Cannot have absence of a segment in Transcriber mode.")
raise Exception("Absence of a segment.")
actionsSegmentationBoundaryMerge.append(copy.deepcopy([newSegment,listTmp[y]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([newSegment,listTmp[y]]))
newSegment,valueTmp=mergeSegment(newSegment,listTmp[y],valueTmp)
if not (not deleteBoundarySameConsecutiveSpk and newSegment['cluster']==listTmp[y]['cluster']):
actionsSegmentationBoundaryMerge.append(copy.deepcopy([newSegment,listTmp[y]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([newSegment,listTmp[y]]))
if modeNoGap == True and newSegment['cluster']!=listTmp[y]['cluster']:
valueTmp=dropSegment(listTmp[y],valueTmp)
listTmp[y]['cluster']=newSegment['cluster']
valueTmp.append_seg(listTmp[y])
valueTmp.sort()
newSegment,valueTmp=mergeSegment(newSegment,listTmp[y],valueTmp)
else:
newSegment=listTmp[y]
# Updates diarHyp
diarHyp=valueTmp
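The added branch above renames the right-hand segment before merging so that mergeSegment never sees two cluster names; condensed, the pattern looks like this (a sketch, assuming this module's dropSegment and mergeSegment helpers, with mergeSegment returning the merged segment and the updated diarization as in the code above):

``` python
def rename_then_merge(left, right, diar):
    if left['cluster'] != right['cluster']:
        diar = dropSegment(right, diar)      # remove under the old name
        right['cluster'] = left['cluster']   # adopt the left cluster name
        diar.append_seg(right)
        diar.sort()
    return mergeSegment(left, right, diar)   # -> (newSegment, diar)
```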
......
S4D tutorials
===
Here you will find short tutorials on how to use different components of S4D to train and run a complete speaker diarization system.
1. [Train a PLDA model for i-vector clustering](tuto_1_iv_model.ipynb)
2. [Perform a BIC diarization](tuto_2_diar_bic.ipynb)
3. [Use i-vectors for speaker clustering](tuto_3_iv_plda_clustering.ipynb)
\ No newline at end of file
%% Cell type:markdown id: tags:
Train model for Diarization
====
This script trains UBM, TV and PLDA models for a diarization system.
Initialization
---
%% Cell type:code id: tags:
``` python
import logging
from s4d.diar import Diar
from s4d.utils import *
from sidekit import Mixture, FactorAnalyser, StatServer, IdMap
import numpy
import re
import sidekit
from sidekit.sidekit_io import *
try:
from sortedcontainers import SortedDict as dict
except ImportError:
pass
```
%% Cell type:code id: tags:
``` python
init_logging(level=logging.INFO)
num_thread = 4
audio_dir = '../data/train/{}.wav'
ubm_seg_fn = './data/seg/ubm_ester.seg'
nb_gauss = 1024
mfcc_ubm_fn = './data/mfcc/ubm.h5'
ubm_idmap_fn = './data/mfcc/ubm_idmap.txt'
ubm_fn = './data/model/ester_ubm_'+str(nb_gauss)+'.h5'
tv_seg_fn = './data/seg/train.tv.seg'
rank_tv = 300
it_max_tv = 10
mfcc_tv_fn = './data/mfcc/tv.h5'
tv_idmap_fn = './data/mfcc/tv_idmap.h5'
tv_stat_fn = './data/model/tv.stat.h5'
tv_fn = './data/model/tv_'+str(rank_tv)+'.h5'
plda_seg_fn = './data/seg/train.plda.seg'
rank_plda = 150
it_max_plda = 10
mfcc_plda_fn = './data/mfcc/norm_plda.h5'
plda_idmap_fn = './data/mfcc/plda_idmap.h5'
plda_fn = './data/model/plda_'+str(rank_tv)+'_'+str(rank_plda)+'.h5'
norm_stat_fn = './data/model/norm.stat.h5'
norm_fn = './data/model/norm.h5'
norm_iv_fn = './data/model/norm.iv.h5'
matrices_fn = './data/model/matrices.h5'
model_fn = './data/model/ester_model_{}_{}_{}.h5'.format(nb_gauss, rank_tv, rank_plda)
```
%% Cell type:markdown id: tags:
Step 1: UBM
---
Extract MFCC for the UBM
%% Cell type:code id: tags:
``` python
logging.info('Computing MFCC for UBM')
diar_ubm = Diar.read_seg(ubm_seg_fn, normalize_cluster=True)
fe = get_feature_extractor(audio_dir, 'sid')
ubm_idmap = fe.save_multispeakers(diar_ubm.id_map(), output_feature_filename=mfcc_ubm_fn, keep_all=False)
ubm_idmap.write_txt(ubm_idmap_fn)
```
%% Cell type:markdown id: tags:
Train the UBM by EM
%% Cell type:code id: tags:
``` python
ubm_idmap = IdMap.read_txt(ubm_idmap_fn)
fs = get_feature_server(mfcc_ubm_fn, 'sid')
spk_lst = ubm_idmap.rightids
ubm = Mixture()
ubm.EM_split(fs, spk_lst, nb_gauss,
iterations=(1, 2, 2, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8), num_thread=num_thread,
llk_gain=0.01)
ubm.write(ubm_fn, prefix='ubm/')
```
%% Cell type:markdown id: tags:
Step 2: TV
---
Extract MFCC for TV
%% Cell type:code id: tags:
``` python
logging.info('Computing MFCC for TV')
diar_tv = Diar.read_seg(tv_seg_fn, normalize_cluster=True)
fe = get_feature_extractor(audio_dir, 'sid')
tv_idmap = fe.save_multispeakers(diar_tv.id_map(), output_feature_filename=mfcc_tv_fn, keep_all=False)
tv_idmap.write(tv_idmap_fn)
```
%% Cell type:markdown id: tags:
Train a Total Variability model using the FactorAnalyser class
%% Cell type:code id: tags:
``` python
tv_idmap = IdMap.read(tv_idmap_fn)
ubm = Mixture()
ubm.read(ubm_fn, prefix='ubm/')
fs = get_feature_server(mfcc_tv_fn, 'sid')
tv_idmap.leftids = numpy.copy(tv_idmap.rightids)
tv_stat = StatServer(tv_idmap, ubm.get_distrib_nb(), ubm.dim())
tv_stat.accumulate_stat(ubm=ubm, feature_server=fs, seg_indices=range(tv_stat.segset.shape[0]), num_thread=num_thread)
tv_stat.write(tv_stat_fn)
fa = FactorAnalyser()
fa.total_variability(tv_stat_fn, ubm, rank_tv, nb_iter=it_max_tv, batch_size=1000, num_thread=num_thread)
write_tv_hdf5([fa.F, fa.mean, fa.Sigma], tv_fn)
```
%% Cell type:markdown id: tags:
Step 3: PLDA
---
Extract the MFCC for the PLDA
%% Cell type:code id: tags:
``` python
logging.info('Computing MFCC for PLDA')
diar_plda = Diar.read_seg(plda_seg_fn, normalize_cluster=True)
fe = get_feature_extractor(audio_dir, 'sid')
plda_idmap = fe.save_multispeakers(diar_plda.id_map(), output_feature_filename=mfcc_plda_fn, keep_all=False)
plda_idmap.write(plda_idmap_fn)
```
%% Cell type:markdown id: tags:
Accumulate statistics
%% Cell type:code id: tags:
``` python
plda_idmap = IdMap.read(plda_idmap_fn)
ubm = Mixture()
ubm.read(ubm_fn, prefix='ubm/')
tv, tv_mean, tv_sigma = read_tv_hdf5(tv_fn)
fs = get_feature_server(mfcc_plda_fn, 'sid')
plda_norm_stat = StatServer(plda_idmap, ubm.get_distrib_nb(), ubm.dim())
plda_norm_stat.accumulate_stat(ubm=ubm, feature_server=fs,
seg_indices=range(plda_norm_stat.segset.shape[0]), num_thread=num_thread)
plda_norm_stat.write(norm_stat_fn)
```
%% Cell type:markdown id: tags:
Extract i-vectors and compute norm
%% Cell type:code id: tags:
``` python
fa = FactorAnalyser(F=tv, mean=tv_mean, Sigma=tv_sigma)
norm_iv = fa.extract_ivectors(ubm, norm_stat_fn, num_thread=num_thread)
norm_iv.write(norm_iv_fn)
norm_mean, norm_cov = norm_iv.estimate_spectral_norm_stat1(1, 'sphNorm')
write_norm_hdf5([norm_mean, norm_cov], norm_fn)
norm_iv.spectral_norm_stat1(norm_mean[:1], norm_cov[:1])
```
%% Cell type:markdown id: tags:
Train the PLDA model
%% Cell type:code id: tags:
``` python
fa = FactorAnalyser()
fa.plda(norm_iv, rank_plda, nb_iter=it_max_plda)
write_plda_hdf5([fa.mean, fa.F, numpy.zeros((rank_tv, 0)), fa.Sigma], plda_fn)
```
%% Cell type:markdown id: tags:
Step 4: Compute additional data (optional)
---
Adding matrices for additional scoring methods:
* Mahalanobis matrix
* Lower Cholesky decomposition of the WCCN matrix
* Within- and between-class covariance matrices
%% Cell type:code id: tags:
``` python
iv = StatServer(norm_iv_fn)
matrix_dict = {}
logging.info('compute mahalanobis_matrix')
mahalanobis_matrix = iv.get_mahalanobis_matrix_stat1()
matrix_dict['mahalanobis_matrix'] = mahalanobis_matrix
logging.info('compute wccn_choleski')
wccn_choleski = iv.get_wccn_choleski_stat1()
matrix_dict['wccn_choleski'] = wccn_choleski
logging.info('compute two_covariance')
within_covariance = iv.get_within_covariance_stat1()
matrix_dict['two_covariance/within_covariance'] = within_covariance
between_covariance = iv.get_between_covariance_stat1()
matrix_dict['two_covariance/between_covariance'] = between_covariance
write_dict_hdf5(matrix_dict, matrices_fn)
```
%% Cell type:markdown id: tags:
Step 5: Merge into one model
---
%% Cell type:code id: tags:
``` python
with h5py.File(model_fn, 'w') as model:
for fn in [ubm_fn, tv_fn, norm_fn, plda_fn, matrices_fn]:
if not os.path.exists(fn):
continue
with h5py.File(fn, 'r') as fh:
for group in fh:
logging.info(group)
fh.copy(group, model)
```
%% Cell type:markdown id: tags:
i-vector clustering with PLDA scoring
===
This script demonstrates the use of several clustering algorithms using PLDA scoring and i-vectors. The algorithms proposed are:
- Integer Linear Programming (ILP) IV
- HAC IV
- Connected Components (CC) IV
- Combination of CC and HAC, and CC and ILP
It takes as input the segments generated by the second tutorial (BIC-HAC) and uses the model learned in the first.
%% Cell type:code id: tags:
``` python
%matplotlib inline
from s4d.diar import Diar
from s4d.utils import *
from s4d import scoring
from s4d.model_iv import ModelIV
from s4d.clustering.ilp_iv import ilp_iv
from s4d.clustering.hac_iv import hac_iv
from s4d.clustering.cc_iv import connexted_component
from sidekit.sidekit_io import *
from sidekit.bosaris import IdMap, Scores
import matplotlib.pyplot as plt
import logging
import numpy
import copy
import sys
import os
```
%%%% Output: stream
Import theano
%%%% Output: stream
Can not use cuDNN on context None: Disabled by dnn.enabled flag
Mapped name None to device cuda: GeForce GTX TITAN X (0000:03:00.0)
%% Cell type:code id: tags:
``` python
init_logging(level=logging.INFO)
data_dir = 'data'
model_fn = os.path.join(data_dir, 'model', 'ester_model.h5')
```
%% Cell type:code id: tags:
``` python
show = '20041008_1800_1830_INFO_DGA'
audio_fn = os.path.join(data_dir, 'audio', show + '.wav')
out_dir = os.path.join('out', show)
mfcc_fn = os.path.join(out_dir, show + '.test_mfcc.h5')
bic_fn = os.path.join(out_dir, show + '.d.seg')
idmap_fn = os.path.join(out_dir, show + '.idmap.h5')
score_fn = os.path.join(out_dir, show + '.score_plda.h5')
diar_bic = Diar.read_seg(bic_fn)
```
%% Cell type:markdown id: tags:
Extracting MFCC
===
%% Cell type:code id: tags:
``` python
fe = get_feature_extractor(audio_fn, type_feature_extractor='sid')
idmap_bic = fe.save_multispeakers(diar_bic.id_map(), output_feature_filename=mfcc_fn, keep_all=False)
idmap_bic.write(idmap_fn)
```
%% Cell type:markdown id: tags:
PLDA scoring
===
Train a PLDA model for the show and compute the distance matrix
%% Cell type:code id: tags:
``` python
model_iv = ModelIV(model_fn)
idmap_bic = IdMap(idmap_fn)
fs = get_feature_server(mfcc_fn, 'sid')
model_iv.train(fs, idmap_bic)
distance = model_iv.score_plda_slow()
distance.write(score_fn)
```
%%%% Output: stream
load: data/model/ester_model.h5
%%%% Output: stream
2018-06-20 17:11:30,775 - INFO - out/20041008_1800_1830_INFO_DGA ## 20041008_1800_1830_INFO_DGA ## .test_mfcc.h5