Commit 99e43094 authored by Florent Desnous 's avatar Florent Desnous
Browse files
parents 28cedd13 33800b29
...@@ -3,4 +3,4 @@ s4d.egg-info/ ...@@ -3,4 +3,4 @@ s4d.egg-info/
*/__pycache__/ */__pycache__/
*.pyc *.pyc
dist/ dist/
tutorials/
...@@ -221,9 +221,10 @@ def automatonAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,diarFinal__clus ...@@ -221,9 +221,10 @@ def automatonAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,diarFinal__clus
## tolerance: In centiseconds ## tolerance: In centiseconds
## diarFinal__clusterToDeleteAccordingToDiarRef: List of clusters to delete in the diarFinal only ## diarFinal__clusterToDeleteAccordingToDiarRef: List of clusters to delete in the diarFinal only
## modeNoGap: Drops or not the segment actions (i.e. createSegment & deleteSegment) ## modeNoGap: Drops or not the segment actions (i.e. createSegment & deleteSegment)
## mergeStrat_BiggestCluster: Whether we merge in the temporal order or first the biggest cluster for a given reference segment ## modeNoGap__mergeStrat_BiggestCluster: Whether we merge in the temporal order or first the biggest cluster for a given reference segment (only useful when the modeNoGap is False)
def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=False,mergeStrat_BiggestCluster=False,diarFinal__clusterToDeleteAccordingToDiarRef=list()): ## deleteBoundarySameConsecutiveSpk: Whether we delete a boundary for two consecutive segments with the same speaker
assert isinstance(diarHyp,Diar) and isinstance(diarRef,Diar) and isinstance(mergeStrat_BiggestCluster,bool) and isinstance(modeNoGap,bool) and (diarUem is None or isinstance(diarUem,Diar)) and isinstance(tolerance,numbers.Number) and isinstance(diarFinal__clusterToDeleteAccordingToDiarRef,list) def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=False,modeNoGap__mergeStrat_BiggestCluster=False,diarFinal__clusterToDeleteAccordingToDiarRef=list(),deleteBoundarySameConsecutiveSpk=False):
assert isinstance(diarHyp,Diar) and isinstance(diarRef,Diar) and isinstance(modeNoGap__mergeStrat_BiggestCluster,bool) and isinstance(modeNoGap,bool) and (diarUem is None or isinstance(diarUem,Diar)) and isinstance(tolerance,numbers.Number) and isinstance(diarFinal__clusterToDeleteAccordingToDiarRef,list) and isinstance(deleteBoundarySameConsecutiveSpk,bool)
for u in diarFinal__clusterToDeleteAccordingToDiarRef: for u in diarFinal__clusterToDeleteAccordingToDiarRef:
assert isinstance(u,str) assert isinstance(u,str)
...@@ -289,6 +290,9 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal ...@@ -289,6 +290,9 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
diarRef.sort() diarRef.sort()
diarHyp.sort() diarHyp.sort()
tolerance=abs(tolerance) tolerance=abs(tolerance)
if not strictBoundary:
diarRef.pack()
diarHyp.pack()
assert len(diarOverlapArea(diarRef))==0, "Error: diarRef parameter have some overlapped segments.\nReason: No overlap segment allowed.\nSolution: Please put them apart.\n" assert len(diarOverlapArea(diarRef))==0, "Error: diarRef parameter have some overlapped segments.\nReason: No overlap segment allowed.\nSolution: Please put them apart.\n"
assert len(diarOverlapArea(diarHyp))==0, "Error: diarHyp parameter have some overlapped segments.\nReason: No overlap segment allowed.\nSolution: Please put them apart.\n" assert len(diarOverlapArea(diarHyp))==0, "Error: diarHyp parameter have some overlapped segments.\nReason: No overlap segment allowed.\nSolution: Please put them apart.\n"
...@@ -527,7 +531,7 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal ...@@ -527,7 +531,7 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
valueBoundaryStart=copy.deepcopy(y['stop']) valueBoundaryStart=copy.deepcopy(y['stop'])
if valueBoundaryStart is None: if valueBoundaryStart is None:
valueBoundaryStart=valueRef['start'] valueBoundaryStart=valueRef['start']
if mergeStrat_BiggestCluster == True: if modeNoGap__mergeStrat_BiggestCluster == True:
# Gets the cluster (the one which has the most present frames) # Gets the cluster (the one which has the most present frames)
dictHypRefSegmentDuration=dict() dictHypRefSegmentDuration=dict()
for y in listHypRefSegment: for y in listHypRefSegment:
...@@ -542,10 +546,10 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal ...@@ -542,10 +546,10 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
if cls['start']>y['start']: if cls['start']>y['start']:
cls=y cls=y
clusterName=cls['cluster'] clusterName=cls['cluster']
if modeNoGap == False:
for idx,z in enumerate(listHypRefSegment):
# Moves the boundaries # Moves the boundaries
# Precondition for this to work correctly: listHypRefSegment is sorted in ascending order on start and does not extend beyond the values valueRef['stop'] and valueRef['start'] # Precondition for this to work correctly: listHypRefSegment is sorted in ascending order on start and does not extend beyond the values valueRef['stop'] and valueRef['start']
if modeNoGap == False:
for idx,z in enumerate(listHypRefSegment):
nearStop=valueRef['stop'] nearStop=valueRef['stop']
if idx==0: if idx==0:
boundStop=z['stop'] boundStop=z['stop']
...@@ -592,17 +596,16 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal ...@@ -592,17 +596,16 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
elif tolerance!=0 and y['start']>=(valueRef['start']-tolerance): elif tolerance!=0 and y['start']>=(valueRef['start']-tolerance):
listHypRefSegment.append(y) listHypRefSegment.append(y)
# Replaces the segments which are not in the correct cluster # Replaces the segments which are not in the correct cluster
if modeNoGap == False:
replaced=False replaced=False
for y in listHypRefSegment: for y in listHypRefSegment:
if y['cluster']!=clusterName: if y['cluster']!=clusterName:
replaced=True replaced=True
yTmp=copy.deepcopy(y) yTmp=copy.deepcopy(y)
yTmp['cluster']=clusterName yTmp['cluster']=clusterName
if modeNoGap == False:
actionsSegmentationSegmentDelete.append(copy.deepcopy(y)) actionsSegmentationSegmentDelete.append(copy.deepcopy(y))
actionsIncrementalSegmentationSegmentDeleteTurn.append(copy.deepcopy(y)) actionsIncrementalSegmentationSegmentDeleteTurn.append(copy.deepcopy(y))
valueTmp=dropSegment(y,valueTmp) valueTmp=dropSegment(y,valueTmp)
if modeNoGap == False:
actionsSegmentationSegmentCreate.append(copy.deepcopy(Segment([valueRef['show'],yTmp['cluster'],yTmp['cluster_type'],yTmp['start'],yTmp['stop']],['show','cluster','cluster_type','start','stop']))) actionsSegmentationSegmentCreate.append(copy.deepcopy(Segment([valueRef['show'],yTmp['cluster'],yTmp['cluster_type'],yTmp['start'],yTmp['stop']],['show','cluster','cluster_type','start','stop'])))
actionsIncrementalSegmentationSegmentCreateTurn.append(copy.deepcopy(Segment([valueRef['show'],yTmp['cluster'],yTmp['cluster_type'],yTmp['start'],yTmp['stop']],['show','cluster','cluster_type','start','stop']))) actionsIncrementalSegmentationSegmentCreateTurn.append(copy.deepcopy(Segment([valueRef['show'],yTmp['cluster'],yTmp['cluster_type'],yTmp['start'],yTmp['stop']],['show','cluster','cluster_type','start','stop'])))
valueTmp.append_seg(yTmp) valueTmp.append_seg(yTmp)
...@@ -618,17 +621,27 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal ...@@ -618,17 +621,27 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
listTmp.append(y) listTmp.append(y)
elif tolerance!=0 and y['start']>=(valueRef['start']-tolerance): elif tolerance!=0 and y['start']>=(valueRef['start']-tolerance):
listTmp.append(y) listTmp.append(y)
if not (not deleteBoundarySameConsecutiveSpk and listTmp[0]['cluster']==listTmp[1]['cluster']):
actionsSegmentationBoundaryMerge.append(copy.deepcopy([listTmp[0],listTmp[1]])) actionsSegmentationBoundaryMerge.append(copy.deepcopy([listTmp[0],listTmp[1]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([listTmp[0],listTmp[1]])) actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([listTmp[0],listTmp[1]]))
if modeNoGap == True and listTmp[0]['cluster']!=listTmp[1]['cluster']:
listTmp[1]['cluster']=listTmp[0]['cluster']
newSegment,valueTmp=mergeSegment(listTmp[0],listTmp[1],valueTmp) newSegment,valueTmp=mergeSegment(listTmp[0],listTmp[1],valueTmp)
else:
newSegment=listTmp[1]
for y in range(2,len(listTmp)): for y in range(2,len(listTmp)):
if modeNoGap == True: if modeNoGap == True:
if not (Segment.intersection(newSegment,listTmp[y]) is not None or newSegment["stop"]==listTmp[y]["start"] or newSegment["start"]==listTmp[y]["stop"]): if not (Segment.intersection(newSegment,listTmp[y]) is not None or newSegment["stop"]==listTmp[y]["start"] or newSegment["start"]==listTmp[y]["stop"]):
logging.error("Cannot have absence of a segment in Transcriber mode.") logging.error("Cannot have absence of a segment in Transcriber mode.")
raise Exception("Absence of a segment.") raise Exception("Absence of a segment.")
if not (not deleteBoundarySameConsecutiveSpk and newSegment['cluster']==listTmp[y]['cluster']):
actionsSegmentationBoundaryMerge.append(copy.deepcopy([newSegment,listTmp[y]])) actionsSegmentationBoundaryMerge.append(copy.deepcopy([newSegment,listTmp[y]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([newSegment,listTmp[y]])) actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([newSegment,listTmp[y]]))
if modeNoGap == True and newSegment['cluster']!=listTmp[y]['cluster']:
listTmp[y]['cluster']=newSegment['cluster']
newSegment,valueTmp=mergeSegment(newSegment,listTmp[y],valueTmp) newSegment,valueTmp=mergeSegment(newSegment,listTmp[y],valueTmp)
else:
newSegment=listTmp[y]
# Updates diarHyp # Updates diarHyp
diarHyp=valueTmp diarHyp=valueTmp
...@@ -709,8 +722,10 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal ...@@ -709,8 +722,10 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
## tolerance: In centiseconds ## tolerance: In centiseconds
## diarFinal__clusterToDeleteAccordingToDiarRef: List of clusters to delete in the diarFinal only ## diarFinal__clusterToDeleteAccordingToDiarRef: List of clusters to delete in the diarFinal only
## modeNoGap: Drops or not the segment actions (i.e. createSegment & deleteSegment) ## modeNoGap: Drops or not the segment actions (i.e. createSegment & deleteSegment)
def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=False,diarFinal__clusterToDeleteAccordingToDiarRef=list()): ## deleteBoundarySameConsecutiveSpk: Whether we delete a boundary for two consecutive segments with the same speaker
assert isinstance(diarHyp,Diar) and isinstance(diarRef,Diar) and isinstance(modeNoGap,bool) and (diarUem is None or isinstance(diarUem,Diar)) and isinstance(tolerance,numbers.Number) and isinstance(diarFinal__clusterToDeleteAccordingToDiarRef,list) ## deleteBoundaryMergeCluster: The action "delete a boundary" can merge two consecutive segments with different cluster names (it takes the name of the left/first segment)
def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=False,diarFinal__clusterToDeleteAccordingToDiarRef=list(),deleteBoundarySameConsecutiveSpk=False,deleteBoundaryMergeCluster=False):
assert isinstance(diarHyp,Diar) and isinstance(diarRef,Diar) and isinstance(modeNoGap,bool) and (diarUem is None or isinstance(diarUem,Diar)) and isinstance(tolerance,numbers.Number) and isinstance(diarFinal__clusterToDeleteAccordingToDiarRef,list) and isinstance(deleteBoundarySameConsecutiveSpk,bool) and isinstance(deleteBoundaryMergeCluster,bool)
for u in diarFinal__clusterToDeleteAccordingToDiarRef: for u in diarFinal__clusterToDeleteAccordingToDiarRef:
assert isinstance(u,str) assert isinstance(u,str)
...@@ -1056,11 +1071,9 @@ def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,mod ...@@ -1056,11 +1071,9 @@ def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,mod
actionsIncrementalAssignmentCreateTurn.append(copy.deepcopy([valueRef['cluster'],z['cluster'],copy.deepcopy(z)])) actionsIncrementalAssignmentCreateTurn.append(copy.deepcopy([valueRef['cluster'],z['cluster'],copy.deepcopy(z)]))
else: else:
if z['cluster'] == dictionary[valueRef['cluster']]: if z['cluster'] == dictionary[valueRef['cluster']]:
if (modeNoGap == True and idx==0) or (modeNoGap == False):
actionsAssignmentNothing.append(copy.deepcopy(z)) actionsAssignmentNothing.append(copy.deepcopy(z))
actionsIncrementalAssignmentNothingTurn.append(copy.deepcopy(z)) actionsIncrementalAssignmentNothingTurn.append(copy.deepcopy(z))
else: else:
if (modeNoGap == True and idx==0) or (modeNoGap == False):
actionsAssignmentChange.append(copy.deepcopy([dictionary[valueRef['cluster']],z])) actionsAssignmentChange.append(copy.deepcopy([dictionary[valueRef['cluster']],z]))
actionsIncrementalAssignmentChangeTurn.append(copy.deepcopy([dictionary[valueRef['cluster']],z])) actionsIncrementalAssignmentChangeTurn.append(copy.deepcopy([dictionary[valueRef['cluster']],z]))
applyChange=True applyChange=True
...@@ -1071,6 +1084,8 @@ def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,mod ...@@ -1071,6 +1084,8 @@ def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,mod
valueTmp=dropSegment(z,valueTmp) valueTmp=dropSegment(z,valueTmp)
valueTmp.append_seg(segmentTmp) valueTmp.append_seg(segmentTmp)
valueTmp.sort() valueTmp.sort()
if deleteBoundaryMergeCluster:
break
if not perfectBoundary: if not perfectBoundary:
# Gets the new segments, modified by the previous steps # Gets the new segments, modified by the previous steps
listHypRefSegment=list() listHypRefSegment=list()
...@@ -1145,22 +1160,30 @@ def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,mod ...@@ -1145,22 +1160,30 @@ def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,mod
listTmp.append(y) listTmp.append(y)
elif tolerance!=0 and y['start']>=(valueRef['start']-tolerance): elif tolerance!=0 and y['start']>=(valueRef['start']-tolerance):
listTmp.append(y) listTmp.append(y)
if modeNoGap == True: if not (not deleteBoundarySameConsecutiveSpk and listTmp[0]['cluster']==listTmp[1]['cluster']):
clusterSelected=listTmp[0]['cluster']
actionsSegmentationBoundaryMerge.append(copy.deepcopy([listTmp[0],listTmp[1]])) actionsSegmentationBoundaryMerge.append(copy.deepcopy([listTmp[0],listTmp[1]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([listTmp[0],listTmp[1]])) actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([listTmp[0],listTmp[1]]))
if modeNoGap == True: if modeNoGap == True and listTmp[0]['cluster']!=listTmp[1]['cluster']:
listTmp[1]['cluster']=clusterSelected listTmp[1]['cluster']=listTmp[0]['cluster']
newSegment,valueTmp=mergeSegment(listTmp[0],listTmp[1],valueTmp) newSegment,valueTmp=mergeSegment(listTmp[0],listTmp[1],valueTmp)
else:
newSegment=listTmp[1]
for y in range(2,len(listTmp)): for y in range(2,len(listTmp)):
if modeNoGap == True: if modeNoGap == True:
listTmp[y]['cluster']=clusterSelected
if not (Segment.intersection(newSegment,listTmp[y]) is not None or newSegment["stop"]==listTmp[y]["start"] or newSegment["start"]==listTmp[y]["stop"]): if not (Segment.intersection(newSegment,listTmp[y]) is not None or newSegment["stop"]==listTmp[y]["start"] or newSegment["start"]==listTmp[y]["stop"]):
logging.error("Cannot have absence of a segment in Transcriber mode.") logging.error("Cannot have absence of a segment in Transcriber mode.")
raise Exception("Absence of a segment.") raise Exception("Absence of a segment.")
if not (not deleteBoundarySameConsecutiveSpk and newSegment['cluster']==listTmp[y]['cluster']):
actionsSegmentationBoundaryMerge.append(copy.deepcopy([newSegment,listTmp[y]])) actionsSegmentationBoundaryMerge.append(copy.deepcopy([newSegment,listTmp[y]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([newSegment,listTmp[y]])) actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([newSegment,listTmp[y]]))
if modeNoGap == True and newSegment['cluster']!=listTmp[y]['cluster']:
valueTmp=dropSegment(listTmp[y],valueTmp)
listTmp[y]['cluster']=newSegment['cluster']
valueTmp.append_seg(listTmp[y])
valueTmp.sort()
newSegment,valueTmp=mergeSegment(newSegment,listTmp[y],valueTmp) newSegment,valueTmp=mergeSegment(newSegment,listTmp[y],valueTmp)
else:
newSegment=listTmp[y]
# Updates diarHyp # Updates diarHyp
diarHyp=valueTmp diarHyp=valueTmp
......
S4D tutorials
===
Here you will find short tutorials on how to use different components of S4D to train and run a complete speaker diarization system.
1. [Train a PLDA model for i-vector clustering](tuto_1_iv_model.ipynb)
2. [Perform a BIC diarization](tuto_2_diar_bic.ipynb)
3. [Use i-vectors for speaker clustering](tuto_3_iv_plda_clustering.ipynb)
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.