Commit 39134cb1 authored by Sulfyderz's avatar Sulfyderz
Browse files

[New]:Adding a new parameter to some automaton methods in diar_tools.

parent 0ff7d6a8
......@@ -220,10 +220,11 @@ def automatonAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,diarFinal__clus
## WARNING: The automaton follows the temporal order
## tolerance: In centiseconds
## diarFinal__clusterToDeleteAccordingToDiarRef: List of clusters to delete in the diarFinal only
## modeNoGap: Whether to drop the segment actions (i.e. createSegment & deleteSegment)
## mergeStrat_BiggestCluster: Whether, for a given reference segment, we merge towards the biggest cluster first (True) or in temporal order (False)
def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=False,mergeStrat_BiggestCluster=False,diarFinal__clusterToDeleteAccordingToDiarRef=list()):
assert isinstance(diarHyp,Diar) and isinstance(diarRef,Diar) and isinstance(mergeStrat_BiggestCluster,bool) and isinstance(modeNoGap,bool) and (diarUem is None or isinstance(diarUem,Diar)) and isinstance(tolerance,numbers.Number) and isinstance(diarFinal__clusterToDeleteAccordingToDiarRef,list)
## modeNoGap: Whether to drop the segment actions (i.e. createSegment & deleteSegment); when enabled, the "delete a boundary" action can also merge two consecutive segments with different cluster names (the merged segment takes the name of the left/first segment)
## modeNoGap__mergeStrat_BiggestCluster: Whether, for a given reference segment, we merge towards the biggest cluster first (True) or in temporal order (False); only useful when modeNoGap is False
## deleteBoundarySameConsecutiveSpk: Whether we delete a boundary for two consecutive segments with the same speaker
def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=False,modeNoGap__mergeStrat_BiggestCluster=False,diarFinal__clusterToDeleteAccordingToDiarRef=list(),deleteBoundarySameConsecutiveSpk=False):
assert isinstance(diarHyp,Diar) and isinstance(diarRef,Diar) and isinstance(modeNoGap__mergeStrat_BiggestCluster,bool) and isinstance(modeNoGap,bool) and (diarUem is None or isinstance(diarUem,Diar)) and isinstance(tolerance,numbers.Number) and isinstance(diarFinal__clusterToDeleteAccordingToDiarRef,list) and isinstance(deleteBoundarySameConsecutiveSpk,bool)
for u in diarFinal__clusterToDeleteAccordingToDiarRef:
assert isinstance(u,str)
......@@ -288,7 +289,10 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
showname=diarRef.unique('show')[0]
diarRef.sort()
diarHyp.sort()
tolerance=abs(tolerance)
tolerance=abs(tolerance)
if not strictBoundary:
diarRef.pack()
diarHyp.pack()
assert len(diarOverlapArea(diarRef))==0, "Error: diarRef parameter have some overlapped segments.\nReason: No overlap segment allowed.\nSolution: Please put them apart.\n"
assert len(diarOverlapArea(diarHyp))==0, "Error: diarHyp parameter have some overlapped segments.\nReason: No overlap segment allowed.\nSolution: Please put them apart.\n"
......@@ -527,7 +531,7 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
valueBoundaryStart=copy.deepcopy(y['stop'])
if valueBoundaryStart is None:
valueBoundaryStart=valueRef['start']
if mergeStrat_BiggestCluster == True:
if modeNoGap__mergeStrat_BiggestCluster == True:
# Gets the cluster (the one with the most frames present)
dictHypRefSegmentDuration=dict()
for y in listHypRefSegment:
......@@ -542,10 +546,10 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
if cls['start']>y['start']:
cls=y
clusterName=cls['cluster']
# Moves the boundaries
# Precondition: listHypRefSegment must be sorted in ascending order of start and must not extend beyond valueRef['start'] and valueRef['stop']
if modeNoGap == False:
for idx,z in enumerate(listHypRefSegment):
# Moves the boundaries
# Precondition: listHypRefSegment must be sorted in ascending order of start and must not extend beyond valueRef['start'] and valueRef['stop']
nearStop=valueRef['stop']
if idx==0:
boundStop=z['stop']
......@@ -592,22 +596,21 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
elif tolerance!=0 and y['start']>=(valueRef['start']-tolerance):
listHypRefSegment.append(y)
# Replaces the segments which are not in the correct cluster
replaced=False
for y in listHypRefSegment:
if y['cluster']!=clusterName:
replaced=True
yTmp=copy.deepcopy(y)
yTmp['cluster']=clusterName
if modeNoGap == False:
if modeNoGap == False:
replaced=False
for y in listHypRefSegment:
if y['cluster']!=clusterName:
replaced=True
yTmp=copy.deepcopy(y)
yTmp['cluster']=clusterName
actionsSegmentationSegmentDelete.append(copy.deepcopy(y))
actionsIncrementalSegmentationSegmentDeleteTurn.append(copy.deepcopy(y))
valueTmp=dropSegment(y,valueTmp)
if modeNoGap == False:
valueTmp=dropSegment(y,valueTmp)
actionsSegmentationSegmentCreate.append(copy.deepcopy(Segment([valueRef['show'],yTmp['cluster'],yTmp['cluster_type'],yTmp['start'],yTmp['stop']],['show','cluster','cluster_type','start','stop'])))
actionsIncrementalSegmentationSegmentCreateTurn.append(copy.deepcopy(Segment([valueRef['show'],yTmp['cluster'],yTmp['cluster_type'],yTmp['start'],yTmp['stop']],['show','cluster','cluster_type','start','stop'])))
valueTmp.append_seg(yTmp)
if replaced:
valueTmp.sort()
valueTmp.append_seg(yTmp)
if replaced:
valueTmp.sort()
# Merges among them if > 1
if len(listHypRefSegment)>1:
# Gets the new segments, modified by the previous steps
......@@ -618,17 +621,27 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
listTmp.append(y)
elif tolerance!=0 and y['start']>=(valueRef['start']-tolerance):
listTmp.append(y)
actionsSegmentationBoundaryMerge.append(copy.deepcopy([listTmp[0],listTmp[1]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([listTmp[0],listTmp[1]]))
newSegment,valueTmp=mergeSegment(listTmp[0],listTmp[1],valueTmp)
if not (not deleteBoundarySameConsecutiveSpk and listTmp[0]['cluster']==listTmp[1]['cluster']):
actionsSegmentationBoundaryMerge.append(copy.deepcopy([listTmp[0],listTmp[1]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([listTmp[0],listTmp[1]]))
if modeNoGap == True and listTmp[0]['cluster']!=listTmp[1]['cluster']:
listTmp[1]['cluster']=listTmp[0]['cluster']
newSegment,valueTmp=mergeSegment(listTmp[0],listTmp[1],valueTmp)
else:
newSegment=listTmp[1]
for y in range(2,len(listTmp)):
if modeNoGap == True:
if not (Segment.intersection(newSegment,listTmp[y]) is not None or newSegment["stop"]==listTmp[y]["start"] or newSegment["start"]==listTmp[y]["stop"]):
logging.error("Cannot have absence of a segment in Transcriber mode.")
raise Exception("Absence of a segment.")
actionsSegmentationBoundaryMerge.append(copy.deepcopy([newSegment,listTmp[y]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([newSegment,listTmp[y]]))
newSegment,valueTmp=mergeSegment(newSegment,listTmp[y],valueTmp)
if not (not deleteBoundarySameConsecutiveSpk and newSegment['cluster']==listTmp[y]['cluster']):
actionsSegmentationBoundaryMerge.append(copy.deepcopy([newSegment,listTmp[y]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([newSegment,listTmp[y]]))
if modeNoGap == True and newSegment['cluster']!=listTmp[y]['cluster']:
listTmp[y]['cluster']=newSegment['cluster']
newSegment,valueTmp=mergeSegment(newSegment,listTmp[y],valueTmp)
else:
newSegment=listTmp[y]
# Updates diarHyp
diarHyp=valueTmp
......@@ -708,9 +721,10 @@ def automatonSegmentation(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=Fal
## WARNING: The automaton follows the temporal order
## tolerance: In centiseconds
## diarFinal__clusterToDeleteAccordingToDiarRef: List of clusters to delete in the diarFinal only
## modeNoGap: Whether to drop the segment actions (i.e. createSegment & deleteSegment)
def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=False,diarFinal__clusterToDeleteAccordingToDiarRef=list()):
assert isinstance(diarHyp,Diar) and isinstance(diarRef,Diar) and isinstance(modeNoGap,bool) and (diarUem is None or isinstance(diarUem,Diar)) and isinstance(tolerance,numbers.Number) and isinstance(diarFinal__clusterToDeleteAccordingToDiarRef,list)
## modeNoGap: Whether to drop the segment actions (i.e. createSegment & deleteSegment); when enabled, the "delete a boundary" action can also merge two consecutive segments with different cluster names (the merged segment takes the name of the left/first segment)
## deleteBoundarySameConsecutiveSpk: Whether we delete a boundary for two consecutive segments with the same speaker
def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,modeNoGap=False,diarFinal__clusterToDeleteAccordingToDiarRef=list(),deleteBoundarySameConsecutiveSpk=False):
assert isinstance(diarHyp,Diar) and isinstance(diarRef,Diar) and isinstance(modeNoGap,bool) and (diarUem is None or isinstance(diarUem,Diar)) and isinstance(tolerance,numbers.Number) and isinstance(diarFinal__clusterToDeleteAccordingToDiarRef,list) and isinstance(deleteBoundarySameConsecutiveSpk,bool)
for u in diarFinal__clusterToDeleteAccordingToDiarRef:
assert isinstance(u,str)
......@@ -1145,22 +1159,27 @@ def automatonSegmentationAssignment(diarHyp,diarRef,diarUem=None,tolerance=0,mod
listTmp.append(y)
elif tolerance!=0 and y['start']>=(valueRef['start']-tolerance):
listTmp.append(y)
if modeNoGap == True:
clusterSelected=listTmp[0]['cluster']
actionsSegmentationBoundaryMerge.append(copy.deepcopy([listTmp[0],listTmp[1]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([listTmp[0],listTmp[1]]))
if modeNoGap == True:
listTmp[1]['cluster']=clusterSelected
newSegment,valueTmp=mergeSegment(listTmp[0],listTmp[1],valueTmp)
if not (not deleteBoundarySameConsecutiveSpk and listTmp[0]['cluster']==listTmp[1]['cluster']):
actionsSegmentationBoundaryMerge.append(copy.deepcopy([listTmp[0],listTmp[1]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([listTmp[0],listTmp[1]]))
if modeNoGap == True and listTmp[0]['cluster']!=listTmp[1]['cluster']:
listTmp[1]['cluster']=listTmp[0]['cluster']
newSegment,valueTmp=mergeSegment(listTmp[0],listTmp[1],valueTmp)
else:
newSegment=listTmp[1]['cluster']
for y in range(2,len(listTmp)):
if modeNoGap == True:
listTmp[y]['cluster']=clusterSelected
if not (Segment.intersection(newSegment,listTmp[y]) is not None or newSegment["stop"]==listTmp[y]["start"] or newSegment["start"]==listTmp[y]["stop"]):
logging.error("Cannot have absence of a segment in Transcriber mode.")
raise Exception("Absence of a segment.")
actionsSegmentationBoundaryMerge.append(copy.deepcopy([newSegment,listTmp[y]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([newSegment,listTmp[y]]))
newSegment,valueTmp=mergeSegment(newSegment,listTmp[y],valueTmp)
if not (not deleteBoundarySameConsecutiveSpk and newSegment['cluster']==listTmp[y]['cluster']):
actionsSegmentationBoundaryMerge.append(copy.deepcopy([newSegment,listTmp[y]]))
actionsIncrementalSegmentationBoundaryMergeTurn.append(copy.deepcopy([newSegment,listTmp[y]]))
if modeNoGap == True and newSegment['cluster']!=listTmp[y]['cluster']:
listTmp[y]['cluster']=newSegment['cluster']
newSegment,valueTmp=mergeSegment(newSegment,listTmp[y],valueTmp)
else:
newSegment=listTmp[y]
# Updates diarHyp
diarHyp=valueTmp
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment