Commit 2d24a8c8 authored by Sulfyderz's avatar Sulfyderz
Browse files

Adding a method called "read_stm" in diar.py.

parent 566b6235
......@@ -806,6 +806,56 @@ class Diar():
fic.close()
return diarization
@classmethod
def read_stm(cls, filename, normalize_cluster=False, encoding="ISO-8859-1"):
    """
    Read a segmentation file in STM format.

    Every line that is not blank and does not start with ``#`` or ``;;`` is
    split on whitespace and interpreted as follows:

    * field 0: the show name
    * field 2: the speaker name, used as the cluster name
    * fields 3 and 4: start and stop times, each parsed as a float,
      multiplied by 100 and truncated to an int
    * field 5: a ``<...>`` label made of comma separated items, whose third
      non-empty item is the gender (``female`` or ``male``)

    Segments whose gender is ``female`` or ``male`` are appended with gender
    ``F`` or ``M``; any other value is appended without an explicit gender.

    Parsing stops at the first malformed line: the exception type and the
    offending line are logged and the segments read so far are returned.

    :param filename: the str input filename
    :param normalize_cluster: normalize the cluster by removing upper case
        and accents
    :param encoding: the text encoding used to open the file
    :return: a diarization object
    """
    gender_codes = {"female": "F", "male": "M"}
    # Compiled once: the pattern is applied to every line of the file.
    whitespace = re.compile(r'\s+')

    diarization = Diar()
    if not diarization._attributes.exist('gender'):
        diarization.add_attribut(new_attribut='gender', default='U')

    # Bound before the loop so the error handler can always log it, even
    # when the failure happens while reading the very first line.
    line = ''
    with open(filename, 'r', encoding=encoding) as fic:
        try:
            for line in fic:
                line = whitespace.sub(' ', line).strip()
                # Skip blank lines and comment lines.
                if not line or line.startswith(('#', ';;')):
                    continue
                # split line into fields
                fields = line.split()
                show = fields[0]
                loc = fields[2]
                if normalize_cluster:
                    loc = str2str_normalize(loc)
                start = int(float(fields[3]) * 100)
                stop = int(float(fields[4]) * 100)
                # "<a,b,gender>" -> ["a", "b", "gender"]
                label = fields[5].replace(">", "").replace("<", "")
                gender = label.replace(",", " ").split()[2]

                segment = dict(show=show, cluster=loc, start=start, stop=stop)
                if gender in gender_codes:
                    segment['gender'] = gender_codes[gender]
                diarization.append(**segment)
        except Exception:
            logging.error(sys.exc_info()[0])
            logging.error(line)
    return diarization
@classmethod
def read_mdtm(cls, filename, normalize_cluster=False, encoding="utf8"):
"""
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment