Commit 8d64e00b, authored by glelan and committed by Gaël Le Lan
Browse files

Ndx read_txt() bugfix/refactor/speedup

parent 045ae1b1
......@@ -24,6 +24,7 @@ import h5py
import logging
import numpy
import sys
import pandas as pd
from sidekit.sidekit_wrappers import check_path_existance, deprecated
__author__ = "Anthony Larcher"
......@@ -203,36 +204,35 @@ class Ndx:
return ndx
def read_txt(cls, input_filename, header_present=False, separator=' '):
    """Creates an Ndx object from information stored in a text file.

    Every data row of the file describes one trial: the first field is the
    model identifier and the second field is the test segment identifier.
    Any further fields are ignored.

    :param input_filename: name of the file to read from
    :param header_present: set to True if the first line of the file is a
        header that must not be interpreted as a trial
    :param separator: field separator (e.g. ' ', '\\t', ','), handed to
        ``pandas.read_csv`` as ``sep``

    :return: a Ndx object whose ``modelset`` and ``segset`` hold the distinct
        identifiers in order of first appearance, and whose ``trialmask`` is
        True for each (model, segment) pair listed in the file
    """
    # Exactly one read: header=0 consumes the first line as column names,
    # header=None treats every line as data.
    # dtype=str keeps identifiers such as "1001" as strings instead of
    # letting pandas convert them to integers.
    pairs = pd.read_csv(input_filename,
                        sep=separator,
                        header=0 if header_present else None,
                        dtype=str)
    models = pairs.iloc[:, 0]
    testsegs = pairs.iloc[:, 1]

    # Series.unique() keeps the order of first appearance, so the Index
    # position of an identifier is its row/column in the trial mask.
    modelset = pd.Index(models.unique())
    segset = pd.Index(testsegs.unique())

    trialmask = numpy.zeros((modelset.shape[0], segset.shape[0]), dtype="bool")
    # Flag every listed trial in a single vectorised assignment rather than
    # one get_loc() lookup per row.
    trialmask[modelset.get_indexer(models), segset.get_indexer(testsegs)] = True

    ndx = cls()
    ndx.modelset = numpy.asarray(modelset, dtype=object)
    ndx.segset = numpy.asarray(segset, dtype=object)
    ndx.trialmask = trialmask

    assert ndx.validate(), "Wrong Ndx format"
    return ndx
def merge(self, ndx_list):
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment