Commit 5c6ff3ac authored by Florent Desnous
Browse files

Modified perform() to use Cython interface instead of glpsol command

parent e34ffb0c
import os
from ecyglpki import Problem, SimplexControls
from scipy.cluster import hierarchy as hac
import matplotlib.pyplot as plt
from scipy import stats
......@@ -22,7 +22,107 @@ class ILP_IV:
hac.dendrogram(link, color_threshold=self.thr, labels=cluster_list)
plt.show()
def perform(self, filename='tmp.ilp', rm_tmp=True):
def perform(self, save_ilp=False):
    """
    Create an LP problem and solve it using the GLPK library
    (with the ecyglpki Cython interface).

    :param save_ilp: if True, saves the generated problem
        in CPLEX LP format (problem.ilp), and the solution of
        the problem (solution.ilp).
    :return: a tuple ``(table, cluster_dict)`` where ``table`` is a deep
        copy of ``self.diar`` with clusters renamed, and ``cluster_dict``
        maps a cluster name to the list of cluster names that were
        renamed to it.
    """
    # Tolerance used when deciding whether a column is selected: the
    # simplex solver returns floating point values, so an exact
    # comparison with 1 may miss values such as 0.9999999.
    tol = 1e-6

    distances, t = scores2distance(self.scores, self.thr)
    cluster_list = sorted(self.scores.modelset.tolist())
    nb_clusters = len(cluster_list)

    lp = Problem()
    lp.set_obj_dir('minimize')

    # columns: one "cluster<sep>cluster" column per cluster, cost 1
    known_cols = set()
    for cluster in cluster_list:
        col = '{}{}{}'.format(cluster, self.sep, cluster)
        known_cols.add(col)
        lp.add_named_cols(col)
        lp.set_obj_coef(col, 1)
        lp.set_col_bnds(col, 0, None)

    # normalisation term: 1 + sum of the distances that do not exceed
    # the threshold in the strictly lower triangular part of the
    # distance matrix (entries above the threshold are zeroed first)
    lower = np.tril(distances, -1)
    lower[lower > t] = 0
    s = np.sum(lower) + 1

    # columns: one "cluster_i<sep>cluster_j" column per pair i < j whose
    # distance is below the threshold, cost = normalised distance
    for i in range(nb_clusters):
        cluster_i = cluster_list[i]
        for j in range(i + 1, nb_clusters):
            if distances[i, j] < t:
                cluster_j = cluster_list[j]
                col = '{}{}{}'.format(cluster_i, self.sep, cluster_j)
                known_cols.add(col)
                lp.add_named_cols(col)
                lp.set_obj_coef(col, distances[i, j] / s)
                lp.set_col_bnds(col, 0, None)

    # rows
    for i, cluster_i in enumerate(cluster_list):
        row = 'S{}'.format(i)
        lp.add_named_rows(row)
        r_cols = {'{}{}{}'.format(cluster_i, self.sep, cluster_i): 1}
        for j, cluster_j in enumerate(cluster_list):
            if i == j or not distances[i, j] < t:
                continue
            col = '{}{}{}'.format(cluster_i, self.sep, cluster_j)
            if col not in known_cols:
                # NOTE(review): columns created here (pairs with j < i)
                # get no objective coefficient, unlike the i < j columns
                # above -- confirm this is intended.
                known_cols.add(col)
                lp.add_named_cols(col)
                lp.set_col_bnds(col, 0, None)
            r_cols[col] = 1

            # boundaries <= 0: col - "cluster_j<sep>cluster_j" <= 0
            center = '{}{}{}'.format(cluster_j, self.sep, cluster_j)
            idx = lp.add_rows(1)
            lp.set_mat_row(idx, {center: -1, col: 1})
            lp.set_row_bnds(idx, None, 0)
        # the columns of row S<i> must sum to exactly 1
        lp.set_mat_row(row, r_cols)
        lp.set_row_bnds(row, 1, 1)

    if save_ilp:
        lp.write_lp('problem.ilp')

    # solving problem
    ctrl = SimplexControls()
    ctrl.presolve = True
    lp.simplex(ctrl)

    if save_ilp:
        lp.print_sol('solution.ilp')

    # collect the selected off-diagonal columns (GLPK columns are 1-based)
    cluster_dict = {}
    for k in range(1, lp.get_num_cols() + 1):
        names = lp.get_col_name(k).split(self.sep)
        activity = lp.get_col_prim(k)
        if abs(activity - 1) <= tol and names[0] != names[1]:
            cluster_dict.setdefault(names[1], []).append(names[0])

    table = copy.deepcopy(self.diar)
    for new_name, old_names in cluster_dict.items():
        table.rename('cluster', old_names, new_name)
    return table, cluster_dict
def _perform(self, filename='tmp.ilp', rm_tmp=True):
"""
Same as perform(), using glpk solver directly
instead of the Cython interface (slower).
"""
table = copy.deepcopy(self.diar)
logging.debug('ilp filename: %s', filename)
f = open(filename, 'w')
......@@ -117,4 +217,4 @@ class ILP_IV:
def ilp_iv(diar, scores, threshold=0.0):
    """
    Build an ILP_IV object from the arguments and run its clustering.

    :param diar: passed as first argument to ILP_IV
    :param scores: passed as second argument to ILP_IV
    :param threshold: passed as third argument to ILP_IV
    :return: the value returned by ILP_IV.perform()
    """
    ilp = ILP_IV(diar, scores, threshold)
    # perform() no longer accepts rm_tmp (its only keyword is save_ilp),
    # so it is called without arguments.
    return ilp.perform()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment