GEDLIB  1.0
process_results.py
Go to the documentation of this file.
1 #//////////////////////////////////////////////////////////////////////////#
2 # #
3 # Copyright (C) 2018 by David B. Blumenthal #
4 # #
5 # This file is part of GEDLIB. #
6 # #
7 # GEDLIB is free software: you can redistribute it and/or modify it #
8 # under the terms of the GNU Lesser General Public License as published #
9 # by the Free Software Foundation, either version 3 of the License, or #
10 # (at your option) any later version. #
11 # #
12 # GEDLIB is distributed in the hope that it will be useful, #
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15 # GNU Lesser General Public License for more details. #
16 # #
17 # You should have received a copy of the GNU Lesser General Public #
18 # License along with GEDLIB. If not, see <http://www.gnu.org/licenses/>. #
19 # #
20 #//////////////////////////////////////////////////////////////////////////#
21 
22 
37 
38 '''Processes results of experiments for VLDB Journal paper.'''
39 
40 import csv
41 from pickle import NONE
42 import argparse
43 from decimal import Decimal
44 import os.path
45 
def computes_no_lb(method_name, method_config=""):
    """Return True if the given heuristic run provides no lower bound.

    A run is reported as computing no lower bound when its name belongs to a
    fixed list of heuristics, or when a non-empty configuration string is
    given whose first comma-separated field (after stripping two characters
    from each end) parses to an integer greater than 1.

    Args:
        method_name: Name of the heuristic, e.g. "BP" or "BRANCH".
        method_config: Optional configuration string of the run.

    Returns:
        bool: True if no lower bound is computed, False otherwise.
    """
    if method_name in {
        "BP", "SUBGRAPH", "WALKS", "RINGOPT", "RINGMS",
        "RINGMLDNN", "RINGMLSVM", "PREDICTDNN", "PREDICTSVM",
        "REFINE", "KREFINE", "BPBEAM", "IBPBEAM", "IPFP", "SA",
    }:
        return True
    if method_config == "":
        return False
    # The configuration is wrapped in two delimiter characters on each side.
    first_field = method_config[2:-2].split(",")[0]
    return int(first_field) > 1
81 
def computes_no_ub(method_name):
    """Return True if the named heuristic provides no upper bound.

    Args:
        method_name: Name of the heuristic.

    Returns:
        bool: True for "HED", "BRANCHCOMPACT", "PARTITION" and "HYBRID",
        False for every other name.
    """
    return method_name in {"HED", "BRANCHCOMPACT", "PARTITION", "HYBRID"}
93 
def is_ls_based(method):
    """Return True if ``method.name`` is one of the LS-based heuristics.

    Args:
        method: Object exposing a ``name`` attribute.

    Returns:
        bool: True for "REFINE", "KREFINE", "BPBEAM", "IBPBEAM" and "IPFP".
    """
    return method.name in {"REFINE", "KREFINE", "BPBEAM", "IBPBEAM", "IPFP"}
107 
def is_lsape_based(method):
    """Return True if ``method.name`` is one of the LSAPE-based heuristics.

    Args:
        method: Object exposing a ``name`` attribute.

    Returns:
        bool: True if the name is in the fixed list below, False otherwise.
    """
    return method.name in {
        "BP", "BRANCH", "BRANCHFAST", "BRANCHUNI", "STAR", "NODE",
        "SUBGRAPH", "WALKS", "RINGOPT", "RINGMS",
        "RINGMLDNN", "RINGMLSVM", "PREDICTDNN", "PREDICTSVM",
    }
139 
def uses_randpost(method):
    """Return True if an LS-based method has a positive third config field.

    Args:
        method: Object exposing ``name`` and ``config`` attributes.

    Returns:
        bool: False for methods that are not LS-based; otherwise whether the
        third comma-separated field of ``method.config`` (two characters
        stripped from each end) is an integer greater than 0.
    """
    if not is_ls_based(method):
        return False
    third_field = method.config[2:-2].split(",")[2]
    return int(third_field) > 0
145 
def uses_multi_start(method):
    """Return True if an LS-based method has a first config field above 1.

    Args:
        method: Object exposing ``name`` and ``config`` attributes.

    Returns:
        bool: False for methods that are not LS-based; otherwise whether the
        first comma-separated field of ``method.config`` (two characters
        stripped from each end) is an integer greater than 1.
    """
    if not is_ls_based(method):
        return False
    first_field = method.config[2:-2].split(",")[0]
    return int(first_field) > 1
151 
def uses_multi_sol(method):
    """Return True if an LSAPE-based method has a first config field above 1.

    Args:
        method: Object exposing ``name`` and ``config`` attributes.

    Returns:
        bool: False for methods that are not LSAPE-based; otherwise whether
        the first comma-separated field of ``method.config`` (two characters
        stripped from each end) is an integer greater than 1.
    """
    if not is_lsape_based(method):
        return False
    first_field = method.config[2:-2].split(",")[0]
    return int(first_field) > 1
157 
def uses_centralities(method):
    """Return True if an LSAPE-based method has a positive second config field.

    Args:
        method: Object exposing ``name`` and ``config`` attributes.

    Returns:
        bool: False for methods that are not LSAPE-based; otherwise whether
        the second comma-separated field of ``method.config`` (two characters
        stripped from each end) is a float greater than 0.
    """
    if not is_lsape_based(method):
        return False
    second_field = method.config[2:-2].split(",")[1]
    return float(second_field) > 0
163 
class Method:
    """Result record of one heuristic configuration.

    Holds the lower bound, upper bound, runtime and the two coefficients read
    from one result row, plus bookkeeping flags that are updated while the
    dominance graph is built. Every quantity exists in a lower-bound ("lb")
    and an upper-bound ("ub") flavour; the attribute ``consider_lb`` selects
    which flavour the accessor methods operate on.

    All LaTeX snippets are written with escaped backslashes so that the module
    also compiles on Python 3 (an unescaped backslash followed by "U" starts a
    unicode escape there); the emitted text is unchanged.
    """

    def __init__(self, name, lb, ub, t, coeff_lb, coeff_ub):
        """Initialize the record.

        Args:
            name: Two-element sequence ``[method name, config string]``.
            lb: Lower bound, any value accepted by ``Decimal``.
            ub: Upper bound, any value accepted by ``Decimal``.
            t: Runtime, any value accepted by ``Decimal``.
            coeff_lb: Coefficient for the lower bound.
            coeff_ub: Coefficient for the upper bound.
        """
        self.consider_lb = True
        self.name = name[0]
        # Normalize a misspelled method name.
        if self.name == "SUBGGRAPH":
            self.name = "SUBGRAPH"
        self.config = name[1]
        # Bounds and coefficients are rounded to two decimals, runtime to six.
        self.lb = float("{0:.2f}".format(Decimal(lb)))
        self.ub = float("{0:.2f}".format(Decimal(ub)))
        self.t = float("{:.6f}".format(Decimal(t)))
        self.coeff_lb = float("{:.2f}".format(Decimal(coeff_lb)))
        self.precise_coeff_lb = coeff_lb
        self.coeff_ub = float("{:.2f}".format(Decimal(coeff_ub)))
        self.precise_coeff_ub = coeff_ub
        no_lb = computes_no_lb(self.name, self.config)
        no_ub = computes_no_ub(self.name)
        # Every "best" flag starts out True for methods that compute the
        # respective bound; get_edge_label() clears them on comparison.
        self.is_fastest_lb = not no_lb
        self.is_fastest_ub = not no_ub
        self.has_tightest_lb = not no_lb
        self.has_tightest_ub = not no_ub
        self.has_best_coeff_lb = not no_lb
        self.has_best_coeff_ub = not no_ub
        self.is_maximum_lb = not no_lb
        self.is_maximum_ub = not no_ub
        self.discard_for_lb = no_lb
        self.discard_for_ub = no_ub
        self.score_lb = 0
        self.score_ub = 0
        self.adj_list_lb = []
        self.adj_list_ub = []

    @staticmethod
    def _three_way(left, right):
        """Return 1 if ``left > right``, 0 if they are equal, and -1 otherwise."""
        if left > right:
            return 1
        elif left == right:
            return 0
        else:
            return -1

    def stats(self):
        """Return a LaTeX smallmatrix with runtime, bound, coefficient and score."""
        parts = ["\\begin{tiny}$\\left(\\begin{smallmatrix}\\\\"]
        if self.consider_lb:
            parts.append("t\\text{ in \\si{\\second}} & d_{\\LB} & c_{\\LB} & s_{LB}\\\\")
        else:
            parts.append("t\\text{ in \\si{\\second}} & d_{\\UB} & c_{\\UB} & s_{UB}\\\\")
        parts.append("\\num{" + "{:.2E}".format(Decimal(str(self.t))) + "} & ")
        if self.consider_lb:
            parts.append("\\num{" + str(self.lb) + "} & ")
            parts.append("\\num{" + str(self.coeff_lb) + "} & ")
            parts.append("\\num{" + "{:.2f}".format(Decimal(str(self.score_lb))) + "}")
        else:
            # The upper-bound cells are separated by "} &" without a trailing blank.
            parts.append("\\num{" + str(self.ub) + "} &")
            parts.append("\\num{" + str(self.coeff_ub) + "} &")
            parts.append("\\num{" + "{:.2f}".format(Decimal(str(self.score_ub))) + "}")
        parts.append("\\end{smallmatrix}\\right)$\\end{tiny}")
        return "".join(parts)

    def tikz_descriptor(self):
        """Return the node text: name, config and, for maxima, the stats."""
        descriptor = "\\" + self.name
        if self.is_maximum() and (self.config != ""):
            descriptor = descriptor + " " + self.config
        if not self.is_maximum():
            descriptor = descriptor + "\\\\" + self.config
        if self.consider_lb and self.is_maximum_lb:
            descriptor = descriptor + "\\\\" + self.stats()
        if (not self.consider_lb) and self.is_maximum_ub:
            descriptor = descriptor + "\\\\" + self.stats()
        return descriptor

    def label(self):
        """Return coloured LaTeX markers for the "best" flags that are set.

        The markers for the active bound are joined by a LaTeX line break; an
        empty string is returned if no flag is set.
        """
        labels = []
        if self.consider_lb and self.has_tightest_lb:
            labels.append("\\textcolor{Blue}{$d^\\star_{\\LB}$}")
        if (not self.consider_lb) and self.has_tightest_ub:
            labels.append("\\textcolor{Blue}{$d^\\star_{\\UB}$}")
        if self.consider_lb and self.is_fastest_lb:
            labels.append("\\textcolor{Red}{$t^\\star_{\\LB}$}")
        if (not self.consider_lb) and self.is_fastest_ub:
            labels.append("\\textcolor{Red}{$t^\\star_{\\UB}$}")
        if self.consider_lb and self.has_best_coeff_lb:
            labels.append("\\textcolor{Green}{$c^\\star_{\\LB}$}")
        if (not self.consider_lb) and self.has_best_coeff_ub:
            labels.append("\\textcolor{Green}{$c^\\star_{\\UB}$}")
        return " \\\\ ".join(labels)

    def compare_tightness(self, other):
        """Return 1/0/-1 if this bound is tighter than/equal to/looser than other's.

        A larger lower bound and a smaller upper bound count as tighter.
        """
        if self.consider_lb:
            return self._three_way(self.lb, other.lb)
        return self._three_way(other.ub, self.ub)

    def compare_time(self, other):
        """Return 1/0/-1 if this runtime is smaller than/equal to/larger than other's."""
        return self._three_way(other.t, self.t)

    def compare_coeff(self, other):
        """Return 1/0/-1 if this coefficient is larger than/equal to/smaller than other's."""
        if self.consider_lb:
            return self._three_way(self.coeff_lb, other.coeff_lb)
        return self._three_way(self.coeff_ub, other.coeff_ub)

    def get_edge_label(self, other):
        """Compare with ``other`` and return the dominance edge label.

        Side effects: for each criterion (tightness "d", time "t", coefficient
        "c") the "best" flag of whichever side loses is cleared, and if this
        method is at least as good everywhere and strictly better somewhere,
        the maximum flag of ``other`` is cleared. Only the flags of the bound
        selected by ``self.consider_lb`` are touched.

        Returns:
            str: The letters of the criteria in which this method is strictly
            better, or "" if it is worse in at least one criterion.
        """
        suffix = "lb" if self.consider_lb else "ub"
        outcomes = (
            (self.compare_tightness(other), "has_tightest_", "d"),
            (self.compare_time(other), "is_fastest_", "t"),
            (self.compare_coeff(other), "has_best_coeff_", "c"),
        )
        is_better_or_equal = True
        label = ""
        for outcome, flag_prefix, letter in outcomes:
            if outcome < 0:
                is_better_or_equal = False
                setattr(self, flag_prefix + suffix, False)
            elif outcome > 0:
                setattr(other, flag_prefix + suffix, False)
                label = label + letter
        if is_better_or_equal and (label != ""):
            setattr(other, "is_maximum_" + suffix, False)
        return label if is_better_or_equal else ""

    def as_table_row(self):
        """Return one LaTeX table row describing this method.

        Columns: name (with maximum markers), config, lb, ub, runtime,
        coeff_lb, coeff_ub, score_lb, score_ub. Cells for bounds that the
        method does not compute are rendered as ``{--}``.
        """
        # NOTE(review): unlike __init__, the config is not passed to
        # computes_no_lb here - confirm whether that is intended.
        no_lb = computes_no_lb(self.name)
        no_ub = computes_no_ub(self.name)
        placeholder = "{--}"
        head = "\\" + self.name
        if self.is_maximum_lb:
            head = head + " $\\LB^\\star$"
        if self.is_maximum_ub:
            head = head + " $\\UB^\\star$"
        # NOTE(review): the scores use "{:.2}" whereas stats() uses "{:.2f}";
        # kept as is, confirm which format is wanted.
        cells = [
            head,
            self.config if self.config != "" else placeholder,
            placeholder if no_lb else str(self.lb),
            placeholder if no_ub else str(self.ub),
            "{:.2E}".format(Decimal(str(self.t))),
            placeholder if no_lb else str(self.coeff_lb),
            placeholder if no_ub else str(self.coeff_ub),
            placeholder if no_lb else "{:.2}".format(Decimal(str(self.score_lb))),
            placeholder if no_ub else "{:.2}".format(Decimal(str(self.score_ub))),
        ]
        return " & ".join(cells) + " \\\\\n"

    def is_maximum(self):
        """Return the maximum flag of the active bound."""
        return self.is_maximum_lb if self.consider_lb else self.is_maximum_ub

    def dist(self):
        """Return the active bound (``lb`` or ``ub``)."""
        return self.lb if self.consider_lb else self.ub

    def score(self):
        """Return the score of the active bound."""
        return self.score_lb if self.consider_lb else self.score_ub

    def coeff(self):
        """Return the rounded coefficient of the active bound."""
        return self.coeff_lb if self.consider_lb else self.coeff_ub

    def has_tightest_dist(self):
        """Return the tightest-bound flag of the active bound."""
        return self.has_tightest_lb if self.consider_lb else self.has_tightest_ub

    def is_fastest(self):
        """Return the fastest flag of the active bound."""
        return self.is_fastest_lb if self.consider_lb else self.is_fastest_ub

    def has_best_coeff(self):
        """Return the best-coefficient flag of the active bound."""
        return self.has_best_coeff_lb if self.consider_lb else self.has_best_coeff_ub

    def discard(self):
        """Return whether this method is discarded for the active bound."""
        return self.discard_for_lb if self.consider_lb else self.discard_for_ub

    def do_discard(self):
        """Mark this method as discarded for the active bound."""
        if self.consider_lb:
            self.discard_for_lb = True
        else:
            self.discard_for_ub = True

    def get_adj_list(self):
        """Return the adjacency list of the active bound."""
        return self.adj_list_lb if self.consider_lb else self.adj_list_ub

    def set_adj_list(self, new_adj_list):
        """Replace the adjacency list of the active bound."""
        if self.consider_lb:
            self.adj_list_lb = new_adj_list
        else:
            self.adj_list_ub = new_adj_list

    def set_score(self, best_dist, best_t, best_coeff):
        """Set the score of the active bound to the mean of three ratios.

        The ratios relate this method's bound, runtime and coefficient to the
        given best values. A zero divisor (``best_dist``, ``self.ub``,
        ``self.t`` or ``best_coeff``) raises ``ZeroDivisionError``.
        """
        if self.consider_lb:
            self.score_lb = ((self.lb / best_dist) + (best_t / self.t) + (self.coeff_lb / best_coeff)) / 3.0
        else:
            self.score_ub = ((best_dist / self.ub) + (best_t / self.t) + (self.coeff_ub / best_coeff)) / 3.0
443 
def parse_method_name(method_name):
    """Split a raw method identifier into ``[name, config]``.

    Args:
        method_name: String of the form "NAME" or "NAME,config".

    Returns:
        list: Two strings - the part before the first comma and the remainder
        after it (an empty string if there is no comma).
    """
    name, _, config = method_name.partition(",")
    return [name, config]
449 
def dfs(methods, is_discarded_edge, parent_id, child_id, seen):
    """Flag every edge from ``parent_id`` to a node reachable via ``child_id``.

    Walks the adjacency lists depth-first starting at ``child_id`` and sets
    ``is_discarded_edge[parent_id][x] = True`` for each node ``x`` that is the
    target of an edge visited on the way, i.e. each node reachable from
    ``child_id`` in at least one step.

    Args:
        methods: Indexable collection of objects providing ``get_adj_list()``,
            which returns edges whose first element is the target id.
        is_discarded_edge: Nested mapping ``parent id -> target id -> bool``
            that is updated in place.
        parent_id: Id of the node whose outgoing edges are being pruned.
        child_id: Id of the node at which the traversal (re)starts.
        seen: Mapping ``node id -> bool`` of already expanded nodes; updated
            in place.
    """
    if seen[child_id]:
        return
    # Mark before descending so that a cyclic adjacency structure cannot cause
    # unbounded recursion; on acyclic input the result is the same as marking
    # afterwards.
    seen[child_id] = True
    for edge in methods[child_id].get_adj_list():
        target_id = edge[0]
        is_discarded_edge[parent_id][target_id] = True
        dfs(methods, is_discarded_edge, parent_id, target_id, seen)
457 
def read_results_from_csv_files(dataset, args):
    """Read the result CSV files of a dataset into ``Method`` objects.

    For every method group that is not excluded via ``args``, the file
    ``results/<dataset>__<group>.csv`` is parsed with ";" as delimiter. The
    first row of each file is skipped; each remaining row yields one
    ``Method`` built from its first six columns.

    Args:
        dataset: Name of the dataset, used as file name prefix.
        args: Object with the boolean attributes ``no_lsape``, ``no_ls``,
            ``no_lp`` and ``no_misc``.

    Returns:
        list: The parsed ``Method`` objects in file and row order.
    """
    prefix = os.path.join("results", dataset) + "__"
    groups = (
        (args.no_lsape, "lsape_based_methods.csv"),
        (args.no_ls, "ls_based_methods.csv"),
        (args.no_lp, "lp_based_methods.csv"),
        (args.no_misc, "misc_methods.csv"),
    )
    result_file_names = [prefix + suffix for excluded, suffix in groups if not excluded]
    methods = []
    for result_file_name in result_file_names:
        # The reader must be consumed while the file is still open.
        with open(result_file_name, "r") as result_file:
            csv_reader = csv.reader(result_file, delimiter=";")
            # Skip the first row; None is the default if the file is empty.
            next(csv_reader, None)
            for row in csv_reader:
                methods.append(Method(parse_method_name(row[0]), row[1], row[2], row[3], row[4], row[5]))
    return methods
476 
def build_dependency_graph(methods, consider_lb):
    """Build the reduced dominance graph over ``methods`` for one bound.

    Steps, all operating on the bound selected by ``consider_lb``:
      1. set ``consider_lb`` on every method,
      2. connect each undiscarded method to every undiscarded method it
         dominates (non-empty edge label),
      3. compute the scores relative to the best bound, runtime and
         coefficient,
      4. discard methods that are neither maximal nor best-scoring among the
         methods sharing their name,
      5. drop edges to discarded methods and edges implied by transitivity.

    Args:
        methods: List of ``Method`` objects; modified in place.
        consider_lb: True to work on lower bounds, False for upper bounds.

    Returns:
        The same ``methods`` list.
    """
    # Select the bound all accessors operate on.
    for method in methods:
        method.consider_lb = consider_lb

    # Construct the dominance graph.
    for dominating in methods:
        outgoing = []
        if not dominating.discard():
            for other_id, other in enumerate(methods):
                if other.discard():
                    continue
                edge_label = dominating.get_edge_label(other)
                if edge_label != "":
                    outgoing.append((other_id, edge_label))
        dominating.set_adj_list(outgoing)

    # Compute the scores relative to the best values among undiscarded methods.
    best_dist = best_t = best_coeff = 0
    for method in methods:
        if method.discard():
            continue
        if method.has_tightest_dist():
            best_dist = method.dist()
        if method.is_fastest():
            best_t = method.t
        if method.has_best_coeff():
            best_coeff = method.coeff()
    for method in methods:
        method.set_score(best_dist, best_t, best_coeff)

    # Discard methods that are not maximal w.r.t. their score within their heuristic.
    best_scores = {method.name: -1 for method in methods}
    for method in methods:
        current_score = method.score()
        if current_score > best_scores[method.name]:
            best_scores[method.name] = current_score
    for method in methods:
        if (not method.is_maximum()) and (method.score() < best_scores[method.name]):
            method.do_discard()

    # Select the undiscarded methods and drop edges that point to discarded ones.
    undiscarded_method_ids = []
    for method_id, method in enumerate(methods):
        kept_edges = []
        if not method.discard():
            undiscarded_method_ids.append(method_id)
            kept_edges = [edge for edge in method.get_adj_list() if not methods[edge[0]].discard()]
        method.set_adj_list(kept_edges)

    # Compute the transitive reduction of the undiscarded methods.
    is_discarded_edge = {
        id_1: {id_2: False for id_2 in undiscarded_method_ids}
        for id_1 in undiscarded_method_ids
    }
    for method_id in undiscarded_method_ids:
        # A fresh "seen" map is used for every start node.
        seen = {child_id: False for child_id in undiscarded_method_ids}
        for edge in methods[method_id].get_adj_list():
            dfs(methods, is_discarded_edge, method_id, edge[0], seen)

    # Discard edges that are not contained in the transitive reduction.
    for method_id, method in enumerate(methods):
        kept_edges = []
        if not method.discard():
            kept_edges = [edge for edge in method.get_adj_list() if not is_discarded_edge[method_id][edge[0]]]
        method.set_adj_list(kept_edges)
    return methods
551 
def infix(args):
    """Return the file name infix encoding which method groups are excluded.

    Args:
        args: Object with the boolean attributes ``no_lsape``, ``no_ls``,
            ``no_lp`` and ``no_misc``.

    Returns:
        str: Concatenation of "_no-lsape", "_no-ls", "_no-lp" and "_no-misc"
        for the flags that are set, in this order; "" if none is set.
    """
    tagged_flags = (
        ("_no-lsape", args.no_lsape),
        ("_no-ls", args.no_ls),
        ("_no-lp", args.no_lp),
        ("_no-misc", args.no_misc),
    )
    return "".join(tag for tag, excluded in tagged_flags if excluded)
563 
class AggregatedScores:
    """Per-name scores and "best" flags aggregated for one dataset.

    All constructor arguments except ``method_names`` are mappings keyed by
    method or extension name; they are stored unchanged as attributes of the
    same name.
    """

    def __init__(self, method_names, scores_lb, scores_ub, has_tightest_lb, is_fastest_lb, has_best_coeff_lb, has_tightest_ub, is_fastest_ub, has_best_coeff_ub):
        """Store the given names, scores and flag mappings."""
        self.method_names = method_names
        self.scores_lb = scores_lb
        self.scores_ub = scores_ub
        self.has_tightest_lb = has_tightest_lb
        self.is_fastest_lb = is_fastest_lb
        self.has_best_coeff_lb = has_best_coeff_lb
        self.has_tightest_ub = has_tightest_ub
        self.is_fastest_ub = is_fastest_ub
        self.has_best_coeff_ub = has_best_coeff_ub

    @staticmethod
    def _format_chi(has_tightest, is_fastest, has_best_coeff):
        """Render three flags as a LaTeX tuple with set flags as a bold 1."""
        cells = ["\\mathbf{1}" if flag else "0" for flag in (has_tightest, is_fastest, has_best_coeff)]
        return "$(" + ",".join(cells) + ")$"

    def chi_lb(self, method_name):
        """Return the LaTeX flag tuple (tightest, fastest, best coeff) for lower bounds."""
        return self._format_chi(
            self.has_tightest_lb[method_name],
            self.is_fastest_lb[method_name],
            self.has_best_coeff_lb[method_name],
        )

    def chi_ub(self, method_name):
        """Return the LaTeX flag tuple (tightest, fastest, best coeff) for upper bounds."""
        return self._format_chi(
            self.has_tightest_ub[method_name],
            self.is_fastest_ub[method_name],
            self.has_best_coeff_ub[method_name],
        )

    def write_to_csv_file(self, args):
        """Write the scores to ``<table_dir>/<dataset><infix>_scores.csv``.

        One ";"-separated row is written per method name, with "na" in the
        columns of bounds the method does not compute, followed by one row for
        each of the four extension names, which only carry upper-bound data.

        Args:
            args: Object with the attributes ``table_dir`` and ``dataset`` and
                the exclusion flags read by ``infix``.
        """
        csv_file_name = os.path.join(args.table_dir, args.dataset) + infix(args) + "_scores.csv"
        # The context manager closes the file even if a lookup below fails.
        with open(csv_file_name, "w") as csv_file:
            csv_file.write("heuristic;chi_lb;score_lb;chi_ub;score_ub\n")
            for method_name in self.method_names:
                csv_file.write(method_name + ";")
                if computes_no_lb(method_name):
                    csv_file.write("na;na;")
                else:
                    csv_file.write(self.chi_lb(method_name) + ";" + str(self.scores_lb[method_name]) + ";")
                if computes_no_ub(method_name):
                    csv_file.write("na;na\n")
                else:
                    csv_file.write(self.chi_ub(method_name) + ";" + str(self.scores_ub[method_name]) + "\n")
            for ext_name in ("MULTISOL", "CENTRALITIES", "MULTISTART", "RANDPOST"):
                csv_file.write(ext_name + ";na;na;")
                csv_file.write(self.chi_ub(ext_name) + ";" + str(self.scores_ub[ext_name]) + "\n")
628 
629 def create_latex_tables(args, datasets, aggregated_scores, lsape_based_method_names, lp_based_method_names, ls_based_method_names, misc_method_names, lsape_ext_names, ls_ext_names):
630  sum_best_coeff_lb = {}
631  table_ub_file_name = os.path.join(args.table_dir, "results_UB.tex")
632  table_ub = open(table_ub_file_name, "w")
633  table_ub.write("%!TEX root = ../root.tex\n")
634  table_ub.write("\\begin{tabular}{@{}lcS[table-format=1.2]cS[table-format=1.2]cS[table-format=1.2]cS[table-format=1.2]cS[table-format=1.2]cS[table-format=1.2]@{}}\n")
635  table_ub.write("\\toprule\n")
636  table_ub.write("heuristic & \multicolumn{2}{c}{\\letter} & \multicolumn{2}{c}{\\mutagenicity} & \multicolumn{2}{c}{\\aids} & \multicolumn{2}{c}{\\protein} & \multicolumn{2}{c}{\\fingerprint} & \multicolumn{2}{c}{\\grec} \\\\\n")
637  table_ub.write("\midrule\n")
638  table_ub.write("& {$\chi_\UB$} & {$\widehat{s_\UB}$} & {$\chi_\UB$} & {$\widehat{s_\UB}$} & {$\chi_\UB$} & {$\widehat{s_\UB}$} & {$\chi_\UB$} & {$\widehat{s_\UB}$} & {$\chi_\UB$} & {$\widehat{s_\UB}$} & {$\chi_\UB$} & {$\widehat{s_\UB}$} \\\\\n")
639  table_ub.write("\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7} \cmidrule(lr){8-9} \cmidrule(lr){10-11} \cmidrule(l){12-13}\n")
640  table_ub.write("\multicolumn{13}{@{}l}{\emph{instantiations of the paradigm \LSAPEGED}} \\\\\n")
641  for method_name in lsape_based_method_names:
642  table_ub.write("\\" + method_name)
643  for dataset in datasets:
644  if aggregated_scores[dataset].scores_ub[method_name] > 0:
645  table_ub.write(" & " + aggregated_scores[dataset].chi_ub(method_name) + " & \\bfseries " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_ub[method_name])))
646  else:
647  table_ub.write(" & " + aggregated_scores[dataset].chi_ub(method_name) + " & " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_ub[method_name])))
648  table_ub.write(" \\\\\n")
649  table_ub.write("\multicolumn{13}{@{}l}{\emph{extensions of the paradigm \LSAPEGED}} \\\\\n")
650  for ext_name in lsape_ext_names:
651  table_ub.write("\\" + ext_name)
652  for dataset in datasets:
653  if aggregated_scores[dataset].scores_ub[ext_name] > 0:
654  table_ub.write(" & " + aggregated_scores[dataset].chi_ub(ext_name) + " & \\bfseries " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_ub[ext_name])))
655  else:
656  table_ub.write(" & " + aggregated_scores[dataset].chi_ub(ext_name) + " & " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_ub[ext_name])))
657  table_ub.write(" \\\\\n")
658  table_ub.write("\midrule\n")
659  table_ub.write("\multicolumn{13}{@{}l}{\emph{instantiations of the paradigm \LPGED}} \\\\\n")
660  for method_name in lp_based_method_names:
661  table_ub.write("\\" + method_name)
662  for dataset in datasets:
663  if aggregated_scores[dataset].scores_ub[method_name] > 0:
664  table_ub.write(" & " + aggregated_scores[dataset].chi_ub(method_name) + " & \\bfseries " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_ub[method_name])))
665  else:
666  table_ub.write(" & " + aggregated_scores[dataset].chi_ub(method_name) + " & " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_ub[method_name])))
667  table_ub.write(" \\\\\n")
668  table_ub.write("\midrule\n")
669  table_ub.write("\multicolumn{13}{@{}l}{\emph{instantiations of the paradigm \LSGED}} \\\\\n")
670  for method_name in ls_based_method_names:
671  table_ub.write("\\" + method_name)
672  for dataset in datasets:
673  if aggregated_scores[dataset].scores_ub[method_name] > 0:
674  table_ub.write(" & " + aggregated_scores[dataset].chi_ub(method_name) + " & \\bfseries " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_ub[method_name])))
675  else:
676  table_ub.write(" & " + aggregated_scores[dataset].chi_ub(method_name) + " & " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_ub[method_name])))
677  table_ub.write(" \\\\\n")
678  table_ub.write("\multicolumn{13}{@{}l}{\emph{extensions of the paradigm \LSGED}} \\\\\n")
679  for ext_name in ls_ext_names:
680  table_ub.write("\\" + ext_name)
681  for dataset in datasets:
682  if aggregated_scores[dataset].scores_ub[ext_name] > 0:
683  table_ub.write(" & " + aggregated_scores[dataset].chi_ub(ext_name) + " & \\bfseries " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_ub[ext_name])))
684  else:
685  table_ub.write(" & " + aggregated_scores[dataset].chi_ub(ext_name) + " & " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_ub[ext_name])))
686  table_ub.write(" \\\\\n")
687  table_ub.write("\midrule\n")
688  table_ub.write("\multicolumn{13}{@{}l}{\emph{miscellaneous heuristics}} \\\\\n")
689  for method_name in misc_method_names:
690  if not computes_no_ub(method_name):
691  table_ub.write("\\" + method_name)
692  for dataset in datasets:
693  if aggregated_scores[dataset].scores_ub[method_name] > 0:
694  table_ub.write(" & " + aggregated_scores[dataset].chi_ub(method_name) + " & \\bfseries " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_ub[method_name])))
695  else:
696  table_ub.write(" & " + aggregated_scores[dataset].chi_ub(method_name) + " & " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_ub[method_name])))
697  table_ub.write(" \\\\\n")
698  table_ub.write("\\bottomrule\n")
699  table_ub.write("\end{tabular}\n")
700  table_ub.close()
701  table_lb_file_name = os.path.join(args.table_dir, "results_LB.tex")
702  table_lb = open(table_lb_file_name, "w")
703  table_lb.write("%!TEX root = ../root.tex\n")
704  table_lb.write("\\begin{tabular}{@{}lcS[table-format=1.2]cS[table-format=1.2]cS[table-format=1.2]cS[table-format=1.2]cS[table-format=1.2]cS[table-format=1.2]@{}}\n")
705  table_lb.write("\\toprule\n")
706  table_lb.write("heuristic & \multicolumn{2}{c}{\\letter} & \multicolumn{2}{c}{\\mutagenicity} & \multicolumn{2}{c}{\\aids} & \multicolumn{2}{c}{\\protein} & \multicolumn{2}{c}{\\fingerprint} & \multicolumn{2}{c}{\\grec} \\\\\n")
707  table_lb.write("\midrule\n")
708  table_lb.write("& {$\chi_\LB$} & {$\widehat{s_\LB}$} & {$\chi_\LB$} & {$\widehat{s_\LB}$} & {$\chi_\LB$} & {$\widehat{s_\LB}$} & {$\chi_\LB$} & {$\widehat{s_\LB}$} & {$\chi_\LB$} & {$\widehat{s_\LB}$} & {$\chi_\LB$} & {$\widehat{s_\LB}$} \\\\\n")
709  table_lb.write("\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7} \cmidrule(lr){8-9} \cmidrule(lr){10-11} \cmidrule(l){12-13}\n")
710  table_lb.write("\multicolumn{13}{@{}l}{\emph{instantiations of the paradigm \LSAPEGED}} \\\\\n")
711  for method_name in lsape_based_method_names:
712  if not computes_no_lb(method_name):
713  table_lb.write("\\" + method_name)
714  for dataset in datasets:
715  if aggregated_scores[dataset].scores_lb[method_name]> 0:
716  table_lb.write(" & " + aggregated_scores[dataset].chi_lb(method_name) + " & \\bfseries " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_lb[method_name])))
717  else:
718  table_lb.write(" & " + aggregated_scores[dataset].chi_lb(method_name) + " & " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_lb[method_name])))
719  table_lb.write(" \\\\\n")
720  table_lb.write("\midrule\n")
721  table_lb.write("\multicolumn{13}{@{}l}{\emph{instantiations of the paradigm \LPGED}} \\\\\n")
722  for method_name in lp_based_method_names:
723  table_lb.write("\\" + method_name)
724  for dataset in datasets:
725  if aggregated_scores[dataset].scores_lb[method_name]> 0:
726  table_lb.write(" & " + aggregated_scores[dataset].chi_lb(method_name) + " & \\bfseries " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_lb[method_name])))
727  else:
728  table_lb.write(" & " + aggregated_scores[dataset].chi_lb(method_name) + " & " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_lb[method_name])))
729  table_lb.write(" \\\\\n")
730  table_lb.write("\midrule\n")
731  table_lb.write("\multicolumn{13}{@{}l}{\emph{miscellaneous heuristics}} \\\\\n")
732  for method_name in misc_method_names:
733  if not computes_no_lb(method_name):
734  table_lb.write("\\" + method_name)
735  for dataset in datasets:
736  if aggregated_scores[dataset].scores_lb[method_name]> 0:
737  table_lb.write(" & " + aggregated_scores[dataset].chi_lb(method_name) + " & \\bfseries " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_lb[method_name])))
738  else:
739  table_lb.write(" & " + aggregated_scores[dataset].chi_lb(method_name) + " & " + "{0:.2f}".format(Decimal(aggregated_scores[dataset].scores_lb[method_name])))
740  table_lb.write(" \\\\\n")
741  table_lb.write("\\bottomrule\n")
742  table_lb.write("\end{tabular}\n")
743  table_lb.close()
744 
def create_pareto_data(args, dataset, methods, consider_lb, method_names):
    """Write the best-scoring run of each heuristic to a Pareto CSV file.

    The file ``<data_dir>/<dataset>_Pareto_<LB|UB>.csv`` consists of two
    lines: a header with a runtime and a bound column per selected method
    name, and one data line with the runtime and bound of the undiscarded
    method that has the highest score for that name. Every cell, including
    the last one, is followed by a comma.

    Args:
        args: Object with the attribute ``data_dir``.
        dataset: Dataset name used in the file name.
        methods: Iterable of objects providing ``name``, ``t``, ``discard()``,
            ``score()`` and ``dist()``.
        consider_lb: True to export lower bounds, False for upper bounds.
        method_names: Candidate heuristic names; those that do not compute the
            requested bound are left out.

    Raises:
        KeyError: If an undiscarded method has a name that was not selected.
        AttributeError: If a selected name has no undiscarded method.
    """
    runtime_column_name = "avg_runtime"
    if consider_lb:
        selected_method_names = [method_name for method_name in method_names if not computes_no_lb(method_name)]
        bound_tag = "LB"
        bound_column_name = "avg_lb"
    else:
        selected_method_names = [method_name for method_name in method_names if not computes_no_ub(method_name)]
        bound_tag = "UB"
        bound_column_name = "avg_ub"

    # Pick, for each selected name, the undiscarded method with the highest
    # score; on ties the method encountered first is kept.
    max_score_methods = {method_name: None for method_name in selected_method_names}
    for method in methods:
        if method.discard():
            continue
        incumbent = max_score_methods[method.name]
        if incumbent is None or method.score() > incumbent.score():
            max_score_methods[method.name] = method

    pareto_data_file_name = os.path.join(args.data_dir, dataset + "_Pareto_" + bound_tag + ".csv")
    # The context manager closes the file even if a selected name has no method.
    with open(pareto_data_file_name, "w") as pareto_data_file:
        for method_name in max_score_methods:
            pareto_data_file.write(method_name + "_" + runtime_column_name + "," + method_name + "_" + bound_column_name + ",")
        pareto_data_file.write("\n")
        for method_name in max_score_methods:
            best_method = max_score_methods[method_name]
            pareto_data_file.write(str(best_method.t) + "," + str(best_method.dist()) + ",")
        pareto_data_file.write("\n")
770 
def _barplot_sum_scores(datasets, aggregated_scores, names, attribute):
    '''Adds up, per name, the entries stored under ``attribute`` for all datasets.'''
    totals = {name : 0 for name in names}
    for name in names:
        for dataset in datasets:
            totals[name] = totals[name] + getattr(aggregated_scores[dataset], attribute)[name]
    return totals


def _barplot_count_flags(datasets, aggregated_scores, names, attribute):
    '''Counts, per name, the datasets whose ``attribute`` entry is true.'''
    counts = {name : 0 for name in names}
    for name in names:
        for dataset in datasets:
            if getattr(aggregated_scores[dataset], attribute)[name]:
                counts[name] = counts[name] + 1
    return counts


def _barplot_ranked(totals, divisor=None):
    '''Returns the positive entries of ``totals`` as (name, value) pairs, largest first.

    If ``divisor`` is given, every kept value is divided by it.
    '''
    ordered = sorted(totals.items(), key=lambda kv: kv[1], reverse=True)
    if divisor is None:
        return [(name, total) for name, total in ordered if total > 0]
    return [(name, total / divisor) for name, total in ordered if total > 0]


def create_barplots(args, datasets, aggregated_scores, method_names, lsape_ext_names, ls_ext_names):
    '''Writes the eight bar plot files (scores and chi sums for LB and UB).

    Arguments:
    args -- parsed command line arguments; only ``args.tikz_dir`` is read here
    datasets -- names of the datasets to sum over
    aggregated_scores -- maps each dataset to an object exposing the
        dictionaries ``scores_lb``, ``scores_ub``, ``has_tightest_lb``,
        ``is_fastest_lb``, ``has_best_coeff_lb``, ``has_tightest_ub``,
        ``is_fastest_ub`` and ``has_best_coeff_ub``
    method_names -- names of all methods; those for which computes_no_lb()
        / computes_no_ub() hold are left out of the LB / UB plots
    lsape_ext_names -- names of the LSAPE extensions (UB plots only)
    ls_ext_names -- names of the local search extensions (UB plots only)

    Entries whose sum is not positive get no bar.
    '''
    # Average over the datasets actually passed in instead of a hard-coded 6.
    num_datasets = float(len(datasets))
    # Upper y-limit of the counting plots: 10% headroom above the largest
    # possible count (6.6 for six datasets).
    chi_ymax = round(1.1 * len(datasets), 1)

    lb_method_names = [name for name in method_names if not computes_no_lb(name)]
    ub_method_names = [name for name in method_names if not computes_no_ub(name)]

    def summed(names, attribute):
        return _barplot_sum_scores(datasets, aggregated_scores, names, attribute)

    def counted(names, attribute):
        return _barplot_count_flags(datasets, aggregated_scores, names, attribute)

    # sum scores and chi for lower bounds
    scores_lb = _barplot_ranked(summed(lb_method_names, "scores_lb"), num_datasets)
    has_tightest_lb = _barplot_ranked(counted(lb_method_names, "has_tightest_lb"))
    is_fastest_lb = _barplot_ranked(counted(lb_method_names, "is_fastest_lb"))
    has_best_coeff_lb = _barplot_ranked(counted(lb_method_names, "has_best_coeff_lb"))

    # sum scores and chi for upper bounds of heuristics
    scores_ub = _barplot_ranked(summed(ub_method_names, "scores_ub"), num_datasets)
    has_tightest_ub = _barplot_ranked(counted(ub_method_names, "has_tightest_ub"))
    is_fastest_ub = _barplot_ranked(counted(ub_method_names, "is_fastest_ub"))
    has_best_coeff_ub = _barplot_ranked(counted(ub_method_names, "has_best_coeff_ub"))

    # sum scores and chi for upper bounds of LSAPE extensions
    scores_ub_lsape_ext = _barplot_ranked(summed(lsape_ext_names, "scores_ub"), num_datasets)
    has_tightest_ub_lsape_ext = _barplot_ranked(counted(lsape_ext_names, "has_tightest_ub"))
    has_best_coeff_ub_lsape_ext = _barplot_ranked(counted(lsape_ext_names, "has_best_coeff_ub"))

    # sum scores and chi for upper bounds of LS extensions
    scores_ub_ls_ext = _barplot_ranked(summed(ls_ext_names, "scores_ub"), num_datasets)
    has_tightest_ub_ls_ext = _barplot_ranked(counted(ls_ext_names, "has_tightest_ub"))
    has_best_coeff_ub_ls_ext = _barplot_ranked(counted(ls_ext_names, "has_best_coeff_ub"))

    def target(file_name):
        return os.path.join(args.tikz_dir, file_name)

    # The labels are raw strings: they are LaTeX, and "\U" in a normal string
    # literal is a (here malformed) unicode escape in Python 3.
    write_barplot(target("scores_LB.tex"), r"$\myavg_\mathcal{D}\widehat{s_\LB}$", 1.1, 1, scores_lb)
    write_barplot(target("has_tightest_LB.tex"), r"$(\sum_\mathcal{D}{\chi_\LB})_1$", chi_ymax, 0.5, has_tightest_lb)
    write_barplot(target("is_fastest_LB.tex"), r"$(\sum_\mathcal{D}{\chi_\LB})_2$", chi_ymax, 0.5, is_fastest_lb)
    write_barplot(target("has_best_coeff_LB.tex"), r"$(\sum_\mathcal{D}{\chi_\LB})_3$", chi_ymax, 1, has_best_coeff_lb)
    write_barplot(target("scores_UB.tex"), r"$\myavg_\mathcal{D}\widehat{s_\UB}$", 1.1, 1, scores_ub, scores_ub_lsape_ext, scores_ub_ls_ext)
    write_barplot(target("has_tightest_UB.tex"), r"$(\sum_\mathcal{D}{\chi_\UB})_1$", chi_ymax, 0.5, has_tightest_ub, has_tightest_ub_lsape_ext, has_tightest_ub_ls_ext)
    write_barplot(target("is_fastest_UB.tex"), r"$(\sum_\mathcal{D}{\chi_\UB})_2$", chi_ymax, 0.5, is_fastest_ub)
    write_barplot(target("has_best_coeff_UB.tex"), r"$(\sum_\mathcal{D}{\chi_\UB})_3$", chi_ymax, 1, has_best_coeff_ub, has_best_coeff_ub_lsape_ext, has_best_coeff_ub_ls_ext)
854 
def write_barplot(barplot_file_name, ylabel, ymax, width, method_bars, lsape_ext_bars=None, ls_ext_bars=None):
    '''Writes a pgfplots bar plot (TikZ picture) to the file barplot_file_name.

    Arguments:
    barplot_file_name -- path of the file to create or overwrite
    ylabel -- text emitted verbatim after "ylabel = "
    ymax -- upper limit of the y-axis
    width -- plot width as a multiple of \\linewidth
    method_bars -- sequence of (name, value) pairs, one bar each; the name
        is used as the \\addplot option and, with a leading backslash, as
        the tick label
    lsape_ext_bars -- optional second group of (name, value) pairs
    ls_ext_bars -- optional third group of (name, value) pairs

    Bars are placed at x = 0.5, 1.0, 1.5, ... in the order given. A dotted
    vertical line is drawn in front of each non-empty optional group.
    '''
    groups = [
        list(method_bars),
        list(lsape_ext_bars) if lsape_ext_bars else [],
        list(ls_ext_bars) if ls_ext_bars else [],
    ]
    all_bars = groups[0] + groups[1] + groups[2]
    num_bars = len(all_bars)
    xtick = ",".join(str(i * 0.5) for i in range(1, num_bars + 1))
    xticklabels = ",".join("\\" + bar[0] for bar in all_bars)

    lines = [
        "%!TEX root = ../root.tex\n",
        "\\begin{tikzpicture}\n",
        "\\begin{axis}[\n",
        "height = 0.4\\linewidth,\n",
        "width = " + str(width) + "\\linewidth,\n",
        "xmin = 0,\n",
        "xmax = " + str(0.5 * (num_bars + 1)) + ",\n",
        "ymin = 0,\n",
        "ymax = " + str(ymax) + ",\n",
        "xtick = {" + xtick + "},\n",
        "xticklabels = {" + xticklabels + "},\n",
        "xticklabel style={rotate=67.5, anchor=east},\n",
        "ylabel = " + ylabel + ",\n",
        "every axis plot/.append style={ybar,bar width=.2, bar shift=0pt}]\n",
    ]

    xcoord = .5
    for group_id, group in enumerate(groups):
        if group_id > 0 and group:
            # Separator halfway between the previous bar and the first bar
            # of this extension group.
            separator = str(xcoord - .25)
            lines.append("\\draw[densely dotted] ({axis cs:" + separator + ",0}|-{rel axis cs:0,1}) -- ({axis cs:" + separator + ",0}|-{rel axis cs:0,0});\n")
        for bar in group:
            lines.append("\\addplot[" + bar[0] + "] coordinates {(" + str(xcoord) + "," + str(bar[1]) + ")};\n")
            xcoord = xcoord + .5
    lines.append("\\end{axis}\n")
    lines.append("\\end{tikzpicture}")

    # The context manager closes the file even if a write fails.
    with open(barplot_file_name, "w") as barplot_file:
        barplot_file.writelines(lines)
891 
# Collects, for one list of method results, per-name scores and flags and
# returns them wrapped in an AggregatedScores object.
#
# methods          -- iterable of result objects; the code reads their name,
#                     score_lb, score_ub and the is_maximum_* / has_tightest_* /
#                     is_fastest_* / has_best_coeff_* attributes
# method_names     -- keys of the lower bound dictionaries
# method_ext_names -- keys of the upper bound dictionaries; presumably contains
#                     method_names plus the four extension names used below
#                     ("CENTRALITIES", "MULTISOL", "MULTISTART", "RANDPOST")
#                     -- verify against the caller
#
# NOTE(review): this listing has lost its indentation, so which of the `if`
# statements below are nested inside which others cannot be read off from here.
# Confirm the nesting against the original file before changing this function.
892 def aggregate_scores(methods, method_names, method_ext_names):
# Scores start at 0.0 and flags at False for every known name.
893  scores_lb = {method_name : 0.0 for method_name in method_names}
894  scores_ub = {method_name : 0.0 for method_name in method_ext_names}
895  has_tightest_lb = {method_name : False for method_name in method_names}
896  is_fastest_lb = {method_name : False for method_name in method_names}
897  has_best_coeff_lb = {method_name : False for method_name in method_names}
898  has_tightest_ub = {method_name : False for method_name in method_ext_names}
899  is_fastest_ub = {method_name : False for method_name in method_ext_names}
900  has_best_coeff_ub = {method_name : False for method_name in method_ext_names}
# Running sums of score_ub; they are turned into the extension scores after the loop.
901  sum_scores_ub_lsape = 0.0
902  sum_scores_ub_centralities = 0.0
903  sum_scores_ub_multi_sol = 0.0
904  sum_scores_ub_ls = 0.0
905  sum_scores_ub_randpost = 0.0
906  sum_scores_ub_multi_start = 0.0
907  for method in methods:
# Keep the largest score_lb / score_ub seen for each method name.
908  if method.is_maximum_lb:
909  if method.score_lb > scores_lb[method.name]:
910  scores_lb[method.name] = method.score_lb
911  if method.is_maximum_ub:
912  if method.score_ub > scores_ub[method.name]:
913  scores_ub[method.name] = method.score_ub
# Add score_ub to the sums selected by the predicates.
# NOTE(review): whether these checks depend on is_maximum_ub is not visible here.
914  if is_lsape_based(method):
915  sum_scores_ub_lsape = sum_scores_ub_lsape + method.score_ub
916  if uses_centralities(method):
917  sum_scores_ub_centralities = sum_scores_ub_centralities + method.score_ub
918  if uses_multi_sol(method):
919  sum_scores_ub_multi_sol = sum_scores_ub_multi_sol + method.score_ub
920  if is_ls_based(method):
921  sum_scores_ub_ls = sum_scores_ub_ls + method.score_ub
922  if uses_multi_start(method):
923  sum_scores_ub_multi_start = sum_scores_ub_multi_start + method.score_ub
924  if uses_randpost(method):
925  sum_scores_ub_randpost = sum_scores_ub_randpost + method.score_ub
# A lower bound flag becomes True for a name once a method with that name has it set.
926  if method.has_tightest_lb:
927  has_tightest_lb[method.name] = True
928  if method.is_fastest_lb:
929  is_fastest_lb[method.name] = True
930  if method.has_best_coeff_lb:
931  has_best_coeff_lb[method.name] = True
# Upper bound flags are recorded under the method name and under the names of
# the extensions for which the predicates hold (presumably only when the
# method's own flag is set -- confirm nesting).
932  if method.has_tightest_ub:
933  has_tightest_ub[method.name] = True
934  if uses_centralities(method):
935  has_tightest_ub["CENTRALITIES"] = True
936  if uses_multi_sol(method):
937  has_tightest_ub["MULTISOL"] = True
938  if uses_multi_start(method):
939  has_tightest_ub["MULTISTART"] = True
940  if uses_randpost(method):
941  has_tightest_ub["RANDPOST"] = True
942  if method.is_fastest_ub:
943  is_fastest_ub[method.name] = True
944  if uses_centralities(method):
945  is_fastest_ub["CENTRALITIES"] = True
946  if uses_multi_sol(method):
947  is_fastest_ub["MULTISOL"] = True
948  if uses_multi_start(method):
949  is_fastest_ub["MULTISTART"] = True
950  if uses_randpost(method):
951  is_fastest_ub["RANDPOST"] = True
952  if method.has_best_coeff_ub:
953  has_best_coeff_ub[method.name] = True
954  if uses_centralities(method):
955  has_best_coeff_ub["CENTRALITIES"] = True
956  if uses_multi_sol(method):
957  has_best_coeff_ub["MULTISOL"] = True
958  if uses_multi_start(method):
959  has_best_coeff_ub["MULTISTART"] = True
960  if uses_randpost(method):
961  has_best_coeff_ub["RANDPOST"] = True
# Extension scores are ratios of the sums above; the `> 0` checks avoid
# dividing by zero.
962  if sum_scores_ub_lsape > 0:
963  scores_ub["MULTISOL"] = sum_scores_ub_multi_sol / sum_scores_ub_lsape
964  scores_ub["CENTRALITIES"] = sum_scores_ub_centralities / sum_scores_ub_lsape
965  if sum_scores_ub_ls > 0:
966  scores_ub["MULTISTART"] = sum_scores_ub_multi_start / sum_scores_ub_ls
967  scores_ub["RANDPOST"] = sum_scores_ub_randpost / sum_scores_ub_ls
968  return AggregatedScores(method_names, scores_lb, scores_ub, has_tightest_lb, is_fastest_lb, has_best_coeff_lb, has_tightest_ub, is_fastest_ub, has_best_coeff_ub)
969 
def create_coeff_vs_dist_table(args, dataset, methods, consider_lb):
    '''Writes a two-column CSV table with one row per considered method.

    Arguments:
    args -- parsed command line arguments; ``args.data_dir`` is the output
        directory and ``infix(args)`` is inserted into the file name
    dataset -- dataset name, used as the stem of the file name
    methods -- iterable of method results
    consider_lb -- if true, the file ends in "_LB.csv" and holds the columns
        avg_lb,coeff_lb for all methods for which computes_no_lb() is false;
        otherwise it ends in "_UB.csv" and holds avg_ub,coeff_ub for all
        methods for which computes_no_ub() is false
    '''
    if consider_lb:
        suffix = "_LB.csv"
        header = "avg_lb,coeff_lb\n"
    else:
        suffix = "_UB.csv"
        header = "avg_ub,coeff_ub\n"
    table_file_name = os.path.join(args.data_dir, dataset) + infix(args) + suffix
    # The context manager closes the file even if a write fails.
    with open(table_file_name, "w") as table_file:
        table_file.write(header)
        for method in methods:
            if consider_lb:
                if not computes_no_lb(method.name, method.config):
                    table_file.write(str(method.lb) + "," + str(method.coeff_lb) + "\n")
            elif not computes_no_ub(method.name):
                table_file.write(str(method.ub) + "," + str(method.coeff_ub) + "\n")
989 
def create_tikz_graph(args, dataset, methods, consider_lb):
    '''Writes a TikZ \\graph picture with one node per kept method.

    Arguments:
    args -- parsed command line arguments; ``args.tikz_dir`` is the output
        directory and ``infix(args)`` is inserted into the file name
    dataset -- dataset name, used as the stem of the file name
    methods -- list of method results; the position in this list is used as
        the node identifier
    consider_lb -- selects the file name suffix "_LB.tex" (true) or
        "_UB.tex" (false)

    Methods for which discard() is true get no node. Methods for which
    is_maximum() is true are put on one layer and carry a label; all others
    follow as plain nodes. Edges come from get_adj_list() of every method,
    as (target id, edge options) pairs.
    '''
    suffix = "_LB.tex" if consider_lb else "_UB.tex"
    tikz_file_name = os.path.join(args.tikz_dir, dataset) + infix(args) + suffix
    # construct tikz file; the context manager closes it even if a write fails
    with open(tikz_file_name, "w") as tikz_file:
        tikz_file.write("%!TEX root = ../root.tex\n")
        tikz_file.write("\\begin{tikzpicture}[rounded corners,every label/.style={align=center}]\n")
        tikz_file.write("\\graph[layered layout,grow=right,\n")
        tikz_file.write("sibling distance=5pt,\n")
        tikz_file.write("part distance=5pt,\n")
        tikz_file.write("component distance=5pt,\n")
        tikz_file.write("sibling sep=5pt,\n")
        tikz_file.write("level sep=12pt,\n")
        tikz_file.write("part sep=5pt,\n")
        tikz_file.write("component sep=5pt,\n")
        tikz_file.write("component direction=up,\n")
        tikz_file.write("component align=first node,\n")
        tikz_file.write("nodes={minimum width=4mm, minimum height=4mm, align=center, font=\\scriptsize},\n")
        tikz_file.write("edge quotes mid,\n")
        tikz_file.write("edges={nodes={font=\\scriptsize, fill=white, inner sep=1.5pt}}] {\n")
        # maximal methods share one layer
        tikz_file.write("{ [same layer]\n")
        for method_id, method in enumerate(methods):
            if (not method.discard()) and method.is_maximum():
                tikz_file.write(str(method_id) + " [" + method.name + ", minimum width=2.5cm, as={" + method.tikz_descriptor() + "}, label=left:{\\footnotesize " + method.label() + "}],\n")
        tikz_file.write("};\n")
        # remaining nodes
        for method_id, method in enumerate(methods):
            if (not method.discard()) and (not method.is_maximum()):
                tikz_file.write(str(method_id) + " [" + method.name + ", as={" + method.tikz_descriptor() + "}];\n")
        # edges
        for method_id, method in enumerate(methods):
            for edge in method.get_adj_list():
                tikz_file.write(str(method_id) + " -> [" + edge[1] + "] " + str(edge[0]) + ";\n")
        tikz_file.write("};\n")
        tikz_file.write("\\end{tikzpicture}")
1030 
# Command line interface.
parser = argparse.ArgumentParser(description="Generates TikZ dominance graph from CSV file.")
parser.add_argument("tikz_dir", help="name of output directory for TikZ files")
parser.add_argument("table_dir", help="name of output directory for LaTeX tables")
parser.add_argument("data_dir", help="name of output directory for csv tables")
parser.add_argument("--no_lsape", help="do not consider LSAPE based methods", action="store_true")
parser.add_argument("--no_ls", help="do not consider local search based methods", action="store_true")
parser.add_argument("--no_lp", help="do not consider LP based methods", action="store_true")
# The flag used to be registered with three dashes by mistake; that spelling
# is kept as an alias so that existing invocations continue to work.
parser.add_argument("--no_misc", "---no_misc", help="do not consider miscellaneous methods", action="store_true")

args = parser.parse_args()

# Datasets and method names covered by the experiments.
datasets = ["Letter_HIGH", "Mutagenicity", "AIDS", "Protein", "Fingerprint", "GREC"]
aggregated_scores = {}
lsape_based_method_names = ["NODE", "BP", "BRANCH", "BRANCHFAST", "BRANCHUNI", "STAR", "SUBGRAPH", "WALKS", "RINGOPT", "RINGMS", "RINGMLSVM", "RINGMLDNN", "PREDICTSVM", "PREDICTDNN"]
lp_based_method_names = ["FONE", "FTWO", "COMPACTMIP", "JUSTICEIP"]
ls_based_method_names = ["REFINE", "KREFINE", "BPBEAM", "IBPBEAM", "IPFP"]
misc_method_names = ["HED", "BRANCHTIGHT", "SA", "BRANCHCOMPACT", "PARTITION", "HYBRID"]
lsape_ext_names = ["MULTISOL", "CENTRALITIES"]
ls_ext_names = ["MULTISTART", "RANDPOST"]
method_names = lsape_based_method_names + lp_based_method_names + ls_based_method_names + misc_method_names
method_ext_names = method_names + lsape_ext_names + ls_ext_names

# Per dataset: read the results, write the per-dataset output once for lower
# and once for upper bounds, then aggregate the scores.
for dataset in datasets:
    methods = read_results_from_csv_files(dataset, args)
    for consider_lb in [True, False]:
        methods = build_dependency_graph(methods, consider_lb)
        create_pareto_data(args, dataset, methods, consider_lb, method_names)
        create_tikz_graph(args, dataset, methods, consider_lb)
        create_coeff_vs_dist_table(args, dataset, methods, consider_lb)
    aggregated_scores[dataset] = aggregate_scores(methods, method_names, method_ext_names)

# Output spanning all datasets.
create_latex_tables(args, datasets, aggregated_scores, lsape_based_method_names, lp_based_method_names, ls_based_method_names, misc_method_names, lsape_ext_names, ls_ext_names)
create_barplots(args, datasets, aggregated_scores, method_names, lsape_ext_names, ls_ext_names)
1061