GEDLIB  1.0
test_best_methods.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * *
3  * Copyright (C) 2018 by David B. Blumenthal *
4  * *
5  * This file is part of GEDLIB. *
6  * *
7  * GEDLIB is free software: you can redistribute it and/or modify it *
8  * under the terms of the GNU Lesser General Public License as published *
9  * by the Free Software Foundation, either version 3 of the License, or *
10  * (at your option) any later version. *
11  * *
12  * GEDLIB is distributed in the hope that it will be useful, *
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15  * GNU Lesser General Public License for more details. *
16  * *
17  * You should have received a copy of the GNU Lesser General Public *
18  * License along with GEDLIB. If not, see <http://www.gnu.org/licenses/>. *
19  * *
20  ***************************************************************************/
21 
#include <stdexcept>

#include "util.hpp"
32 
33 class Method {
34 private:
35  // method and options
36  ged::Options::GEDMethod ged_method_;
37 
38 
39  std::string options_() const {
40  std::string options("--threads 6");
41  if (ged_method_ == ged::Options::GEDMethod::F2 or ged_method_ == ged::Options::GEDMethod::BLP_NO_EDGE_LABELS) {
42  options += " --relax TRUE";
43  }
44  return options;
45  }
46 
47 public:
48  Method(ged::Options::GEDMethod ged_method) :
49  ged_method_{ged_method} {}
50 
51  std::string name() const {
52  std::stringstream name;
53  if (ged_method_ == ged::Options::GEDMethod::BRANCH_UNIFORM) {
54  name << "BRANCHUNI";
55  }
56  else if (ged_method_ == ged::Options::GEDMethod::BRANCH_FAST) {
57  name << "BRANCHFAST";
58  }
59  else if (ged_method_ == ged::Options::GEDMethod::NODE) {
60  name << "NODE";
61  }
62  else if (ged_method_ == ged::Options::GEDMethod::F2) {
63  name << "FTWO";
64  }
65  else if (ged_method_ == ged::Options::GEDMethod::BLP_NO_EDGE_LABELS) {
66  name << "JUSTICEIP";
67  }
68  else if (ged_method_ == ged::Options::GEDMethod::REFINE) {
69  name << "REFINE";
70  }
71  else {
72  name << "IPFP";
73  }
74  return name.str();
75  }
76 
77  void run_on_dataset(const std::string & dataset, ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> & env, double & avg_lb, double & avg_ub, double & avg_runtime) const {
78  env.set_method(ged_method_, options_());
79  if (dataset != "Protein" or ged_method_ != ged::Options::GEDMethod::PARTITION) {
80  env.init_method();
81  }
82  std::size_t num_runs{env.graph_ids().second * env.graph_ids().second};
83  ged::ProgressBar progress_bar(num_runs);
84  std::cout << "\r\t" << name() << ": " << progress_bar << std::flush;
85  avg_runtime = 0;
86  avg_ub = 0;
87  avg_lb = 0;
88  for (ged::GEDGraph::GraphID g_id = env.graph_ids().first; g_id != env.graph_ids().second; g_id++) {
89  for (ged::GEDGraph::GraphID h_id = env.graph_ids().first; h_id != env.graph_ids().second; h_id++) {
90  env.run_method(g_id, h_id);
91  avg_lb += env.get_lower_bound(g_id, h_id);
92  avg_ub += env.get_upper_bound(g_id, h_id);
93  avg_runtime += env.get_runtime(g_id, h_id);
94 
95  progress_bar.increment();
96  std::cout << "\r\t" << name() << ": " << progress_bar << std::flush;
97  }
98  }
99  avg_lb /= static_cast<double>(num_runs);
100  avg_ub /= static_cast<double>(num_runs);
101  avg_runtime /= static_cast<double>(num_runs);
102  std::cout << "\n";
103  }
104 };
105 
106 
107 void test_on_dataset(const std::string & dataset) {
108 
109  // Initialize environment.
110  std::cout << "\n=== " << dataset << " ===\n";
111  std::cout << "\tInitializing the environment ...\n";
112 
113  // Collect all tested methods.
114  std::vector<ged::Options::GEDMethod> ged_methods{ged::Options::GEDMethod::NODE, ged::Options::GEDMethod::BRANCH_UNIFORM, ged::Options::GEDMethod::BRANCH_FAST, ged::Options::GEDMethod::IPFP, ged::Options::GEDMethod::REFINE, ged::Options::GEDMethod::F2, ged::Options::GEDMethod::BLP_NO_EDGE_LABELS};
115  std::vector<Method> methods;
116  for (auto ged_method : ged_methods) {
117  methods.emplace_back(ged_method);
118  }
119 
120  // Collect all suffixes.
121  std::size_t max_max_size_div_10{0};
122  if (dataset == "AIDS") {
123  max_max_size_div_10 = 8;
124  }
125  else if (dataset == "Protein") {
126  max_max_size_div_10 = 6;
127  }
128  else if (dataset == "Mutagenicity") {
129  max_max_size_div_10 = 10;
130  }
131 
132  // Write the header of the result file.
133  std::string result_filename("../results/");
134  result_filename += dataset + "__best_methods.csv";
135  std::ofstream result_file(result_filename.c_str());
136  for (const auto & method : methods) {
137  result_file << method.name() + "_avg_lb," + method.name() + "_avg_ub," + method.name() + "_avg_runtime,";
138  }
139  result_file << "avg_num_nodes\n";
140  result_file.close();
141  // Run the tests.
142  for (std::size_t max_size_dev_10{1}; max_size_dev_10 <= max_max_size_div_10; max_size_dev_10++) {
144  util::setup_environment(dataset, max_size_dev_10, env);
145  double avg_ub{0};
146  double avg_lb{0};
147  double avg_runtime{0};
148  for (auto & method : methods) {
149  method.run_on_dataset(dataset, env, avg_lb, avg_ub, avg_runtime);
150  result_file.open(result_filename.c_str(),std::ios_base::app);
151  result_file << avg_lb << "," << avg_ub << "," << avg_runtime << ",";
152  result_file.close();
153  }
154  result_file.open(result_filename.c_str(),std::ios_base::app);
155  result_file << env.get_avg_num_nodes() << "\n";
156  result_file.close();
157  }
158 }
159 
160 int main(int argc, char* argv[]) {
161  std::vector<std::string> datasets;
162  for (int i{1}; i < argc; i++) {
163  datasets.push_back(std::string(argv[i]));
164  util::check_dataset(datasets.back());
165  }
166  if (datasets.empty()) {
167  util::setup_size_test_datasets(datasets);
168  }
169  for (auto dataset : datasets) {
170  try {
171  test_on_dataset(dataset);
172  }
173  catch (const std::exception & error) {
174  std::cerr << error.what() << ". " << "Error on " << dataset << ".\n";
175  }
176  }
177  return 0;
178 }
179 
180 
181 
Provides utility functions for tests of VLDB J. submission.
std::pair< GEDGraph::GraphID, GEDGraph::GraphID > graph_ids() const
Provides access to the IDs of the graphs contained in the environment.
Definition: ged_env.ipp:507
std::vector< GEDGraph >::size_type GraphID
Type of internally used graph IDs.
Definition: ged_graph.hpp:112
Selects ged::BranchFast.
void init_method()
Initializes the method specified by call to set_method().
Definition: ged_env.ipp:521
GEDMethod
Selects the method.
double get_runtime(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns runtime.
Definition: ged_env.ipp:567
Selects ged::Partition.
double get_avg_num_nodes() const
Returns average number of nodes.
Definition: ged_env.ipp:592
A progress bar class.
double get_upper_bound(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns upper bound for edit distance between the input graphs.
Definition: ged_env.ipp:545
void run_method(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id)
Runs the GED method specified by call to set_method() between the graphs with IDs g_id and h_id...
Definition: ged_env.ipp:471
void set_method(Options::GEDMethod method, const std::string &options=std::string(""))
Sets the GEDMethod to be used by run_method().
Definition: ged_env.ipp:384
Selects ged::Refine.
Selects ged::BranchUniform.
double get_lower_bound(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns lower bound for edit distance between the input graphs.
Definition: ged_env.ipp:534
Provides the API of GEDLIB.
Definition: ged_data.hpp:48