GEDLIB  1.0
test_ls_based_methods.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * *
3  * Copyright (C) 2018 by David B. Blumenthal *
4  * *
5  * This file is part of GEDLIB. *
6  * *
7  * GEDLIB is free software: you can redistribute it and/or modify it *
8  * under the terms of the GNU Lesser General Public License as published *
9  * by the Free Software Foundation, either version 3 of the License, or *
10  * (at your option) any later version. *
11  * *
12  * GEDLIB is distributed in the hope that it will be useful, *
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15  * GNU Lesser General Public License for more details. *
16  * *
17  * You should have received a copy of the GNU Lesser General Public *
18  * License along with GEDLIB. If not, see <http://www.gnu.org/licenses/>. *
19  * *
20  ***************************************************************************/
21 
31 #include "util.hpp"
32 
33 class Method {
34 private:
35  // method and options
36  ged::Options::GEDMethod ged_method_;
37  std::size_t num_initial_solutions_;
38  double ratio_initial_solutions_;
39  double randpost_penalty_;
40  std::size_t num_randpost_loops_;
41  std::size_t max_swap_size_;
42  std::size_t num_orderings_;
43 
44 
45  std::string options_() const {
46  std::string options("");
47  options += "--threads 6 --initial-solutions " + std::to_string(num_initial_solutions_) + " --ratio-runs-from-initial-solutions " + std::to_string(ratio_initial_solutions_) + " --num-randpost-loops " + std::to_string(num_randpost_loops_) + " --randpost-penalty " + std::to_string(randpost_penalty_);
48  if (ged_method_ == ged::Options::GEDMethod::REFINE) {
49  options += " --max-swap-size " + std::to_string(max_swap_size_);
50  }
51  else if (ged_method_ == ged::Options::GEDMethod::BP_BEAM) {
52  options += " --num-orderings " + std::to_string(num_orderings_);
53  }
54  return options;
55  }
56 
57 public:
58  Method(ged::Options::GEDMethod ged_method, std::size_t num_initial_solutions, double ratio_initial_solutions, std::size_t num_randpost_loops, double randpost_penalty = 0.0, std::size_t max_swap_size = 2, std::size_t num_orderings = 1) :
59  ged_method_{ged_method},
60  num_initial_solutions_{num_initial_solutions},
61  ratio_initial_solutions_{ratio_initial_solutions},
62  randpost_penalty_{randpost_penalty},
63  num_randpost_loops_{num_randpost_loops},
64  max_swap_size_{max_swap_size},
65  num_orderings_{num_orderings} {}
66 
67  std::string name() const {
68  std::stringstream name;
69  if (ged_method_ == ged::Options::GEDMethod::BP_BEAM) {
70  if (num_orderings_ == 1) {
71  name << "BPBEAM";
72  }
73  else {
74  name << "IBPBEAM";
75  }
76  }
77  else if (ged_method_ == ged::Options::GEDMethod::REFINE) {
78  if (max_swap_size_ == 2) {
79  name << "REFINE";
80  }
81  else {
82  name << "KREFINE";
83  }
84  }
85  else {
86  name << "IPFP";
87  }
88  name << ",$(" << num_initial_solutions_ << "," << ratio_initial_solutions_ << "," << num_randpost_loops_ << "," << randpost_penalty_ << ")$";
89  return name.str();
90  }
91 
92  void run_on_dataset(const std::string & dataset, ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> & env, double & avg_lb, double & avg_ub, double & avg_runtime,
93  double & classification_coefficient_lb, double & classification_coefficient_ub) const {
94  env.set_method(ged_method_, options_());
95  env.init_method();
96  std::size_t num_runs{env.graph_ids().second * env.graph_ids().second};
97  ged::ProgressBar progress_bar(num_runs);
98  std::cout << "\r\t" << name() << ": " << progress_bar << std::flush;
99  std::size_t num_intra_class_runs{0};
100  std::size_t num_inter_class_runs{0};
101  double largest_lb{0.0};
102  double avg_intra_class_lb{0.0};
103  double avg_inter_class_lb{0.0};
104  double largest_ub{0.0};
105  double avg_intra_class_ub{0.0};
106  double avg_inter_class_ub{0.0};
107  avg_runtime = 0;
108  avg_ub = 0;
109  for (ged::GEDGraph::GraphID g_id = env.graph_ids().first; g_id != env.graph_ids().second; g_id++) {
110  for (ged::GEDGraph::GraphID h_id = env.graph_ids().first; h_id != env.graph_ids().second; h_id++) {
111  env.run_method(g_id, h_id);
112  avg_lb += env.get_lower_bound(g_id, h_id);
113  avg_ub += env.get_upper_bound(g_id, h_id);
114  avg_runtime += env.get_runtime(g_id, h_id);
115  if (env.get_lower_bound(g_id, h_id) > largest_lb) {
116  largest_lb = env.get_lower_bound(g_id, h_id);
117  }
118  if (env.get_upper_bound(g_id, h_id) > largest_ub) {
119  largest_ub = env.get_upper_bound(g_id, h_id);
120  }
121  if (env.get_graph_class(g_id) == env.get_graph_class(h_id)) {
122  avg_intra_class_lb += env.get_lower_bound(g_id, h_id);
123  avg_intra_class_ub += env.get_upper_bound(g_id, h_id);
124  num_intra_class_runs++;
125  }
126  else {
127  avg_inter_class_lb += env.get_lower_bound(g_id, h_id);
128  avg_inter_class_ub += env.get_upper_bound(g_id, h_id);
129  num_inter_class_runs++;
130  }
131  progress_bar.increment();
132  std::cout << "\r\t" << name() << ": " << progress_bar << std::flush;
133  }
134  }
135  avg_lb /= static_cast<double>(num_runs);
136  avg_ub /= static_cast<double>(num_runs);
137  avg_runtime /= static_cast<double>(num_runs);
138  avg_intra_class_lb /= static_cast<double>(num_intra_class_runs);
139  avg_intra_class_ub /= static_cast<double>(num_intra_class_runs);
140  avg_inter_class_lb /= static_cast<double>(num_inter_class_runs);
141  avg_inter_class_ub /= static_cast<double>(num_inter_class_runs);
142  if (largest_lb > 0) {
143  classification_coefficient_lb = (avg_inter_class_lb - avg_intra_class_lb) / largest_lb;
144  }
145  else {
146  classification_coefficient_lb = 0;
147  }
148  if (largest_ub > 0) {
149  classification_coefficient_ub = (avg_inter_class_ub - avg_intra_class_ub) / largest_ub;
150  }
151  else {
152  classification_coefficient_ub = 0;
153  }
154  std::cout << "\n";
155  }
156 };
157 
158 struct RandpostSetup {
159  std::size_t num_initial_solutions;
160  double ratio_initial_solutions;
161  std::size_t num_randpost_loops;
162  RandpostSetup (std::size_t num_initial_solutions, double ratio_initial_solutions, std::size_t num_randpost_loops) :
163  num_initial_solutions{num_initial_solutions},
164  ratio_initial_solutions{ratio_initial_solutions},
165  num_randpost_loops{num_randpost_loops} {}
166 };
167 
168 
169 void test_on_dataset(const std::string & dataset, bool only_refine, bool only_ipfp, bool only_bp_beam) {
170 
171  // Initialize environment.
172  std::cout << "\n=== " << dataset << " ===\n";
173  std::cout << "\tInitializing the environment ...\n";
175  util::setup_environment(dataset, false, env);
176 
177  // Collect all tested methods.
178  std::vector<ged::Options::GEDMethod> ged_methods;
179  if (only_refine) {
180  ged_methods = {ged::Options::GEDMethod::REFINE};
181  }
182  else if (only_ipfp) {
183  ged_methods = {ged::Options::GEDMethod::IPFP};
184  }
185  else if (only_bp_beam) {
186  ged_methods = {ged::Options::GEDMethod::BP_BEAM};
187  }
188  else {
190  }
191  std::vector<std::size_t> nums_orderings{1, 20};
192  std::vector<std::size_t> max_swap_sizes{2, 3};
193  std::vector<RandpostSetup> randpost_setups;
194  randpost_setups.emplace_back(1, 1, 0);
195  randpost_setups.emplace_back(10, 1, 0);
196  randpost_setups.emplace_back(20, 1, 0);
197  randpost_setups.emplace_back(30, 1, 0);
198  randpost_setups.emplace_back(40, 1, 0);
199  randpost_setups.emplace_back(40, 0.5, 1);
200  randpost_setups.emplace_back(40, 0.25, 3);
201  randpost_setups.emplace_back(40, 0.125, 7);
202  std::vector<double> randpost_penalties{0.0, 1.0};
203 
204  std::vector<Method> methods;
205  for (auto ged_method : ged_methods) {
206  for (auto randpost_setup : randpost_setups) {
207  if (randpost_setup.num_randpost_loops > 0) {
208  for (auto randpost_penalty : randpost_penalties) {
209  if (ged_method == ged::Options::GEDMethod::REFINE) {
210  for (auto max_swap_size : max_swap_sizes) {
211  methods.emplace_back(ged_method, randpost_setup.num_initial_solutions, randpost_setup.ratio_initial_solutions, randpost_setup.num_randpost_loops, randpost_penalty, max_swap_size);
212  }
213  }
214  else if (ged_method == ged::Options::GEDMethod::BP_BEAM) {
215  for (auto num_orderings : nums_orderings) {
216  methods.emplace_back(ged_method, randpost_setup.num_initial_solutions, randpost_setup.ratio_initial_solutions, randpost_setup.num_randpost_loops, randpost_penalty, 2, num_orderings);
217  }
218  }
219  else {
220  methods.emplace_back(ged_method, randpost_setup.num_initial_solutions, randpost_setup.ratio_initial_solutions, randpost_setup.num_randpost_loops, randpost_penalty);
221  }
222  }
223  }
224  else {
225  if (ged_method == ged::Options::GEDMethod::REFINE) {
226  for (auto max_swap_size : max_swap_sizes) {
227  methods.emplace_back(ged_method, randpost_setup.num_initial_solutions, randpost_setup.ratio_initial_solutions, randpost_setup.num_randpost_loops, 0.0, max_swap_size);
228  }
229  }
230  else if (ged_method == ged::Options::GEDMethod::BP_BEAM) {
231  for (auto num_orderings : nums_orderings) {
232  methods.emplace_back(ged_method, randpost_setup.num_initial_solutions, randpost_setup.ratio_initial_solutions, randpost_setup.num_randpost_loops, 0.0, 2, num_orderings);
233  }
234  }
235  else {
236  methods.emplace_back(ged_method, randpost_setup.num_initial_solutions, randpost_setup.ratio_initial_solutions, randpost_setup.num_randpost_loops);
237  }
238  }
239  }
240  }
241 
242 
243  std::string result_filename("../results/");
244  if (only_refine) {
245  result_filename += dataset + "__ls_based_methods_REFINE.csv";
246  }
247  else if (only_ipfp) {
248  result_filename += dataset + "__ls_based_methods_IPFP.csv";
249  }
250  else if (only_bp_beam) {
251  result_filename += dataset + "__ls_based_methods_BP_BEAM.csv";
252  }
253  else {
254  result_filename += dataset + "__ls_based_methods.csv";
255  }
256  std::ofstream result_file(result_filename.c_str());
257  result_file << "method;avg_lb;avg_ub;avg_runtime;classification_coefficient_lb;classification_coefficient_ub\n";
258  result_file.close();
259  double avg_ub{0};
260  double avg_lb{0};
261  double avg_runtime{0};
262  double classification_coefficient_lb{0};
263  double classification_coefficient_ub{0};
264  for (auto & method : methods) {
265  method.run_on_dataset(dataset, env, avg_lb, avg_ub, avg_runtime, classification_coefficient_lb, classification_coefficient_ub);
266  result_file.open(result_filename.c_str(),std::ios_base::app);
267  result_file << method.name() << ";" << avg_lb << ";" << avg_ub << ";" << avg_runtime << ";" << classification_coefficient_lb << ";" << classification_coefficient_ub << "\n";
268  result_file.close();
269  }
270 }
271 
272 int main(int argc, char* argv[]) {
273  int i{1};
274  bool only_refine{false};
275  bool only_ipfp{false};
276  bool only_bp_beam{false};
277  if (argc > 1) {
278  if (std::string(argv[i]) == "--refine") {
279  only_refine = true;
280  i++;
281  }
282  else if (std::string(argv[i]) == "--ipfp") {
283  only_ipfp = true;
284  i++;
285  }
286  else if (std::string(argv[i]) == "--bp-beam") {
287  only_bp_beam = true;
288  i++;
289  }
290  }
291  std::vector<std::string> datasets;
292  for (; i < argc; i++) {
293  datasets.push_back(std::string(argv[i]));
294  util::check_dataset(datasets.back());
295  }
296  if (datasets.empty()) {
297  util::setup_datasets(datasets);
298  }
299  for (auto dataset : datasets) {
300  try {
301  test_on_dataset(dataset, only_refine, only_ipfp, only_bp_beam);
302  }
303  catch (const std::exception & error) {
304  std::cerr << error.what() << ". " << "Error on " << dataset << ".\n";
305  }
306  }
307  return 0;
308 }
309 
310 
311 
Provides utility functions for tests of VLDB J. submission.
std::pair< GEDGraph::GraphID, GEDGraph::GraphID > graph_ids() const
Provides access to the IDs of the graphs contained in the environment.
Definition: ged_env.ipp:507
std::vector< GEDGraph >::size_type GraphID
Type of internally used graph IDs.
Definition: ged_graph.hpp:112
void init_method()
Initializes the method specified by call to set_method().
Definition: ged_env.ipp:521
GEDMethod
Selects the method.
const std::string & get_graph_class(GEDGraph::GraphID graph_id) const
Returns the graph class.
Definition: ged_env.ipp:578
double get_runtime(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns runtime.
Definition: ged_env.ipp:567
A progress bar class.
double get_upper_bound(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns upper bound for edit distance between the input graphs.
Definition: ged_env.ipp:545
void run_method(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id)
Runs the GED method specified by call to set_method() between the graphs with IDs g_id and h_id...
Definition: ged_env.ipp:471
void set_method(Options::GEDMethod method, const std::string &options=std::string(""))
Sets the GEDMethod to be used by run_method().
Definition: ged_env.ipp:384
Selects ged::Refine.
double get_lower_bound(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns lower bound for edit distance between the input graphs.
Definition: ged_env.ipp:534
Provides the API of GEDLIB.
Definition: ged_data.hpp:48