GEDLIB  1.0
test_lsape_based_methods.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * *
3  * Copyright (C) 2018 by David B. Blumenthal *
4  * *
5  * This file is part of GEDLIB. *
6  * *
7  * GEDLIB is free software: you can redistribute it and/or modify it *
8  * under the terms of the GNU Lesser General Public License as published *
9  * by the Free Software Foundation, either version 3 of the License, or *
10  * (at your option) any later version. *
11  * *
12  * GEDLIB is distributed in the hope that it will be useful, *
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15  * GNU Lesser General Public License for more details. *
16  * *
17  * You should have received a copy of the GNU Lesser General Public *
18  * License along with GEDLIB. If not, see <http://www.gnu.org/licenses/>. *
19  * *
20  ***************************************************************************/
21 
31 #include "util.hpp"
32 
33 class Method {
34 private:
35  // method and options
36  ged::Options::GEDMethod ged_method_;
37  std::size_t num_solutions_;
38  std::string centralities_;
39  std::string set_distances_;
40  std::string ml_method_;
41 
42 
43  std::string options_(const std::string & dataset) const {
44  std::string options("");
45  options += "--threads 6 --max-num-solutions " + std::to_string(num_solutions_) + " --centrality-method " + centralities_;
46  if (set_distances_ != "") {
47  options += " --led-method " + set_distances_;
48  options += " --load ../ini/" + dataset + "_ring_" + set_distances_ + ".ini";
49  }
50  if (ml_method_ != "") {
51  std::string ged_method_name{(ged_method_ == ged::Options::GEDMethod::RING_ML) ? "_ring_ml_" : "_bipartite_ml_BIPARTITE_"};
52  options += " --ml-method " + ml_method_;
53  if (ml_method_ == "DNN") {
54  options += " --load ../ini/" + dataset + ged_method_name + "dnn.ini";
55  }
56  else {
57  options += " --load ../ini/" + dataset + ged_method_name + "one_class_svm.ini";
58  }
59  }
60  if (ged_method_ == ged::Options::GEDMethod::WALKS) {
61  options += " --load ../ini/" + dataset + "_walks.ini";
62  }
63  if (ged_method_ == ged::Options::GEDMethod::SUBGRAPH) {
64  options += " --load ../ini/" + dataset + "_subgraph.ini";
65  }
66  return options;
67  }
68 
69 public:
70  Method(ged::Options::GEDMethod ged_method, std::size_t num_solutions, std::string centralities, const std::string & set_distances = "", const std::string ml_method = "") :
71  ged_method_{ged_method},
72  num_solutions_{num_solutions},
73  centralities_{centralities},
74  set_distances_{set_distances},
75  ml_method_{ml_method} {
76  if (num_solutions_ <= 0) {
77  throw ged::Error("Invalid number of solutions.");
78  }
79  if (centralities_ != "NONE" and centralities_ != "DEGREE" and centralities_ != "EIGENVALUE" and centralities_ != "PAGERANK") {
80  throw ged::Error("Invalid node centralities.");
81  }
82  if (ged_method_ == ged::Options::GEDMethod::RING) {
83  if (set_distances != "LSAPE_OPTIMAL" and set_distances != "LSAPE_GREEDY" and set_distances != "GAMMA") {
84  throw ged::Error("Invalid set distances.");
85  }
86  }
87  else if (set_distances_ != "") {
88  throw ged::Error("Invalid set distances.");
89  }
90  if (ged_method_ == ged::Options::GEDMethod::BIPARTITE_ML or ged_method_ == ged::Options::GEDMethod::RING_ML) {
91  if (ml_method != "DNN" and ml_method != "ONE_CLASS_SVM") {
92  throw ged::Error("Invalid machine learning method.");
93  }
94  }
95  else if (ml_method_ != "") {
96  throw ged::Error("Invalid machine learning method.");
97  }
98  }
99 
100  std::string name() const {
101  std::stringstream name;
102  if (ged_method_ == ged::Options::GEDMethod::RING) {
103  if (set_distances_ == "GAMMA") {
104  name << "RINGMS";
105  }
106  else {
107  name << "RINGOPT";
108  }
109  }
110  else if (ged_method_ == ged::Options::GEDMethod::RING_ML) {
111  if (ml_method_ == "DNN") {
112  name << "RINGMLDNN";
113  }
114  else {
115  name << "RINGMLSVM";
116  }
117  }
118  else if (ged_method_ == ged::Options::GEDMethod::BIPARTITE_ML) {
119  if (ml_method_ == "DNN") {
120  name << "PREDICTDNN";
121  }
122  else {
123  name << "PREDICTSVM";
124  }
125  }
126  else if (ged_method_ == ged::Options::GEDMethod::BIPARTITE) {
127  name << "BP";
128  }
129  else if (ged_method_ == ged::Options::GEDMethod::BRANCH_FAST) {
130  name << "BRANCHFAST";
131  }
132  else if (ged_method_ == ged::Options::GEDMethod::BRANCH) {
133  name << "BRANCH";
134  }
135  else if (ged_method_ == ged::Options::GEDMethod::BRANCH_UNIFORM) {
136  name << "BRANCHUNI";
137  }
138  else if (ged_method_ == ged::Options::GEDMethod::STAR) {
139  name << "STAR";
140  }
141  else if (ged_method_ == ged::Options::GEDMethod::SUBGRAPH) {
142  name << "SUBGRAPH";
143  }
144  else if (ged_method_ == ged::Options::GEDMethod::NODE) {
145  name << "NODE";
146  }
147  else if (ged_method_ == ged::Options::GEDMethod::WALKS) {
148  name << "WALKS";
149  }
150  name << ",$(" << num_solutions_;
151  if (centralities_ == "NONE") {
152  name << ",0.0)$";
153  }
154  else {
155  name << ",0.7)$";
156  }
157  return name.str();
158  }
159 
160  void run_on_dataset(const std::string & dataset, ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> & env, double & avg_lb, double & avg_ub, double & avg_runtime,
161  double & classification_coefficient_lb, double & classification_coefficient_ub) const {
162  env.set_method(ged_method_, options_(dataset));
163  env.init_method();
164  std::size_t num_runs{env.graph_ids().second * env.graph_ids().second};
165  ged::ProgressBar progress_bar(num_runs);
166  std::cout << "\r\t" << name() << ": " << progress_bar << std::flush;
167  std::size_t num_intra_class_runs{0};
168  std::size_t num_inter_class_runs{0};
169  double largest_lb{0.0};
170  double avg_intra_class_lb{0.0};
171  double avg_inter_class_lb{0.0};
172  double largest_ub{0.0};
173  double avg_intra_class_ub{0.0};
174  double avg_inter_class_ub{0.0};
175  avg_runtime = 0;
176  avg_ub = 0;
177  avg_lb = 0;
178  for (ged::GEDGraph::GraphID g_id = env.graph_ids().first; g_id != env.graph_ids().second; g_id++) {
179  for (ged::GEDGraph::GraphID h_id = env.graph_ids().first; h_id != env.graph_ids().second; h_id++) {
180  env.run_method(g_id, h_id);
181  avg_lb += env.get_lower_bound(g_id, h_id);
182  avg_ub += env.get_upper_bound(g_id, h_id);
183  avg_runtime += env.get_runtime(g_id, h_id);
184  if (env.get_lower_bound(g_id, h_id) > largest_lb) {
185  largest_lb = env.get_lower_bound(g_id, h_id);
186  }
187  if (env.get_upper_bound(g_id, h_id) > largest_ub) {
188  largest_ub = env.get_upper_bound(g_id, h_id);
189  }
190  if (env.get_graph_class(g_id) == env.get_graph_class(h_id)) {
191  avg_intra_class_lb += env.get_lower_bound(g_id, h_id);
192  avg_intra_class_ub += env.get_upper_bound(g_id, h_id);
193  num_intra_class_runs++;
194  }
195  else {
196  avg_inter_class_lb += env.get_lower_bound(g_id, h_id);
197  avg_inter_class_ub += env.get_upper_bound(g_id, h_id);
198  num_inter_class_runs++;
199  }
200  progress_bar.increment();
201  std::cout << "\r\t" << name() << ": " << progress_bar << std::flush;
202  }
203  }
204  avg_lb /= static_cast<double>(num_runs);
205  avg_ub /= static_cast<double>(num_runs);
206  avg_runtime /= static_cast<double>(num_runs);
207  avg_intra_class_lb /= static_cast<double>(num_intra_class_runs);
208  avg_intra_class_ub /= static_cast<double>(num_intra_class_runs);
209  avg_inter_class_lb /= static_cast<double>(num_inter_class_runs);
210  avg_inter_class_ub /= static_cast<double>(num_inter_class_runs);
211  if (largest_lb > 0) {
212  classification_coefficient_lb = (avg_inter_class_lb - avg_intra_class_lb) / largest_lb;
213  }
214  else {
215  classification_coefficient_lb = 0;
216  }
217  if (largest_ub > 0) {
218  classification_coefficient_ub = (avg_inter_class_ub - avg_intra_class_ub) / largest_ub;
219  }
220  else {
221  classification_coefficient_ub = 0;
222  }
223  std::cout << "\n";
224  }
225 };
226 
227 
228 void test_on_dataset(const std::string & dataset, bool only_lb) {
229 
230  // Initialize environment.
231  std::cout << "\n=== " << dataset << " ===\n";
232  std::cout << "\tInitializing the environment ...\n";
234  util::setup_environment(dataset, false, env);
235 
236  // Collect all tested methods.
237  std::vector<ged::Options::GEDMethod> ged_methods;
238  if (only_lb) {
240  }
241  else {
243  }
244  std::vector<std::string> set_distances{"GAMMA", "LSAPE_OPTIMAL"};
245  std::vector<std::string> centralities{"NONE", "PAGERANK"};
246  std::vector<std::string> ml_methods{"DNN", "ONE_CLASS_SVM"};
247  std::vector<std::size_t> nums_solutions{1, 4, 7, 10};
248  std::vector<Method> methods;
249  for (auto ged_method : ged_methods) {
250  for (const auto & centrality_method : centralities) {
251  for (auto num_solutions : nums_solutions) {
252  if (ged_method == ged::Options::GEDMethod::RING) {
253  for (const auto & set_distance : set_distances) {
254  methods.emplace_back(ged_method, num_solutions, centrality_method, set_distance);
255  }
256  }
257  else if (ged_method == ged::Options::GEDMethod::RING_ML or ged_method == ged::Options::GEDMethod::BIPARTITE_ML) {
258  for (const auto & ml_method : ml_methods) {
259  methods.emplace_back(ged_method, num_solutions, centrality_method, "", ml_method);
260  }
261  }
262  else {
263  methods.emplace_back(ged_method, num_solutions, centrality_method);
264  }
265  }
266  }
267  }
268 
269  // Run the tests.
270  std::string result_filename("../results/");
271  if (only_lb) {
272  result_filename += dataset + "__lsape_based_methods_LB.csv";
273  }
274  else {
275  result_filename += dataset + "__lsape_based_methods.csv";
276  }
277  std::ofstream result_file(result_filename.c_str());
278  result_file << "method;avg_lb;avg_ub;avg_runtime;classification_coefficient_lb;classification_coefficient_ub\n";
279  result_file.close();
280  double avg_ub{0};
281  double avg_lb{0};
282  double avg_runtime{0};
283  double classification_coefficient_lb{0};
284  double classification_coefficient_ub{0};
285  for (auto & method : methods) {
286  method.run_on_dataset(dataset, env, avg_lb, avg_ub, avg_runtime, classification_coefficient_lb, classification_coefficient_ub);
287  result_file.open(result_filename.c_str(),std::ios_base::app);
288  result_file << method.name() << ";" << avg_lb << ";" << avg_ub << ";" << avg_runtime << ";" << classification_coefficient_lb << ";" << classification_coefficient_ub << "\n";
289  result_file.close();
290  }
291 }
292 
293 int main(int argc, char* argv[]) {
294  std::vector<std::string> datasets;
295  bool only_lb{false};
296  int i{1};
297  if (argc > 1) {
298  if (std::string(argv[i]) == "--lb") {
299  only_lb = true;
300  i++;
301  }
302  }
303  for (; i < argc; i++) {
304  datasets.push_back(std::string(argv[i]));
305  util::check_dataset(datasets.back());
306  }
307  if (datasets.empty()) {
308  util::setup_datasets(datasets);
309  }
310  for (auto dataset : datasets) {
311  try {
312  test_on_dataset(dataset, only_lb);
313  }
314  catch (const std::exception & error) {
315  std::cerr << error.what() << ". " << "Error on " << dataset << ".\n";
316  }
317  }
318  return 0;
319 }
320 
321 
322 
Provides utility functions for tests of VLDB J. submission.
std::pair< GEDGraph::GraphID, GEDGraph::GraphID > graph_ids() const
Provides access to the IDs of the graphs contained in the environment.
Definition: ged_env.ipp:507
std::vector< GEDGraph >::size_type GraphID
Type of internally used graph IDs.
Definition: ged_graph.hpp:112
Selects ged::Branch.
Selects ged::BranchFast.
void init_method()
Initializes the method specified by call to set_method().
Definition: ged_env.ipp:521
Selects ged::Walks.
GEDMethod
Selects the method.
const std::string & get_graph_class(GEDGraph::GraphID graph_id) const
Returns the graph class.
Definition: ged_env.ipp:578
double get_runtime(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns runtime.
Definition: ged_env.ipp:567
Runtime error class.
Definition: error.hpp:37
A progress bar class.
Selects ged::BipartiteML.
Selects ged::Bipartite.
double get_upper_bound(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns upper bound for edit distance between the input graphs.
Definition: ged_env.ipp:545
void run_method(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id)
Runs the GED method specified by call to set_method() between the graphs with IDs g_id and h_id...
Definition: ged_env.ipp:471
void set_method(Options::GEDMethod method, const std::string &options=std::string(""))
Sets the GEDMethod to be used by run_method().
Definition: ged_env.ipp:384
Selects ged::BranchUniform.
Selects ged::Subgraph.
double get_lower_bound(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns lower bound for edit distance between the input graphs.
Definition: ged_env.ipp:534
Provides the API of GEDLIB.
Definition: ged_data.hpp:48