GEDLIB  1.0
tests.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * *
3  * Copyright (C) 2018 by David B. Blumenthal *
4  * *
5  * This file is part of GEDLIB. *
6  * *
7  * GEDLIB is free software: you can redistribute it and/or modify it *
8  * under the terms of the GNU Lesser General Public License as published *
9  * by the Free Software Foundation, either version 3 of the License, or *
10  * (at your option) any later version. *
11  * *
12  * GEDLIB is distributed in the hope that it will be useful, *
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15  * GNU Lesser General Public License for more details. *
16  * *
17  * You should have received a copy of the GNU Lesser General Public *
18  * License along with GEDLIB. If not, see <http://www.gnu.org/licenses/>. *
19  * *
20  ***************************************************************************/
21 
31 #include "util.hpp"
32 
33 class Method {
34 private:
35  // method and options
36  ged::Options::GEDMethod ged_method_;
37  std::size_t num_threads_;
38  std::size_t num_solutions_;
39  std::string centralities_;
40  std::string set_distances_;
41  std::string ml_method_;
42 
43 
44  std::string options_(const std::string & dataset) const {
45  std::string options("");
46  options += "--threads " + std::to_string(num_threads_) + " --max-num-solutions " + std::to_string(num_solutions_) + " --centrality-method " + centralities_;
47  if (set_distances_ != "") {
48  options += " --led-method " + set_distances_;
49  options += " --load ../output/" + dataset + "_ring_" + set_distances_ + ".ini";
50  }
51  if (ml_method_ != "") {
52  std::string ged_method_name{(ged_method_ == ged::Options::GEDMethod::RING_ML) ? "_ring_ml_" : "_bipartite_ml_BIPARTITE_"};
53  if (ml_method_ == "DNN" or ml_method_ == "SVM") {
54  options += " --ml-method " + ml_method_;
55  }
56  else if (ml_method_ == "ONE_CLASS_SVM_LIKELIHOOD") {
57  options += " --ml-method ONE_CLASS_SVM --one-class-svm-likelihood TRUE";
58  }
59  else {
60  options += " --ml-method ONE_CLASS_SVM --one-class-svm-likelihood FALSE";
61  }
62  if (ml_method_ == "DNN") {
63  options += " --load ../output/" + dataset + ged_method_name + "dnn.ini";
64  }
65  else if (ml_method_ == "SVM") {
66  options += " --load ../output/" + dataset + ged_method_name + "svm.ini";
67  }
68  else {
69  options += " --load ../output/" + dataset + ged_method_name + "one_class_svm.ini";
70  }
71  }
72  if (ged_method_ == ged::Options::GEDMethod::WALKS) {
73  options += " --load ../output/" + dataset + "_walks.ini";
74  }
75  if (ged_method_ == ged::Options::GEDMethod::SUBGRAPH) {
76  options += " --load ../output/" + dataset + "_subgraph.ini";
77  }
78  return options;
79  }
80 
81 public:
82  Method(ged::Options::GEDMethod ged_method, std::size_t threads, std::size_t solutions, std::string centralities, const std::string & set_distances = "", const std::string ml_method = "") :
83  ged_method_{ged_method},
84  num_threads_{threads},
85  num_solutions_{solutions},
86  centralities_{centralities},
87  set_distances_{set_distances},
88  ml_method_{ml_method} {
89  if (num_threads_ <= 0) {
90  throw ged::Error("Invalid number of threads.");
91  }
92  if (num_solutions_ <= 0) {
93  throw ged::Error("Invalid number of solutions.");
94  }
95  if (centralities_ != "NONE" and centralities_ != "DEGREE" and centralities_ != "EIGENVALUE" and centralities_ != "PAGERANK") {
96  throw ged::Error("Invalid node centralities.");
97  }
98  if (ged_method_ == ged::Options::GEDMethod::RING) {
99  if (set_distances != "LSAPE_OPTIMAL" and set_distances != "LSAPE_GREEDY" and set_distances != "GAMMA") {
100  throw ged::Error("Invalid set distances.");
101  }
102  }
103  else if (set_distances_ != "") {
104  throw ged::Error("Invalid set distances.");
105  }
106  if (ged_method_ == ged::Options::GEDMethod::BIPARTITE_ML or ged_method_ == ged::Options::GEDMethod::RING_ML) {
107  if (ml_method != "DNN" and ml_method != "SVM" and ml_method != "ONE_CLASS_SVM_LIKELIHOOD" and ml_method != "ONE_CLASS_SVM_SCALE") {
108  throw ged::Error("Invalid machine learning method.");
109  }
110  }
111  else if (ml_method_ != "") {
112  throw ged::Error("Invalid machine learning method.");
113  }
114  }
115 
116  std::string name() const {
117  std::stringstream name;
118  name << ged_method_ << "__C-" << centralities_;
119  if (set_distances_ != "") {
120  name << "__LED-" << set_distances_;
121  }
122  if (ml_method_ != "") {
123  name << "__ML-" << ml_method_;
124  }
125  return name.str();
126  }
127 
128  std::size_t num_threads() const {
129  return num_threads_;
130  }
131 
132  std::size_t num_solutions() const {
133  return num_solutions_;
134  }
135 
136  void run_on_dataset(const std::string & dataset, ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> & env, double & avg_ub, double & avg_runtime, double & avg_classification_ratio) const {
137  env.set_method(ged_method_, options_(dataset));
138  env.init_method();
139  std::size_t num_runs{(env.graph_ids().second * env.graph_ids().second) - env.graph_ids().second};
140  ged::ProgressBar progress_bar(num_runs);
141  std::cout << "\r\t" << name() << ", " << num_threads_ << " threads, " << num_solutions_ << " solutions: " << progress_bar << std::flush;
142  ged::GEDGraph::GraphID closest_graph_id{std::numeric_limits<ged::GEDGraph::GraphID>::max()};
143  double distance_to_closest_graph{std::numeric_limits<double>::infinity()};
144  avg_runtime = 0;
145  avg_ub = 0;
146  avg_classification_ratio = 0;
147  for (ged::GEDGraph::GraphID g_id = env.graph_ids().first; g_id != env.graph_ids().second; g_id++) {
148  closest_graph_id = std::numeric_limits<ged::GEDGraph::GraphID>::max();
149  distance_to_closest_graph = std::numeric_limits<double>::infinity();
150  for (ged::GEDGraph::GraphID h_id = env.graph_ids().first; h_id != env.graph_ids().second; h_id++) {
151  if (g_id == h_id) {
152  continue;
153  }
154  env.run_method(g_id, h_id);
155  avg_ub += env.get_upper_bound(g_id, h_id);
156  avg_runtime += env.get_runtime(g_id, h_id);
157  if (env.get_upper_bound(g_id, h_id) < distance_to_closest_graph) {
158  distance_to_closest_graph = env.get_upper_bound(g_id, h_id);
159  closest_graph_id = h_id;
160  }
161  progress_bar.increment();
162  std::cout << "\r\t" << name() << ", " << num_threads() << " threads, " << num_solutions() << " solutions: " << progress_bar << std::flush;
163  }
164  if (env.get_graph_class(g_id) == env.get_graph_class(closest_graph_id)) {
165  avg_classification_ratio += 1.0;
166  }
167  }
168  avg_ub /= static_cast<double>(num_runs);
169  avg_runtime /= static_cast<double>(num_runs);
170  avg_classification_ratio /= static_cast<double>(env.graph_ids().second);
171  std::cout << "\n";
172  }
173 };
174 
175 
176 void test_on_dataset(const std::string & dataset, const std::vector<string> & ml_methods, bool quick) {
177 
178  // Initialize environment.
179  std::cout << "\n=== " << dataset << " ===\n";
180  std::cout << "\tInitializing the environment ...\n";
182  util::setup_environment(dataset, false, env);
183 
184  // Learn the parameters.
186  std::vector<std::string> set_distances{"GAMMA", "LSAPE_GREEDY", "LSAPE_OPTIMAL"};
187  std::vector<std::string> centralities{"NONE", "PAGERANK"};
188  std::vector<std::size_t> threads{1, 4, 7, 10};
189  std::vector<std::size_t> solutions{1, 4, 7, 10};
190  if (quick) {
191  centralities = {"NONE"};
192  threads = {10};
193  solutions = {10};
194  }
195  std::vector<Method> methods;
196  for (auto ged_method : ged_methods) {
197  for (auto num_threads : threads) {
198  for (auto num_solutions : solutions) {
199  for (const auto & centrality_method : centralities) {
200  if (ged_method == ged::Options::GEDMethod::RING) {
201  for (const auto & set_distance : set_distances) {
202  methods.emplace_back(ged_method, num_threads, num_solutions, centrality_method, set_distance);
203  }
204  }
205  else if (ged_method == ged::Options::GEDMethod::RING_ML or ged_method == ged::Options::GEDMethod::BIPARTITE_ML) {
206  for (const auto & ml_method : ml_methods) {
207  methods.emplace_back(ged_method, num_threads, num_solutions, centrality_method, "", ml_method);
208  }
209  }
210  else {
211  methods.emplace_back(ged_method, num_threads, num_solutions, centrality_method);
212  }
213  }
214  }
215  }
216  }
217  std::string result_filename("../output/");
218  result_filename += dataset + "__RESULTS.csv";
219  std::ofstream result_file(result_filename.c_str());
220  result_file << "method,num_threads,num_solutions,avg_ub,avg_runtime,avg_classification_ratio\n";
221  result_file.close();
222  double avg_ub{0};
223  double avg_runtime{0};
224  double avg_classification_ratio{0};
225  for (auto & method : methods) {
226  method.run_on_dataset(dataset, env, avg_ub, avg_runtime, avg_classification_ratio);
227  result_file.open(result_filename.c_str(),std::ios_base::app);
228  result_file << method.name() << "," << method.num_threads() << "," << method.num_solutions() << ",";
229  result_file << avg_ub << "," << avg_runtime << "," << avg_classification_ratio << "\n";
230  result_file.close();
231  }
232 }
233 
234 int main(int argc, char* argv[]) {
235  std::vector<std::string> datasets;
236  std::vector<std::string> ml_methods{"DNN", "SVM", "ONE_CLASS_SVM_LIKELIHOOD", "ONE_CLASS_SVM_SCALE"};
237  bool quick{false};
238  int i{1};
239  if (argc > 1) {
240  std::string first_option(argv[i]);
241  if (first_option == "--no-svm") {
242  ml_methods = {"DNN", "ONE_CLASS_SVM_LIKELIHOOD"};
243  i++;
244  }
245  else if (first_option == "--quick") {
246  ml_methods = {"DNN", "ONE_CLASS_SVM_LIKELIHOOD"};
247  quick = true;
248  i++;
249  }
250  else {
251  std::cout << "first option = \"" << first_option << "\"\n";
252  }
253  }
254  for (; i < argc; i++) {
255  datasets.push_back(std::string(argv[i]));
256  util::check_dataset(datasets.back());
257  }
258  if (datasets.empty()) {
259  util::setup_datasets(datasets);
260  }
261  for (auto dataset : datasets) {
262  try {
263  test_on_dataset(dataset, ml_methods, quick);
264  }
265  catch (const std::exception & error) {
266  std::cerr << error.what() << ". " << "Error on " << dataset << ".\n";
267  }
268  }
269  return 0;
270 }
271 
272 
273 
std::pair< GEDGraph::GraphID, GEDGraph::GraphID > graph_ids() const
Provides access to the IDs of the graphs contained in the environment.
Definition: ged_env.ipp:507
std::vector< GEDGraph >::size_type GraphID
Type of internally used graph IDs.
Definition: ged_graph.hpp:112
Selects ged::Branch.
Selects ged::BranchFast.
void init_method()
Initializes the method specified by call to set_method().
Definition: ged_env.ipp:521
Selects ged::Walks.
GEDMethod
Selects the method.
const std::string & get_graph_class(GEDGraph::GraphID graph_id) const
Returns the graph class.
Definition: ged_env.ipp:578
double get_runtime(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns runtime.
Definition: ged_env.ipp:567
Provides utility functions for tests of PR submission.
Runtime error class.
Definition: error.hpp:37
A progress bar class.
Selects ged::BipartiteML.
Selects ged::Bipartite.
double get_upper_bound(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns upper bound for edit distance between the input graphs.
Definition: ged_env.ipp:545
void run_method(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id)
Runs the GED method specified by call to set_method() between the graphs with IDs g_id and h_id...
Definition: ged_env.ipp:471
void set_method(Options::GEDMethod method, const std::string &options=std::string(""))
Sets the GEDMethod to be used by run_method().
Definition: ged_env.ipp:384
Selects ged::Subgraph.
Provides the API of GEDLIB.
Definition: ged_data.hpp:48