GEDLIB  1.0
test_lsape_based_methods.cpp
Go to the documentation of this file.
1 /***************************************************************************
2 * *
3 * Copyright (C) 2018 by David B. Blumenthal *
4 * *
5 * This file is part of GEDLIB. *
6 * *
7 * GEDLIB is free software: you can redistribute it and/or modify it *
8 * under the terms of the GNU Lesser General Public License as published *
9 * by the Free Software Foundation, either version 3 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * GEDLIB is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU Lesser General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU Lesser General Public *
18 * License along with GEDLIB. If not, see <http://www.gnu.org/licenses/>. *
19 * *
20 ***************************************************************************/
21 
40 #define GXL_GEDLIB_SHARED
41 #include "../../../src/env/ged_env.hpp"
42 
43 constexpr std::size_t centrality() {return 20;}
44 constexpr std::size_t no_centrality() {return 0;}
45 constexpr std::size_t lsape_optimal() {return 0;}
46 constexpr std::size_t gamma() {return 40;}
47 constexpr std::size_t lsape_greedy() {return 80;}
48 constexpr std::size_t no_led_option() {return 0;}
49 
50 template <typename ScalarT>
51 void init_result_map(std::map<std::size_t, ScalarT> & result_map, ScalarT val) {
52  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::BIPARTITE)] = val;
53  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::BIPARTITE) + centrality()] = val;
54  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::BRANCH)] = val;
55  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::BRANCH) + centrality()] = val;
56  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::BRANCH_FAST)] = val;
57  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::BRANCH_FAST) + centrality()] = val;
58  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::WALKS)] = val;
59  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::WALKS) + centrality()] = val;
60  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::SUBGRAPH)] = val;
61  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::SUBGRAPH) + centrality()] = val;
62  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::RING) + lsape_optimal()] = val;
63  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::RING) + lsape_optimal() + centrality()] = val;
64  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::RING) + gamma()] = val;
65  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::RING) + gamma() + centrality()] = val;
66  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::RING) + lsape_greedy()] = val;
67  result_map[static_cast<std::size_t>(ged::Options::GEDMethod::RING) + lsape_greedy() + centrality()] = val;
68 }
69 
70 void init_method_name_map(std::map<std::size_t, std::string> & method_name_map) {
71  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::BIPARTITE)] = "bipartite";
72  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::BIPARTITE) + centrality()] = "bipartite_pagerank";
73  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::BRANCH)] = "branch";
74  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::BRANCH) + centrality()] = "branch_pagerank";
75  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::BRANCH_FAST)] = "branch_fast";
76  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::BRANCH_FAST) + centrality()] = "branch_fast_pagerank";
77  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::WALKS)] = "walks";
78  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::WALKS) + centrality()] = "walks_pagerank";
79  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::SUBGRAPH)] = "subgraph";
80  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::SUBGRAPH) + centrality()] = "subgraph_pagerank";
81  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::RING) + lsape_optimal()] = "ring_lsape_optimal";
82  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::RING) + lsape_optimal() + centrality()] = "ring_lsape_optimal_pagerank";
83  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::RING) + gamma()] = "ring_multisets";
84  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::RING) + gamma() + centrality()] = "ring_multisets_pagerank";
85  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::RING) + lsape_greedy()] = "ring_lsape_greedy";
86  method_name_map[static_cast<std::size_t>(ged::Options::GEDMethod::RING) + lsape_greedy() + centrality()] = "ring_lsape_greedy_pagerank";
87 }
88 
89 void normalize_result_map(std::map<std::size_t, double> & result_map, std::size_t val) {
90  for (auto & kv : result_map) {
91  kv.second /= static_cast<double>(val);
92  }
93 }
94 
96  ged::Options::GEDMethod method, std::size_t centrality, std::size_t gamma, const std::string & options,
97  std::map<std::size_t, double> & avg_time_in_sec, std::map<std::size_t, double> & avg_ub, double & best_ub, std::map<std::size_t, double> & ub,
98  std::map<std::size_t, double> & distance_to_closest_graph, std::map<std::size_t, ged::GEDGraph::GraphID> & closest_graph) {
99  env.set_method(method, options);
100  env.run_method(g_id, h_id);
101  std::size_t method_index{static_cast<std::size_t>(method) + centrality + gamma};
102  double time{env.get_runtime(g_id, h_id)};
103  ub[method_index] = env.get_upper_bound(g_id, h_id);
104  avg_time_in_sec[method_index] += time;
105  avg_ub[method_index] += ub.at(method_index);
106  best_ub = std::min(best_ub, ub.at(method_index));
107  if (ub.at(method_index) < distance_to_closest_graph.at(method_index)) {
108  distance_to_closest_graph[method_index] = ub.at(method_index);
109  closest_graph[method_index] = h_id;
110  }
111 }
112 
113 void run_tests_on_dataset(const std::string & dataset) {
114 
115  // Initialize environment.
116  std::cout << "\n=== " << dataset << " ===\n";
117  std::cout << "\tInitializing the environment ...\n";
119  std::vector<ged::GEDGraph::GraphID> graph_ids(env.load_gxl_graphs(std::string("../../../data/datasets/") + dataset + "/", std::string("../../../data/collections/") + dataset + ".xml"));
120  if (dataset == "GREC") {
122  }
123  else {
125  }
126  env.init();
127 
128  std::string output_prefix("../output/");
129  output_prefix += dataset + "_";
130 
131  // Define option strings.
132  std::string threads_option("--threads 5");
133  std::string centrality_option("--centrality-method PAGERANK");
134  std::string walks_depth_option("--load ");
135  walks_depth_option += output_prefix + "walks.ini";
136  std::string subgraph_depth_option("--load ");
137  subgraph_depth_option += output_prefix + "subgraph.ini";
138  std::string ring_option_lsape_optimal("--led-method LSAPE_OPTIMAL --load ");
139  ring_option_lsape_optimal += output_prefix + "ring_LSAPE_OPTIMAL.ini";
140  std::string ring_option_lsape_greedy("--led-method LSAPE_GREEDY --load ");
141  ring_option_lsape_greedy += output_prefix + "ring_LSAPE_GREEDY.ini";
142  std::string ring_option_gamma("--led-method GAMMA --load ");
143  ring_option_gamma += output_prefix + "ring_GAMMA.ini";
144 
145 
146  // Initialize result variables.
147  std::map<std::size_t, double> avg_time_in_sec;
148  init_result_map(avg_time_in_sec, 0.0);
149  std::map<std::size_t, double> avg_ub;
150  init_result_map(avg_ub, 0.0);
151  std::map<std::size_t, double> classification_ratio;
152  init_result_map(classification_ratio, 0.0);
153  std::map<std::size_t, double> avg_dev_best_ub;
154  init_result_map(avg_dev_best_ub, 0.0);
155  std::map<std::size_t, double> distance_to_closest_graph;
156  std::map<std::size_t, ged::GEDGraph::GraphID> closest_graph;
157  std::map<std::size_t, double> ub;
158  double best_ub;
159 
160  // Initialize outfile and progress bar.
161  std::ofstream outfile(output_prefix + "results.csv");
162  outfile << "method,avg_time_in_sec,avg_ub,classification_ratio,avg_dev_best_ub\n";
163  std::size_t num_runs{(graph_ids.size() * graph_ids.size()) - graph_ids.size()};
164  ged::ProgressBar progress_bar(num_runs);
165  std::cout << "\r\tRunning the tests: " << progress_bar << std::flush;
166  std::size_t num_best_ub_zero{0};
167  for (auto g_id : graph_ids) {
168  // Reset result variables used for computing classification ratio.
169  init_result_map(distance_to_closest_graph, std::numeric_limits<double>::max());
170  init_result_map(closest_graph, std::numeric_limits<ged::GEDGraph::GraphID>::max());
171  for (auto h_id : graph_ids) {
172  if (g_id == h_id) {
173  continue;
174  }
175 
176  // Reset result variables for computing average deviation from best upper bound.
177  best_ub = std::numeric_limits<double>::max();
178  init_result_map(ub, std::numeric_limits<double>::max());
179 
180  // Run methods and update result variables for computing average upper bound and runtime.
181  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::BIPARTITE, no_centrality(), no_led_option(), threads_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
182  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::BIPARTITE, centrality(), no_led_option(), threads_option + " " + centrality_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
183  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::BRANCH, no_centrality(), no_led_option(), threads_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
184  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::BRANCH, centrality(), no_led_option(), threads_option + " " + centrality_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
185  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::BRANCH_FAST, no_centrality(), no_led_option(), threads_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
186  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::BRANCH_FAST, centrality(), no_led_option(), threads_option + " " + centrality_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
187  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::WALKS, no_centrality(), no_led_option(), walks_depth_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
188  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::WALKS, centrality(), no_led_option(), centrality_option + " " + walks_depth_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
189  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::SUBGRAPH, no_centrality(), no_led_option(), threads_option + " " + subgraph_depth_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
190  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::SUBGRAPH, centrality(), no_led_option(), threads_option + " " + centrality_option + " " + subgraph_depth_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
191  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::RING, no_centrality(), lsape_optimal(), threads_option + " " + ring_option_lsape_optimal, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
192  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::RING, centrality(), lsape_optimal(), threads_option + " " + ring_option_lsape_optimal + " " + centrality_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
193  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::RING, no_centrality(), gamma(), threads_option + " " + ring_option_gamma, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
194  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::RING, centrality(), gamma(), threads_option + " " + ring_option_gamma + " " + centrality_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
195  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::RING, no_centrality(), lsape_greedy(), threads_option + " " + ring_option_lsape_greedy, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
196  run_single_test(env, g_id, h_id, ged::Options::GEDMethod::RING, centrality(), lsape_greedy(), threads_option + " " + ring_option_lsape_greedy + " " + centrality_option, avg_time_in_sec, avg_ub, best_ub, ub, distance_to_closest_graph, closest_graph);
197 
198  // Update result variables for computing average deviation from best upper bound.
199  for (auto & kv : avg_dev_best_ub) {
200  if (best_ub > 0) {
201  kv.second += (ub.at(kv.first) - best_ub) / best_ub;
202  }
203  else {
204  num_best_ub_zero++;
205  }
206  }
207 
208  // Update and output progress.
209  progress_bar.increment();
210  std::cout << "\r\tRunning the tests: " << progress_bar << std::flush;
211  }
212 
213  // Update result variables for computing classification ratio.
214  for (auto & kv : classification_ratio) {
215  kv.second += env.get_graph_class(closest_graph.at(kv.first)) == env.get_graph_class(g_id) ? 1.0 : 0.0;
216  }
217  }
218  std::cout << "\n";
219 
220  // Normalize the result variables.
221  normalize_result_map(avg_ub, num_runs);
222  normalize_result_map(avg_time_in_sec, num_runs);
223  normalize_result_map(avg_dev_best_ub, num_runs - num_best_ub_zero);
224  normalize_result_map(classification_ratio, graph_ids.size());
225 
226  // Write the results to the output file.
227  std::map<std::size_t, std::string> method_names;
228  init_method_name_map(method_names);
229  for (auto & kv : method_names) {
230  outfile << kv.second << "," << avg_time_in_sec.at(kv.first) << "," << avg_ub.at(kv.first) << "," << classification_ratio.at(kv.first) << "," << avg_dev_best_ub.at(kv.first) << "\n";
231  }
232  outfile.close();
233 }
234 
235 int main(int argc, char* argv[]) {
236  std::vector<std::string> datasets{"mao","pah","alkane","acyclic"};
237  for (auto dataset : datasets) {
238  try {
239  run_tests_on_dataset(dataset);
240  }
241  catch (const std::exception & error) {
242  std::cerr << error.what() << ". " << "Error on " << dataset << "\n";
243  }
244  }
245  return 0;
246 }
std::vector< GEDGraph >::size_type GraphID
Type of internally used graph IDs.
Definition: ged_graph.hpp:112
Selects ged::Branch.
void init(Options::InitType init_type=Options::InitType::EAGER_WITHOUT_SHUFFLED_COPIES)
Initializes the environment.
Definition: ged_env.ipp:655
Selects ged::BranchFast.
std::vector< GEDGraph::GraphID > load_gxl_graphs(const std::string &graph_dir, const std::string &collection_file, Options::GXLNodeEdgeType node_type=Options::GXLNodeEdgeType::LABELED, Options::GXLNodeEdgeType edge_type=Options::GXLNodeEdgeType::LABELED, const std::unordered_set< std::string > &irrelevant_node_attributes=std::unordered_set< std::string >(), const std::unordered_set< std::string > &irrelevant_edge_attributes=std::unordered_set< std::string >())
Loads graphs given in the GXL file format.
void set_edit_costs(Options::EditCosts edit_costs, std::initializer_list< double > edit_cost_constants={})
Sets the edit costs to one of the predefined edit costs.
Definition: ged_env.ipp:55
Selects ged::Walks.
GEDMethod
Selects the method.
const std::string & get_graph_class(GEDGraph::GraphID graph_id) const
Returns the graph class.
Definition: ged_env.ipp:578
double get_runtime(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns runtime.
Definition: ged_env.ipp:567
A progress bar class.
Selects ged::Bipartite.
double get_upper_bound(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id) const
Returns upper bound for edit distance between the input graphs.
Definition: ged_env.ipp:545
void run_method(GEDGraph::GraphID g_id, GEDGraph::GraphID h_id)
Runs the GED method specified by call to set_method() between the graphs with IDs g_id and h_id...
Definition: ged_env.ipp:471
void set_method(Options::GEDMethod method, const std::string &options=std::string(""))
Sets the GEDMethod to be used by run_method().
Definition: ged_env.ipp:384
Selects ged::Subgraph.
Provides the API of GEDLIB.
Definition: ged_data.hpp:48