GEDLIB  1.0
bipartite_ml.ipp
/***************************************************************************
*                                                                          *
*   Copyright (C) 2018 by David B. Blumenthal                              *
*                                                                          *
*   This file is part of GEDLIB.                                           *
*                                                                          *
*   GEDLIB is free software: you can redistribute it and/or modify it      *
*   under the terms of the GNU Lesser General Public License as published  *
*   by the Free Software Foundation, either version 3 of the License, or   *
*   (at your option) any later version.                                    *
*                                                                          *
*   GEDLIB is distributed in the hope that it will be useful,              *
*   but WITHOUT ANY WARRANTY; without even the implied warranty of         *
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           *
*   GNU Lesser General Public License for more details.                    *
*                                                                          *
*   You should have received a copy of the GNU Lesser General Public       *
*   License along with GEDLIB. If not, see <http://www.gnu.org/licenses/>. *
*                                                                          *
***************************************************************************/

#ifndef SRC_METHODS_BIPARTITE_ML_IPP_
#define SRC_METHODS_BIPARTITE_ML_IPP_

namespace ged {

template<class UserNodeLabel, class UserEdgeLabel>
BipartiteML<UserNodeLabel, UserEdgeLabel>::
~BipartiteML() {
    delete lsape_method_;
}

template<class UserNodeLabel, class UserEdgeLabel>
BipartiteML<UserNodeLabel, UserEdgeLabel>::
BipartiteML(const GEDData<UserNodeLabel, UserEdgeLabel> & ged_data) :
MLBasedMethod<UserNodeLabel, UserEdgeLabel>(ged_data),
lsape_method_{new Bipartite<UserNodeLabel, UserEdgeLabel>(this->ged_data_)},
lsape_method_options_(""),
lsape_instance_(),
global_features_(),
row_features_(),
col_features_() {}

// === Definitions of member functions inherited from MLBasedMethod. ===
template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
ml_init_() {
    lsape_method_->init();
}

template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
ml_set_default_options_() {
    delete lsape_method_;
    lsape_method_ = new Bipartite<UserNodeLabel, UserEdgeLabel>(this->ged_data_);
    lsape_method_options_ = std::string("");
}

template<class UserNodeLabel, class UserEdgeLabel>
std::string
BipartiteML<UserNodeLabel, UserEdgeLabel>::
ml_valid_options_string_() const {
    return "[--lsape-method <arg>] [--lsape-options <arg>]";
}
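
// A minimal usage sketch (not part of this file), assuming the usual GEDEnv
// front end and that the GEDMethod enum value for this class is BIPARTITE_ML:
//
//     ged::GEDEnv<UserNodeID, UserNodeLabel, UserEdgeLabel> env;
//     // ... add graphs, set edit costs, call env.init() ...
//     env.set_method(ged::Options::GEDMethod::BIPARTITE_ML, "--lsape-method BRANCH_UNIFORM");
//     env.run_method(g_id, h_id);
//
// Any string passed via --lsape-options is forwarded verbatim to the selected
// LSAPE method (see ml_parse_option_() below), so it must follow that method's
// own option syntax; g_id and h_id are placeholder graph IDs.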

template<class UserNodeLabel, class UserEdgeLabel>
bool
BipartiteML<UserNodeLabel, UserEdgeLabel>::
ml_parse_option_(const std::string & option, const std::string & arg) {
    bool is_valid_option{false};
    if (option == "lsape-method") {
        if (arg == "BRANCH_FAST") {
            lsape_method_ = new BranchFast<UserNodeLabel, UserEdgeLabel>(this->ged_data_);
        }
        else if (arg == "BRANCH_UNIFORM") {
            lsape_method_ = new BranchUniform<UserNodeLabel, UserEdgeLabel>(this->ged_data_);
        }
        else if (arg == "BRANCH") {
            lsape_method_ = new Branch<UserNodeLabel, UserEdgeLabel>(this->ged_data_);
        }
        else if (arg == "NODE") {
            lsape_method_ = new Node<UserNodeLabel, UserEdgeLabel>(this->ged_data_);
        }
        else if (arg == "RING") {
            lsape_method_ = new Ring<UserNodeLabel, UserEdgeLabel>(this->ged_data_);
        }
        else if (arg == "SUBGRAPH") {
            lsape_method_ = new Subgraph<UserNodeLabel, UserEdgeLabel>(this->ged_data_);
        }
        else if (arg == "WALKS") {
            lsape_method_ = new Walks<UserNodeLabel, UserEdgeLabel>(this->ged_data_);
        }
        else if (arg != "BIPARTITE") {
            throw Error("Invalid argument \"" + arg + "\" for option lsape-method. Usage: options = \"[--lsape-method BIPARTITE|BRANCH_FAST|BRANCH_UNIFORM|BRANCH|NODE|RING|SUBGRAPH|WALKS] [...]\"");
        }
        is_valid_option = true;
    }
    else if (option == "lsape-options") {
        lsape_method_options_ = arg;
        // Strip a user-supplied "--threads" option; the number of threads is set centrally below.
        std::size_t bad_option_start{lsape_method_options_.find("--threads")};
        std::size_t next_option_start;
        if (bad_option_start != std::string::npos) {
            next_option_start = lsape_method_options_.find("--", bad_option_start + 1);
            if (next_option_start != std::string::npos) {
                lsape_method_options_ = lsape_method_options_.substr(0, bad_option_start) + lsape_method_options_.substr(next_option_start);
            }
            else {
                lsape_method_options_ = lsape_method_options_.substr(0, bad_option_start);
            }
        }
        is_valid_option = true;
    }
    if (lsape_method_options_ != "") {
        lsape_method_->set_options(lsape_method_options_ + " --threads " + std::to_string(this->num_threads_));
    }
    else {
        lsape_method_->set_options(std::string("--threads ") + std::to_string(this->num_threads_));
    }
    return is_valid_option;
}

template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
ml_init_feature_variables_(const GEDGraph & g, const GEDGraph & h, std::size_t num_threads) {
    populate_lsape_instance_(g, h, num_threads);
    Eigen::ArrayXXd substitution_matrix(lsape_instance_.topLeftCorner(g.num_nodes(), h.num_nodes()));
    compute_global_features_(substitution_matrix);
    row_features_.init(substitution_matrix);
    col_features_.init(substitution_matrix);
}
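
// Note: the LSAPE instance built by populate_lsape_instance_() has one extra
// row and one extra column: the last row holds insertion costs and the last
// column holds deletion costs (see the feature vector methods below). Only the
// top-left g.num_nodes() x h.num_nodes() block enters the row, column, and
// global statistics computed above.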

template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
ml_populate_substitution_feature_vector_(const GEDGraph & g, const GEDGraph & h, GEDGraph::NodeID i, GEDGraph::NodeID k, std::vector<double> & feature_vector) {
    std::size_t row{i};
    std::size_t col{k};
    feature_vector.clear();
    add_global_features_(feature_vector);
    add_cell_features_(row, col, this->ged_data_.node_cost(g.get_node_label(i), h.get_node_label(k)), feature_vector);
    row_features_.add_features_(lsape_instance_, row, col, feature_vector);
    col_features_.add_features_(lsape_instance_, row, col, feature_vector);
}

template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
ml_populate_deletion_feature_vector_(const GEDGraph & g, GEDGraph::NodeID i, std::vector<double> & feature_vector) {
    std::size_t row{i};
    std::size_t col{static_cast<std::size_t>(lsape_instance_.cols() - 1)};
    feature_vector.clear();
    add_global_features_(feature_vector);
    add_cell_features_(row, col, this->ged_data_.node_cost(g.get_node_label(i), dummy_label()), feature_vector);
    row_features_.add_features_(lsape_instance_, row, col, feature_vector);
    col_features_.add_features_(lsape_instance_, row, col, feature_vector);
}

template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
ml_populate_insertion_feature_vector_(const GEDGraph & h, GEDGraph::NodeID k, std::vector<double> & feature_vector) {
    std::size_t row{static_cast<std::size_t>(lsape_instance_.rows() - 1)};
    std::size_t col{k};
    feature_vector.clear();
    add_global_features_(feature_vector);
    add_cell_features_(row, col, this->ged_data_.node_cost(dummy_label(), h.get_node_label(k)), feature_vector);
    row_features_.add_features_(lsape_instance_, row, col, feature_vector);
    col_features_.add_features_(lsape_instance_, row, col, feature_vector);
}

template<class UserNodeLabel, class UserEdgeLabel>
std::size_t
BipartiteML<UserNodeLabel, UserEdgeLabel>::
ml_get_num_features_() {
    return 24;
}
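
// The 24 features decompose as 4 global features (maximum, minimum, mean, and
// deviation of the substitution matrix), 2 cell features (node cost and
// remaining edge cost of the LSAPE entry), 9 row features, and 9 column
// features; see add_global_features_(), add_cell_features_(),
// RowFeatures_::add_features_(), and ColFeatures_::add_features_() below.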

// === Definition of private class RowFeatures_. ===
template<class UserNodeLabel, class UserEdgeLabel>
BipartiteML<UserNodeLabel, UserEdgeLabel>::
RowFeatures_::
RowFeatures_() :
maxima_(),
minima_(),
means_(),
deviations_(),
leaders_(),
intervals_() {}

template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
RowFeatures_::
init(const Eigen::ArrayXXd & substitution_matrix) {

    // Compute row maxima.
    maxima_ = substitution_matrix.rowwise().maxCoeff();

    // Compute row minima and their positions.
    minima_.resize(substitution_matrix.rows());
    std::vector<RowVector_::Index> col_min(substitution_matrix.rows());
    for (auto row = 0; row < substitution_matrix.rows(); row++) {
        minima_(row) = substitution_matrix.row(row).minCoeff(&col_min[row]);
    }

    // Compute row means.
    means_ = substitution_matrix.rowwise().mean();

    // Compute row deviations.
    if (substitution_matrix.rows() <= 1) {
        deviations_.resize(substitution_matrix.rows());
        for (auto row = 0; row < substitution_matrix.rows(); row++) {
            deviations_(row) = 0.0;
        }
    }
    else {
        deviations_ = ((substitution_matrix.colwise() - substitution_matrix.rowwise().mean()).square().rowwise().sum() / (substitution_matrix.rows() - 1)).sqrt();
    }

    // Compute row leaders.
    RowVector_ second_minima(substitution_matrix.rows());
    if (substitution_matrix.rows() == 1) {
        second_minima = minima_;
    }
    else {
        for (auto row = 0; row < substitution_matrix.rows(); row++) {
            second_minima(row) = maxima_(row);
            for (auto col = 0; col < substitution_matrix.cols(); col++) {
                if ((col != col_min.at(row)) and (substitution_matrix(row, col) < second_minima(row))) {
                    second_minima(row) = substitution_matrix(row, col);
                }
            }
        }
    }
    leaders_ = (minima_ - second_minima);
    for (auto row = 0; row < substitution_matrix.rows(); row++) {
        if (second_minima(row) != 0) {
            leaders_(row) /= second_minima(row);
        }
    }

    // Compute row intervals.
    intervals_ = maxima_ - minima_;
    if (intervals_.mean() > 0) {
        intervals_ /= intervals_.mean();
    }
}

template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
RowFeatures_::
add_features_(const Eigen::ArrayXXd & matrix, std::size_t row, std::size_t col, std::vector<double> & feature_vector) const {

    // Add zeroes if row is the dummy row.
    if (row == static_cast<std::size_t>(matrix.rows() - 1)) {
        for (unsigned short counter{0}; counter < 9; counter++) {
            feature_vector.push_back(0.0);
        }
        return;
    }

    // Add row maximum feature.
    feature_vector.push_back(maxima_(row));

    // Add row minimum feature.
    feature_vector.push_back(minima_(row));

    // Add row mean feature.
    feature_vector.push_back(means_(row));

    // Add row deviation feature.
    feature_vector.push_back(deviations_(row));

    // Add row uniqueness feature.
    double uniqueness{(matrix.row(row) - matrix(row, col)).abs().maxCoeff()};
    feature_vector.push_back(uniqueness);

    // Add row divergence feature.
    double divergence{(matrix.row(row) - matrix(row, col)).abs().sum()};
    if (((matrix.cols() - 1) * (matrix(row, col) - means_(row))) != 0) {
        divergence /= ((matrix.cols() - 1) * (matrix(row, col) - means_(row)));
    }
    feature_vector.push_back(divergence);

    // Add row leader feature.
    feature_vector.push_back(leaders_(row));

    // Add row interval feature.
    feature_vector.push_back(intervals_(row));

    // Add row outlierness feature.
    double outlierness{matrix(row, col)};
    if (means_(row) != deviations_(row)) {
        outlierness /= (means_(row) - deviations_(row));
    }
    feature_vector.push_back(outlierness);
}

// === Definition of private class ColFeatures_. ===
template<class UserNodeLabel, class UserEdgeLabel>
BipartiteML<UserNodeLabel, UserEdgeLabel>::
ColFeatures_::
ColFeatures_() :
maxima_(),
minima_(),
means_(),
deviations_(),
leaders_(),
intervals_() {}

template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
ColFeatures_::
init(const Eigen::ArrayXXd & substitution_matrix) {

    // Compute column maxima.
    maxima_ = substitution_matrix.colwise().maxCoeff();

    // Compute column minima and their positions.
    minima_.resize(substitution_matrix.cols());
    std::vector<ColumnVector_::Index> row_min(substitution_matrix.cols());
    for (auto col = 0; col < substitution_matrix.cols(); col++) {
        minima_(col) = substitution_matrix.col(col).minCoeff(&row_min[col]);
    }

    // Compute column means.
    means_ = substitution_matrix.colwise().mean();

    // Compute column deviations.
    if (substitution_matrix.cols() <= 1) {
        deviations_.resize(substitution_matrix.cols());
        for (auto col = 0; col < substitution_matrix.cols(); col++) {
            deviations_(col) = 0.0;
        }
    }
    else {
        deviations_ = ((substitution_matrix.rowwise() - substitution_matrix.colwise().mean()).square().colwise().sum() / (substitution_matrix.cols() - 1)).sqrt();
    }

    // Compute column leaders.
    ColumnVector_ second_minima(substitution_matrix.cols());
    if (substitution_matrix.cols() == 1) {
        second_minima = minima_;
    }
    else {
        for (auto col = 0; col < substitution_matrix.cols(); col++) {
            second_minima(col) = maxima_(col);
            for (auto row = 0; row < substitution_matrix.rows(); row++) {
                if ((row != row_min.at(col)) and (substitution_matrix(row, col) < second_minima(col))) {
                    second_minima(col) = substitution_matrix(row, col);
                }
            }
        }
    }
    leaders_ = (minima_ - second_minima);
    for (auto col = 0; col < substitution_matrix.cols(); col++) {
        if (second_minima(col) != 0) {
            leaders_(col) /= second_minima(col);
        }
    }

    // Compute column intervals.
    intervals_ = maxima_ - minima_;
    if (intervals_.mean() > 0) {
        intervals_ /= intervals_.mean();
    }
}

template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
ColFeatures_::
add_features_(const Eigen::ArrayXXd & matrix, std::size_t row, std::size_t col, std::vector<double> & feature_vector) const {

    // Add zeroes if col is the dummy column.
    if (col == static_cast<std::size_t>(matrix.cols() - 1)) {
        for (unsigned short counter{0}; counter < 9; counter++) {
            feature_vector.push_back(0.0);
        }
        return;
    }

    // Add column maximum feature.
    feature_vector.push_back(maxima_(col));

    // Add column minimum feature.
    feature_vector.push_back(minima_(col));

    // Add column mean feature.
    feature_vector.push_back(means_(col));

    // Add column deviation feature.
    feature_vector.push_back(deviations_(col));

    // Add column uniqueness feature.
    double uniqueness{(matrix.col(col) - matrix(row, col)).abs().maxCoeff()};
    feature_vector.push_back(uniqueness);

    // Add column divergence feature.
    double divergence{(matrix.col(col) - matrix(row, col)).abs().sum()};
    if (((matrix.rows() - 1) * (matrix(row, col) - means_(col))) != 0) {
        divergence /= ((matrix.rows() - 1) * (matrix(row, col) - means_(col)));
    }
    feature_vector.push_back(divergence);

    // Add column leader feature.
    feature_vector.push_back(leaders_(col));

    // Add column interval feature.
    feature_vector.push_back(intervals_(col));

    // Add column outlierness feature.
    double outlierness{matrix(row, col)};
    if (means_(col) != deviations_(col)) {
        outlierness /= (means_(col) - deviations_(col));
    }
    feature_vector.push_back(outlierness);
}

// === Definitions of private helper member functions. ===
template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
populate_lsape_instance_(const GEDGraph & g, const GEDGraph & h, std::size_t num_threads) {
    DMatrix lsape_instance;
    lsape_method_->populate_instance(g, h, lsape_instance);
    lsape_instance_ = lsape_instance.matrix();
}

template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
compute_global_features_(const Eigen::ArrayXXd & substitution_matrix) {
    global_features_.clear();
    global_features_.push_back(substitution_matrix.maxCoeff());
    global_features_.push_back(substitution_matrix.minCoeff());
    global_features_.push_back(substitution_matrix.mean());
    if ((substitution_matrix.rows() * substitution_matrix.cols()) <= 1) {
        global_features_.push_back(0.0);
    }
    else {
        global_features_.push_back(std::sqrt((substitution_matrix - global_features_.at(2)).square().sum() / (substitution_matrix.rows() * substitution_matrix.cols() - 1)));
    }
}

template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
add_global_features_(std::vector<double> & feature_vector) const {
    for (auto feature = global_features_.begin(); feature != global_features_.end(); feature++) {
        feature_vector.push_back(*feature);
    }
}

template<class UserNodeLabel, class UserEdgeLabel>
void
BipartiteML<UserNodeLabel, UserEdgeLabel>::
add_cell_features_(std::size_t row, std::size_t col, double node_cost, std::vector<double> & feature_vector) const {
    // Add node cost feature.
    feature_vector.push_back(node_cost);

    // Add edge cost feature.
    feature_vector.push_back(lsape_instance_(row, col) - node_cost);
}
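
// Interpretation (not enforced by this file): the LSAPE-GED methods selectable
// via --lsape-method build each instance entry as node edit cost plus an
// estimate of the induced edge edit costs, so the second cell feature above
// isolates the edge-cost contribution of the chosen method.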

}

#endif /* SRC_METHODS_BIPARTITE_ML_IPP_ */