GEDLIB  1.0
ml_based_method.hpp
Go to the documentation of this file.
1 /***************************************************************************
2 * *
3 * Copyright (C) 2018 by David B. Blumenthal *
4 * *
5 * This file is part of GEDLIB. *
6 * *
7 * GEDLIB is free software: you can redistribute it and/or modify it *
8 * under the terms of the GNU Lesser General Public License as published *
9 * by the Free Software Foundation, either version 3 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * GEDLIB is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU Lesser General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU Lesser General Public *
18 * License along with GEDLIB. If not, see <http://www.gnu.org/licenses/>. *
19 * *
20 ***************************************************************************/
21 
27 #ifndef SRC_METHODS_ML_BASED_METHOD_HPP_
28 #define SRC_METHODS_ML_BASED_METHOD_HPP_
29 
30 namespace ged {
31 
// Abstract base class for methods that transform GED to LSAPE by using an SVM or a DNN for prediction.
// Derives from LSAPEBasedMethod and implements (as final) the lsape_*_ hooks declared below;
// derived classes customise behaviour through the virtual ml_*_ hooks at the end of the class.
54 template<class UserNodeLabel, class UserEdgeLabel>
55 class MLBasedMethod : public LSAPEBasedMethod<UserNodeLabel, UserEdgeLabel> {
56 
57 public:
58 
// Pure virtual destructor: the class cannot be instantiated directly.
59  virtual ~MLBasedMethod() = 0;
60 
62 
// Predicts the type of a node assignment between the graphs g and h.
// Returns the prediction as a double.
70  double predict(const GEDGraph & g, const GEDGraph & h, const NodeMap::Assignment & assignment);
71 
72 
73 protected:
74 
// The size of the feature vectors.
78  std::size_t num_features_;
79 
80 private:
81 
// Selects the machine-learning back end; one enumerator per nested model class
// declared below (DNN_, SVM_, OneClassSVM_).
82  enum MLMethod_ {DNN, SVM, ONE_CLASS_SVM};
83 
// A single node assignment, identified by its row and column in the master problem,
// stored together with its type and its feature vector in both the DNN and the SVM representation.
84  class Assignment_ {
85 
86  public:
87 
// Builds an assignment from its master-problem position, a good/bad flag and a feature vector.
88  Assignment_(std::size_t row_in_master, std::size_t col_in_master, bool good_assignment, const std::vector<double> & feature_vector);
89 
// Builds an assignment from a text line and the expected number of features.
// NOTE(review): presumably parses the format produced by to_string() -- confirm in the implementation.
90  Assignment_(const std::string & line, std::size_t num_features);
91 
// Copy constructor.
92  Assignment_(const Assignment_ & assignment);
93 
94  std::size_t row_in_master() const;
95 
96  std::size_t col_in_master() const;
97 
// Returns a string representation of the assignment.
98  std::string to_string() const;
99 
// Non-const accessors returning raw pointers.
// NOTE(review): presumably these point into the member vectors / type_ below and are therefore
// only valid while this object is alive and unmodified -- confirm in the implementation.
100  double * dnn_feature_vector();
101 
102  struct svm_node * svm_feature_vector();
103 
104  double * type();
105 
106  bool is_good_assignment() const;
107 
108  std::size_t num_features() const;
109 
110  private:
111 
112  std::size_t row_in_master_;
113 
114  std::size_t col_in_master_;
115 
// Type of the assignment, stored as a double.
// NOTE(review): the encoding of good vs. bad assignments is not visible in this header.
116  double type_;
117 
// The same features kept twice: as plain doubles for the DNN and as svm_node entries for the SVMs.
118  std::vector<double> dnn_feature_vector_;
119 
120  std::vector<struct svm_node> svm_feature_vector_;
121  };
122 
// Parameters for the DNN: candidate activation functions and lower/upper bounds on the
// number of hidden layers and of neurons per layer.
// NOTE(review): presumably the search space explored by DNN_::train() -- confirm.
123  struct DNNParams_ {
124 
125  DNNParams_();
126 
127  std::vector<FANN::activation_function_enum> activation_candidates;
128 
129  unsigned int min_num_hidden_layers;
130 
131  unsigned int max_num_hidden_layers;
132 
133  unsigned int min_num_neurons_per_layer;
134 
135  unsigned int max_num_neurons_per_layer;
136  };
137 
// Wrapper around a FANN::neural_net offering load / train / decision_value.
138  class DNN_ {
139 
140  public:
141 
142  DNN_();
143 
// Loads a network from filename.
// NOTE(review): the meaning of the returned std::size_t is not visible here
// (presumably the number of features) -- confirm.
144  std::size_t load(const std::string & filename);
145 
// Trains on training_data using params; also receives a file name and a thread count.
146  void train(FANN::training_data & training_data, const MLBasedMethod::DNNParams_ & params, const std::string & filename, std::size_t num_threads);
147 
// Returns the decision value for one feature vector given as a raw array of doubles.
148  double decision_value(double * feature_vector);
149 
150  private:
151 
// Helpers used for training; both return a float score.
// NOTE(review): presumably a validation error -- confirm in the implementation.
152  float cross_validate_(FANN::training_data & training_data, const MLBasedMethod::DNNParams_ & params, unsigned int num_hidden_layers, unsigned int num_neurons_per_layer, FANN::activation_function_enum hidden_activation);
153 
154  float train_and_validate_(FANN::neural_net & neural_net, FANN::training_data & training_data, FANN::training_data & validation_data, std::size_t max_num_epochs);
155 
156  FANN::neural_net neural_net_;
157  };
158 
// Parameters for the SVM: integer exponent ranges for gamma and C, and a range for nu.
// NOTE(review): presumably a grid-search range used by SVM_::train() -- confirm.
159  struct SVMParams_ {
160 
161  SVMParams_();
162 
163  int min_gamma_exp;
164 
165  int max_gamma_exp;
166 
167  int min_c_exp;
168 
169  int max_c_exp;
170 
171  double min_nu;
172 
173  double max_nu;
174  };
175 
// Wrapper around a raw svm_model pointer offering load / train / decision_value.
// NOTE(review): a destructor is declared but no copy or move operations are; whether this
// class owns svm_model_ (and would double-free on copy) cannot be confirmed from this header.
176  class SVM_ {
177 
178  public:
179 
180  ~SVM_();
181 
182  SVM_();
183 
// Loads a model from filename.
// NOTE(review): meaning of the returned std::size_t not visible here -- confirm.
184  std::size_t load(const std::string & filename);
185 
186  void train(struct svm_problem * training_data, const MLBasedMethod::SVMParams_ & params, std::size_t num_features, const std::string & filename, std::size_t num_threads);
187 
// Returns the decision value for one feature vector given as an array of svm_node.
188  double decision_value(struct svm_node * feature_vector) const;
189 
190  private:
191 
192  struct svm_model * svm_model_;
193 
194  };
195 
// One-class SVM variant of the wrapper above; load() and train() additionally take a use_likelihood flag.
// NOTE(review): same open question about ownership of svm_model_ as for SVM_.
196  class OneClassSVM_ {
197 
198  public:
199 
200  ~OneClassSVM_();
201 
202  OneClassSVM_();
203 
204  std::size_t load(const std::string & filename, bool use_likelihood);
205 
206  void train(struct svm_problem * training_data, bool use_likelihood, std::size_t num_features, const std::string & filename);
207 
208  double decision_value(struct svm_node * feature_vector) const;
209 
210  private:
211 
212  struct svm_model * svm_model_;
213 
// Values that, judging by the helper's name, are set by compute_rho_and_scale_factor_().
// NOTE(review): how sum_alpha_ is computed is not visible in this header.
214  double rho_;
215 
216  double sum_alpha_;
217 
218  double scale_factor_;
219 
220  bool use_likelihood_;
221 
222  void compute_rho_and_scale_factor_(std::size_t num_features);
223  };
224 
// Pair of graph IDs.
// NOTE(review): presumably the pair of graphs for which predict() was last initialized
// (see initialized_for_prediction_()) -- confirm.
225  std::pair<GEDGraph::GraphID, GEDGraph::GraphID> prediction_initialized_;
226 
227  std::vector<Assignment_> assignments_;
228 
// Currently selected machine-learning back end.
229  MLMethod_ ml_method_;
230 
// Raw pointer to a GED method and the option string associated with it.
// NOTE(review): ownership of the pointee is not visible in this header -- confirm who deletes it.
231  GEDMethod<UserNodeLabel, UserEdgeLabel> * ground_truth_method_;
232 
233  std::string ground_truth_options_;
234 
// State for the DNN back end.
235  DNNParams_ dnn_params_;
236 
237  DNN_ dnn_;
238 
239  FANN::training_data dnn_training_data_;
240 
241  std::vector<double *> dnn_feature_vectors_;
242 
243  std::vector<double *> dnn_types_;
244 
// State for the SVM back end.
245  SVMParams_ svm_params_;
246 
247  SVM_ svm_;
248 
249  struct svm_problem svm_training_data_;
250 
251  std::vector<struct svm_node *> svm_feature_vectors_;
252 
253  std::vector<double> svm_types_;
254 
// State for the one-class SVM back end.
255  bool one_class_svm_use_likelihood_;
256 
257  OneClassSVM_ one_class_svm_;
258 
// File names, stored as strings.
// NOTE(review): the exact role of each file is not visible in this header;
// presumably they are set while parsing options -- confirm.
259  std::string infile_;
260 
261  std::string outfile_;
262 
263  std::string logfile_;
264 
265  std::string training_infile_;
266 
267  std::string training_outfile_;
268 
269  std::string ground_truth_infile_;
270 
271  std::string ground_truth_outfile_;
272 
273  // Member functions inherited from LSAPEBasedMethod.
274 
// Initializes the method after initializing the global variables for the graphs.
275  virtual void lsape_init_() final;
276 
// Initializes the method at runtime or during initialization, before initializing the global variables.
277  virtual void lsape_pre_graph_init_(bool called_at_runtime) final;
278 
// Populates the LSAPE instance (master_problem) for the graphs g and h.
279  virtual void lsape_populate_instance_(const GEDGraph & g, const GEDGraph & h, DMatrix & master_problem) final;
280 
// Returns a string of all valid options that are not among the ones shared by all derived classes
// of the base class.
281  virtual std::string lsape_valid_options_string_() const final;
282 
// Sets all options that are not among the ones shared by all derived classes of ged::LSAPEBasedMethod.
283  virtual void lsape_set_default_options_() final;
284 
// Parses one option that is not among the ones shared by all derived classes of ged::LSAPEBasedMethod.
285  virtual bool lsape_parse_option_(const std::string & option, const std::string & value) final;
286 
// Initializes global variables for one graph.
287  virtual void lsape_init_graph_(const GEDGraph & graph) final;
288 
// Default initializes the method at runtime after initializing the global variables for the graphs.
289  virtual void lsape_default_post_graph_init_() final;
290 
291  // Private helper functions.
292 
293  bool initialized_for_prediction_(const GEDGraph & g, const GEDGraph & h) const;
294 
295  void generate_assignments_(const GEDGraph & g, const GEDGraph & h);
296 
297  void load_or_generate_training_data_();
298 
299  void save_training_data_();
300 
// Const predicates.
// NOTE(review): presumably derived from the file-name members above -- confirm.
301  bool load_config_file_() const;
302 
303  bool log_prediction_ratios_() const;
304 
305  bool compute_or_load_ground_truth_() const;
306 
// NOTE(review): the parameter name below is misspelled ("assignmment"); harmless in a
// declaration, but worth correcting together with the definition.
307  double decision_value_(Assignment_ & assignmment);
308 
309  void train_();
310 
311  // Virtual functions to be overridden by derived classes.
312 
// Initializes variables that are used for populating the feature vectors of assignments
// between the graphs g and h.
320  virtual void ml_init_feature_variables_(const GEDGraph & g, const GEDGraph & h, std::size_t num_threads);
321 
// Returns a string of all valid options that are not among the ones shared by all derived classes
// of the base class.
327  virtual std::string ml_valid_options_string_() const;
328 
// Parses one option that is not among the ones shared by all derived classes of ged::MLBasedMethod.
336  virtual bool ml_parse_option_(const std::string & option, const std::string & arg);
337 
// Sets all options that are not among the ones shared by all derived classes of ged::MLBasedMethod.
342  virtual void ml_set_default_options_();
343 
// Initializes the method after initializing the global variables for the graphs.
348  virtual void ml_init_();
349 
// Initializes global variables for one graph.
355  virtual void ml_init_graph_(const GEDGraph & graph);
356 
// Returns the number of features.
362  virtual std::size_t ml_get_num_features_();
363 
// Initializes the derived class for running with feature vectors of size num_features_.
368  virtual void ml_init_for_num_features_();
369 
// Computes the substitution feature vector for node i of g and node k of h; the result is written to feature_vector.
379  virtual void ml_populate_substitution_feature_vector_(const GEDGraph & g, const GEDGraph & h, GEDGraph::NodeID i, GEDGraph::NodeID k, std::vector<double> & feature_vector);
380 
// Computes the deletion feature vector for node i of g; the result is written to feature_vector.
388  virtual void ml_populate_deletion_feature_vector_(const GEDGraph & g, GEDGraph::NodeID i, std::vector<double> & feature_vector);
389 
// Computes the insertion feature vector for node k of h; the result is written to feature_vector.
397  virtual void ml_populate_insertion_feature_vector_(const GEDGraph & h, GEDGraph::NodeID k, std::vector<double> & feature_vector);
398 
399 };
400 
401 }
402 
403 #endif /* SRC_METHODS_ML_BASED_METHOD_HPP_ */
404 
virtual void lsape_pre_graph_init_(bool called_at_runtime) final
Initializes the method at runtime or during initialization before initializing the global variables f...
Contains the standardized input data along with basic functionality.
Definition: ged_data.hpp:55
virtual bool lsape_parse_option_(const std::string &option, const std::string &value) final
Parses one option that is not among the ones shared by all derived classes of ged::LSAPEBasedMethod.
virtual std::string lsape_valid_options_string_() const final
Returns string of all valid options that are not among the ones shared by all derived classes of ged:...
virtual void lsape_set_default_options_() final
Sets all options that are not among the ones shared by all derived classes of ged::LSAPEBasedMethod t...
virtual bool ml_parse_option_(const std::string &option, const std::string &arg)
Parses one option that is not among the ones shared by all derived classes of ged::MLBasedMethod.
virtual std::string ml_valid_options_string_() const
Returns string of all valid options that are not among the ones shared by all derived classes of ged:...
Abstract class for the (suboptimal) computation of the graph edit distance.
Definition: ged_method.hpp:40
std::size_t num_features_
The size of the feature vectors.
virtual std::size_t ml_get_num_features_()
Returns the number of features.
virtual void lsape_init_graph_(const GEDGraph &graph) final
Initializes global variables for one graph.
Abstract class for methods that use lossy transformations to LSAPE for approximating the graph edit d...
virtual void ml_init_feature_variables_(const GEDGraph &g, const GEDGraph &h, std::size_t num_threads)
Initializes variables that are used for populating the feature vectors of assignments between two inp...
virtual void lsape_init_() final
Initializes the method after initializing the global variables for the graphs.
virtual void lsape_populate_instance_(const GEDGraph &g, const GEDGraph &h, DMatrix &master_problem) final
Populates the LSAPE instance.
virtual void ml_init_graph_(const GEDGraph &graph)
Initializes global variables for one graph.
The normalized input graphs used by GEDLIB. All labels are integers.
Definition: ged_graph.hpp:104
virtual void ml_populate_deletion_feature_vector_(const GEDGraph &g, GEDGraph::NodeID i, std::vector< double > &feature_vector)
Computes deletion feature vector.
virtual void ml_init_()
Initializes the method after initializing the global variables for the graphs.
Global namespace for GEDLIB.
virtual void ml_set_default_options_()
Sets all options that are not among the ones shared by all derived classes of ged::MLBasedMethod to d...
virtual void ml_init_for_num_features_()
Initializes the derived class for running with feature vectors of size ged::MLBasedMethod::num_featur...
virtual void ml_populate_substitution_feature_vector_(const GEDGraph &g, const GEDGraph &h, GEDGraph::NodeID i, GEDGraph::NodeID k, std::vector< double > &feature_vector)
Computes substitution feature vector.
Abstract class for methods that transform GED to LSAPE by using an SVM or a DNN to predict the cost of...
virtual void lsape_default_post_graph_init_() final
Default initializes the method at runtime after initializing the global variables for the graphs...
std::size_t NodeID
Internally used vertex ID type.
Definition: ged_graph.hpp:108
double predict(const GEDGraph &g, const GEDGraph &h, const NodeMap::Assignment &assignment)
Predicts the type of a node assignment.
virtual void ml_populate_insertion_feature_vector_(const GEDGraph &h, GEDGraph::NodeID k, std::vector< double > &feature_vector)
Computes insertion feature vector.