GEDLIB  1.0
ged_data.hpp
Go to the documentation of this file.
1 /***************************************************************************
2  * *
3  * Copyright (C) 2018 by David B. Blumenthal *
4  * *
5  * This file is part of GEDLIB. *
6  * *
7  * GEDLIB is free software: you can redistribute it and/or modify it *
8  * under the terms of the GNU Lesser General Public License as published *
9  * by the Free Software Foundation, either version 3 of the License, or *
10  * (at your option) any later version. *
11  * *
12  * GEDLIB is distributed in the hope that it will be useful, *
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15  * GNU Lesser General Public License for more details. *
16  * *
17  * You should have received a copy of the GNU Lesser General Public *
18  * License along with GEDLIB. If not, see <http://www.gnu.org/licenses/>. *
19  * *
20  ***************************************************************************/
21 
27 #ifndef SRC_ENV_GED_DATA_HPP_
28 #define SRC_ENV_GED_DATA_HPP_
29 
30 #include "ged_graph.hpp"
31 #include "result.hpp"
32 #include "matrix.hpp"
33 #include "common_types.hpp"
34 #include "node_map.hpp"
35 #include "../edit_costs/edit_costs.hpp"
36 #include "../edit_costs/chem_1.hpp"
37 #include "../edit_costs/chem_2.hpp"
38 #include "../edit_costs/cmu.hpp"
39 #include "../edit_costs/grec_1.hpp"
40 #include "../edit_costs/grec_2.hpp"
41 #include "../edit_costs/protein.hpp"
42 #include "../edit_costs/fingerprint.hpp"
43 #include "../edit_costs/letter.hpp"
44 #include "../edit_costs/constant.hpp"
45 
46 namespace ged {
47 
48 template<class, class, class> class GEDEnv;
49 
54 template<class UserNodeLabel, class UserEdgeLabel>
55 class GEDData {
56 
57  template<class, class, class> friend class GEDEnv;
58 
59 public:
60 
64  ~GEDData();
65 
70  std::size_t num_graphs() const;
71 
77  const GEDGraph & graph(GEDGraph::GraphID graph_id) const;
78 
84 
89  std::size_t num_graphs_without_shuffled_copies() const;
90 
97 
103  bool is_shuffled_graph_copy(GEDGraph::GraphID graph_id) const;
104 
111  std::vector<GEDGraph>::const_iterator begin() const;
112 
119  std::vector<GEDGraph>::const_iterator end() const;
120 
125  std::size_t num_node_labels() const;
126 
131  std::size_t num_edge_labels() const;
132 
137  std::size_t max_num_nodes() const;
138 
143  std::size_t max_num_edges() const;
144 
154  double node_cost(LabelID label1, LabelID label2) const;
155 
162  void vectorize_node_label(LabelID node_label, std::vector<double> & vector_representation) const;
163 
173  double edge_cost(LabelID label1, LabelID label2) const;
174 
181  void vectorize_edge_label(LabelID edge_label, std::vector<double> & vector_representation) const;
182 
191  void save_node_map(const std::string & filename, GEDGraph::NodeID g_id, GEDGraph::NodeID h_id, const NodeMap & node_map, bool append = true) const;
192 
201  void load_node_map(const std::string & filename, GEDGraph::NodeID g_id, GEDGraph::NodeID h_id, NodeMap & node_map) const;
202 
209  void compute_induced_cost(const GEDGraph & g, const GEDGraph & h, NodeMap & node_map) const;
210 
220  double swap_cost(const GEDGraph & g, const GEDGraph & h, const NodeMap::Assignment & assignment_1, const NodeMap::Assignment & assignment_2, NodeMap & node_map) const;
221 
229  void swap_assignments(const NodeMap::Assignment & assignment_1, const NodeMap::Assignment & assignment_2, double swap_cost, NodeMap & node_map) const;
230 
235  bool quasimetric_costs() const;
236 
243  bool quasimetric_costs(const GEDGraph & g, const GEDGraph & h) const;
244 
249  double max_node_edit_cost() const;
250 
255  double max_edge_edit_cost() const;
256 
263  double max_edit_cost(const GEDGraph & g, const GEDGraph & h) const;
264 
271  double min_edit_cost(const GEDGraph & g, const GEDGraph & h) const;
272 
279  double max_node_edit_cost(const GEDGraph & g, const GEDGraph & h) const;
280 
287  double min_node_edit_cost(const GEDGraph & g, const GEDGraph & h) const;
288 
295  double max_edge_edit_cost(const GEDGraph & g, const GEDGraph & h) const;
296 
303  double min_edge_edit_cost(const GEDGraph & g, const GEDGraph & h) const;
304 
310  double max_node_del_cost(const GEDGraph & graph) const;
311 
317  double min_node_del_cost(const GEDGraph & graph) const;
318 
324  double mean_node_del_cost(const GEDGraph & graph) const;
325 
331  double max_node_ins_cost(const GEDGraph & graph) const;
332 
338  double min_node_ins_cost(const GEDGraph & graph) const;
339 
345  double mean_node_ins_cost(const GEDGraph & graph) const;
346 
353  double max_node_subs_cost(const GEDGraph & g, const GEDGraph & h) const;
354 
361  double min_node_subs_cost(const GEDGraph & g, const GEDGraph & h) const;
362 
369  double mean_node_subs_cost(const GEDGraph & g, const GEDGraph & h) const;
370 
376  double max_edge_del_cost(const GEDGraph & graph) const;
377 
383  double min_edge_del_cost(const GEDGraph & graph) const;
384 
390  double mean_edge_del_cost(const GEDGraph & graph) const;
391 
397  double max_edge_ins_cost(const GEDGraph & graph) const;
398 
404  double min_edge_ins_cost(const GEDGraph & graph) const;
405 
411  double mean_edge_ins_cost(const GEDGraph & graph) const;
412 
419  double max_edge_subs_cost(const GEDGraph & g, const GEDGraph & h) const;
420 
427  double min_edge_subs_cost(const GEDGraph & g, const GEDGraph & h) const;
428 
435  double mean_edge_subs_cost(const GEDGraph & g, const GEDGraph & h) const;
436 
437 private:
438 
439  std::vector<GEDGraph> graphs_;
440 
441  std::vector<std::string> graph_names_;
442 
443  std::vector<std::string> graph_classes_;
444 
445  std::size_t num_graphs_without_shuffled_copies_;
446 
447  std::vector<std::map<std::string, GEDGraph::NodeID>> strings_to_internal_node_ids_;
448 
449  std::vector<std::map<GEDGraph::NodeID, std::string>> internal_node_ids_to_strings_;
450 
452 
453  DMatrix node_costs_;
454 
455  DMatrix edge_costs_;
456 
457  std::vector<UserNodeLabel> node_labels_;
458 
459  std::vector<UserEdgeLabel> edge_labels_;
460 
461  Options::InitType init_type_;
462 
463  bool delete_edit_costs_;
464 
465  std::size_t max_num_nodes_;
466 
467  std::size_t max_num_edges_;
468 
469  GEDData();
470 
471  void set_edit_costs_(Options::EditCosts edit_costs, const std::vector<double> & edit_cost_constants);
472 
473  void set_edit_costs_(EditCosts<UserNodeLabel, UserEdgeLabel> * edit_costs);
474 
475  void init_cost_matrices_();
476 
477  bool eager_init_() const;
478 
479  LabelID node_label_to_id_(const UserNodeLabel & node_label);
480 
481  UserNodeLabel id_to_node_label(LabelID label_id) const;
482 
483  LabelID edge_label_to_id_(const UserEdgeLabel & edge_label);
484 
485  UserEdgeLabel id_to_edge_label(LabelID label_id) const;
486 
487  GEDGraph::NodeID string_to_node_id_(GEDGraph::GraphID graph_id, const std::string & string) const;
488 
489  std::string node_id_to_string_(GEDGraph::GraphID graph_id, GEDGraph::NodeID node_id) const;
490 
491 };
492 
493 }
494 
495 #include "ged_data.ipp"
496 
497 #endif /* SRC_ENV_GED_DATA_HPP_ */
std::size_t max_num_nodes() const
Returns maximal number of nodes.
Definition: ged_data.ipp:414
double min_node_subs_cost(const GEDGraph &g, const GEDGraph &h) const
Returns the minimal cost of substituting a node contained in a graph by a node contained in another g...
Definition: ged_data.ipp:828
~GEDData()
Destructor.
Definition: ged_data.ipp:53
Contains the standardized input data along with basic functionality.
Definition: ged_data.hpp:55
void compute_induced_cost(const GEDGraph &g, const GEDGraph &h, NodeMap &node_map) const
Computes the edit cost between two graphs induced by a node map.
Definition: ged_data.ipp:540
double edge_cost(LabelID label1, LabelID label2) const
Returns edge relabeling, insertion, or deletion cost.
Definition: ged_data.ipp:428
A class for node maps.
Definition: node_map.hpp:43
double node_cost(LabelID label1, LabelID label2) const
Returns node relabeling, insertion, or deletion cost.
Definition: ged_data.ipp:454
ged::Matrix class declaration.
double max_node_ins_cost(const GEDGraph &graph) const
Returns the maximal cost of inserting a node contained in a graph.
Definition: ged_data.ipp:779
std::vector< GEDGraph >::size_type GraphID
Type of internally used graph IDs.
Definition: ged_graph.hpp:112
std::size_t max_num_edges() const
Returns maximal number of nodes.
Definition: ged_data.ipp:421
void swap_assignments(const NodeMap::Assignment &assignment_1, const NodeMap::Assignment &assignment_2, double swap_cost, NodeMap &node_map) const
Swaps two assignments in a node map.
Definition: ged_data.ipp:656
ged::NodeMap class declaration.
const GEDGraph & graph(GEDGraph::GraphID graph_id) const
Provides access to a graph.
Definition: ged_data.ipp:325
ged::GEDGraph class declaration.
void vectorize_edge_label(LabelID edge_label, std::vector< double > &vector_representation) const
Computes an edge label&#39;s representation as a real-valued vector.
Definition: ged_data.ipp:447
double mean_node_ins_cost(const GEDGraph &graph) const
Returns the mean cost of inserting a node contained in a graph.
Definition: ged_data.ipp:801
double min_edit_cost(const GEDGraph &g, const GEDGraph &h) const
Returns the minimal edit cost between two graphs.
Definition: ged_data.ipp:1004
ged::Result struct declaration.
double mean_node_subs_cost(const GEDGraph &g, const GEDGraph &h) const
Returns the mean cost of substituting a node contained in a graph by a node contained in another grap...
Definition: ged_data.ipp:843
double min_node_del_cost(const GEDGraph &graph) const
Returns the minimal cost of deleting a node contained in a graph.
Definition: ged_data.ipp:754
double min_edge_edit_cost(const GEDGraph &g, const GEDGraph &h) const
Returns the minimal edge edit cost between two graphs.
Definition: ged_data.ipp:1047
double min_edge_subs_cost(const GEDGraph &g, const GEDGraph &h) const
Returns the minimal cost of substituting a edge contained in a graph by a edge contained in another g...
Definition: ged_data.ipp:872
std::size_t num_edge_labels() const
Returns the number of edge labels.
Definition: ged_data.ipp:407
bool is_shuffled_graph_copy(GEDGraph::GraphID graph_id) const
Checks if a graph is a shuffled copy of another graph.
Definition: ged_data.ipp:379
double max_node_del_cost(const GEDGraph &graph) const
Returns the maximal cost of deleting a node contained in a graph.
Definition: ged_data.ipp:743
std::size_t LabelID
Internally used type of node and edge labels.
double mean_edge_del_cost(const GEDGraph &graph) const
Returns the mean cost of deleting a edge contained in a graph.
Definition: ged_data.ipp:930
double max_edit_cost(const GEDGraph &g, const GEDGraph &h) const
Returns the maximal edit cost between two graphs.
Definition: ged_data.ipp:991
std::vector< GEDGraph >::const_iterator end() const
Provides access to the graphs contained in the instance.
Definition: ged_data.ipp:393
double max_node_edit_cost() const
Returns the maximal node edit cost between any two graphs contained in the instance.
Definition: ged_data.ipp:727
InitType
Selects the initialization type of the environment.
bool shuffled_graph_copies_available() const
Checks if shuffled graph copies are available.
Definition: ged_data.ipp:352
GEDGraph::GraphID id_shuffled_graph_copy(GEDGraph::GraphID graph_id) const
Returns ID of a graph&#39;s shuffled copy.
Definition: ged_data.ipp:366
double mean_edge_ins_cost(const GEDGraph &graph) const
Returns the mean cost of inserting a edge contained in a graph.
Definition: ged_data.ipp:977
double swap_cost(const GEDGraph &g, const GEDGraph &h, const NodeMap::Assignment &assignment_1, const NodeMap::Assignment &assignment_2, NodeMap &node_map) const
Computes the cost of swapping two assignments in a node map while leaving the node map unchanged...
Definition: ged_data.ipp:570
void vectorize_node_label(LabelID node_label, std::vector< double > &vector_representation) const
Computes a node label&#39;s representation as a real-valued vector.
Definition: ged_data.ipp:473
Type declarations used by various classes.
std::size_t num_graphs_without_shuffled_copies() const
Returns the number of graphs in the instance without the shuffled copies.
Definition: ged_data.ipp:359
double max_edge_ins_cost(const GEDGraph &graph) const
Returns the maximal cost of inserting a edge contained in a graph.
Definition: ged_data.ipp:955
double min_edge_del_cost(const GEDGraph &graph) const
Returns the minimal cost of deleting a edge contained in a graph.
Definition: ged_data.ipp:944
double mean_edge_subs_cost(const GEDGraph &g, const GEDGraph &h) const
Returns the mean cost of substituting a edge contained in a graph by a edge contained in another grap...
Definition: ged_data.ipp:887
std::vector< GEDGraph >::const_iterator begin() const
Provides access to the graphs contained in the instance.
Definition: ged_data.ipp:386
double max_node_subs_cost(const GEDGraph &g, const GEDGraph &h) const
Returns the maximal cost of substituting a node contained in a graph by a node contained in another g...
Definition: ged_data.ipp:815
double min_edge_ins_cost(const GEDGraph &graph) const
Returns the minimal cost of inserting a edge contained in a graph.
Definition: ged_data.ipp:966
double max_edge_del_cost(const GEDGraph &graph) const
Returns the maximal cost of deleting a edge contained in a graph.
Definition: ged_data.ipp:919
The normalized input graphs used by GEDLIB. All labels are integers.
Definition: ged_graph.hpp:104
double max_edge_edit_cost() const
Returns the maximal edge edit cost between any two graphs contained in the instance.
Definition: ged_data.ipp:903
bool quasimetric_costs() const
Checks if the edit costs are quasimetric.
Definition: ged_data.ipp:669
double max_edge_subs_cost(const GEDGraph &g, const GEDGraph &h) const
Returns the maximal cost of substituting a edge contained in a graph by a edge contained in another g...
Definition: ged_data.ipp:859
double min_node_edit_cost(const GEDGraph &g, const GEDGraph &h) const
Returns the minimal node edit cost between two graphs.
Definition: ged_data.ipp:1027
Global namespace for GEDLIB.
EditCosts
Selects the edit costs.
void save_node_map(const std::string &filename, GEDGraph::NodeID g_id, GEDGraph::NodeID h_id, const NodeMap &node_map, bool append=true) const
Saves a node map.
Definition: ged_data.ipp:480
std::size_t num_node_labels() const
Returns the number of node labels.
Definition: ged_data.ipp:400
double mean_node_del_cost(const GEDGraph &graph) const
Returns the mean cost of deleting a node contained in a graph.
Definition: ged_data.ipp:765
double min_node_ins_cost(const GEDGraph &graph) const
Returns the minimal cost of inserting a node contained in a graph.
Definition: ged_data.ipp:790
ged::GEDData class definition.
std::size_t NodeID
Internally used vertex ID type.
Definition: ged_graph.hpp:108
Abstract class for defining edit cost functions.
Definition: edit_costs.hpp:38
Provides the API of GEDLIB.
Definition: ged_data.hpp:48
void load_node_map(const std::string &filename, GEDGraph::NodeID g_id, GEDGraph::NodeID h_id, NodeMap &node_map) const
Loads a node map from a file.
Definition: ged_data.ipp:501
std::size_t num_graphs() const
Returns the number of graphs.
Definition: ged_data.ipp:318