GEDLIB  1.0
protein.ipp
Go to the documentation of this file.
1 /***************************************************************************
2 * *
3 * Copyright (C) 2018 by David B. Blumenthal *
4 * *
5 * This file is part of GEDLIB. *
6 * *
7 * GEDLIB is free software: you can redistribute it and/or modify it *
8 * under the terms of the GNU Lesser General Public License as published *
9 * by the Free Software Foundation, either version 3 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * GEDLIB is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU Lesser General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU Lesser General Public *
18 * License along with GEDLIB. If not, see <http://www.gnu.org/licenses/>. *
19 * *
20 ***************************************************************************/
21 
27 #ifndef SRC_EDIT_COSTS_PROTEIN_IPP_
28 #define SRC_EDIT_COSTS_PROTEIN_IPP_
29 
30 namespace ged {
31 
32 template<>
33 Protein<GXLLabel, GXLLabel>::
34 ~Protein() {}
35 
36 template<>
38 Protein(double node_ins_del_cost, double edge_ins_del_cost, double alpha) :
39 node_ins_del_cost_{node_ins_del_cost},
40 edge_ins_del_cost_{edge_ins_del_cost},
41 alpha_{alpha} {}
42 
43 template<>
44 double
46 node_ins_cost_fun(const GXLLabel & node_label) const {
47  return alpha_ * node_ins_del_cost_;
48 }
49 
50 template<>
51 double
53 node_del_cost_fun(const GXLLabel & node_label) const {
54  return alpha_ * node_ins_del_cost_;
55 }
56 
57 template<>
58 double
60 node_rel_cost_fun(const GXLLabel & node_label_1, const GXLLabel & node_label_2) const {
61  if (node_label_1.at("type") != node_label_2.at("type")) {
62  return alpha_ * 2 * node_ins_del_cost_;
63  }
64  return (alpha_ * levenshtein_distance_(node_label_1.at("sequence"), node_label_2.at("sequence")));
65 }
66 
67 template<>
68 double
70 edge_ins_cost_fun(const GXLLabel & edge_label) const {
71  return (1 - alpha_) * edge_ins_del_cost_ * std::stod(edge_label.at("frequency"));
72 }
73 
74 template<>
75 double
77 edge_del_cost_fun(const GXLLabel & edge_label) const {
78  return (1 - alpha_) * edge_ins_del_cost_ * std::stod(edge_label.at("frequency"));
79 }
80 
81 template<>
82 double
84 edge_rel_cost_fun(const GXLLabel & edge_label_1, const GXLLabel & edge_label_2) const {
85  DMatrix edge_rel_cost_matrix(std::stod(edge_label_1.at("frequency")) + 1, std::stod(edge_label_2.at("frequency")) + 1);
86  for (std::size_t i{0}; i < edge_rel_cost_matrix.num_rows() - 1; i++) {
87  edge_rel_cost_matrix(i, edge_rel_cost_matrix.num_cols() - 1) = edge_ins_del_cost_;
88  }
89  for (std::size_t k{0}; k < edge_rel_cost_matrix.num_cols() - 1; k++) {
90  edge_rel_cost_matrix(edge_rel_cost_matrix.num_rows() - 1, k) = edge_ins_del_cost_;
91  }
92  for (std::size_t i{0}; i < edge_rel_cost_matrix.num_rows() - 1; i++) {
93  for (std::size_t k{0}; k < edge_rel_cost_matrix.num_cols() - 1; k++) {
94  if (edge_label_1.at(std::string("type") + std::to_string(i)) == edge_label_2.at(std::string("type") + std::to_string(k))) {
95  edge_rel_cost_matrix(i, k) = 0;
96  }
97  else {
98  edge_rel_cost_matrix(i, k) = 2 * edge_ins_del_cost_;
99  }
100  }
101  }
102  LSAPESolver lsape_solver(&edge_rel_cost_matrix);
103  lsape_solver.solve();
104  return (1 - alpha_) * lsape_solver.minimal_cost();
105 }
106 
107 template<class UserNodeLabel, class UserEdgeLabel>
108 double
109 Protein<UserNodeLabel, UserEdgeLabel>::
110 levenshtein_distance_(const std::string & string_1, const std::string & string_2) const {
111  DMatrix prefix_distance(string_1.size() + 1, string_2.size() + 1);
112  prefix_distance(0, 0) = 0.0;
113  for (std::size_t prefix_size_1{1}; prefix_size_1 <= string_1.size(); prefix_size_1++) {
114  prefix_distance(prefix_size_1, 0) = static_cast<double>(prefix_size_1);
115  }
116  for (std::size_t prefix_size_2{1}; prefix_size_2 <= string_2.size(); prefix_size_2++) {
117  prefix_distance(0, prefix_size_2) = static_cast<double>(prefix_size_2);
118  }
119  for (std::size_t prefix_size_1{1}; prefix_size_1 <= string_1.size(); prefix_size_1++) {
120  for (std::size_t prefix_size_2{1}; prefix_size_2 <= string_2.size(); prefix_size_2++) {
121  prefix_distance(prefix_size_1, prefix_size_2) = prefix_distance(prefix_size_1 - 1, prefix_size_2 - 1);
122  if (string_1.at(prefix_size_1 - 1) != string_2.at(prefix_size_2 - 1)) {
123  prefix_distance(prefix_size_1, prefix_size_2) += 1;
124  }
125  prefix_distance(prefix_size_1, prefix_size_2) = std::min(prefix_distance(prefix_size_1, prefix_size_2), prefix_distance(prefix_size_1 - 1, prefix_size_2) + 1);
126  prefix_distance(prefix_size_1, prefix_size_2) = std::min(prefix_distance(prefix_size_1, prefix_size_2), prefix_distance(prefix_size_1, prefix_size_2 - 1) + 1);
127  }
128  }
129  return prefix_distance(string_1.size(), string_2.size());
130 }
131 
132 }
133 
134 #endif /* SRC_EDIT_COSTS_PROTEIN_IPP_ */
virtual double edge_ins_cost_fun(const UserEdgeLabel &edge_label) const final
Edge insertion cost function.
Matrix< double > DMatrix
Matrix with double entries.
Definition: matrix.hpp:199
virtual double node_rel_cost_fun(const UserNodeLabel &node_label_1, const UserNodeLabel &node_label_2) const final
Node relabeling cost function.
virtual double node_ins_cost_fun(const UserNodeLabel &node_label) const final
Node insertions cost function.
std::map< std::string, std::string > GXLLabel
Type of node and edge labels of graphs given in the .gxl file format.
virtual double node_del_cost_fun(const UserNodeLabel &node_label) const final
Node deletion cost function.
Global namespace for GEDLIB.
virtual double edge_del_cost_fun(const UserEdgeLabel &edge_label) const final
Edge deletion cost function.
Protein(double node_ins_del_cost=11, double edge_ins_del_cost=1, double alpha=0.75)
Constructor.
virtual double edge_rel_cost_fun(const UserEdgeLabel &edge_label_1, const UserEdgeLabel &edge_label_2) const final
Edge relabeling cost function.