// Header preamble (include guard, GXL_GEDLIB_SHARED definition, GEDLIB environment include)
// followed by is_chemical_dataset().
//
// is_chemical_dataset(dataset): returns true if and only if `dataset` is exactly one of
// "AIDS", "Mutagenicity", "acyclic", "alkane", "mao", or "pah" (case-sensitive comparison).
31 #ifndef SRC_TESTS_VLDBJ2018_UTIL_HPP_ 32 #define SRC_TESTS_VLDBJ2018_UTIL_HPP_ 34 #define GXL_GEDLIB_SHARED 35 #include "../../../src/env/ged_env.hpp" 39 bool is_chemical_dataset(
const std::string & dataset) {
40 return ((dataset ==
"AIDS") or (dataset ==
"Mutagenicity") or (dataset ==
"acyclic") or (dataset ==
"alkane") or (dataset ==
"mao") or (dataset ==
"pah"));
/*!
 * @brief Checks whether a dataset name denotes one of the Letter datasets.
 * @param[in] dataset Name of the dataset (compared case-sensitively).
 * @return True if @p dataset equals "Letter_HIGH", "Letter_LOW", or "Letter_MED"; false otherwise.
 * @note Declared inline because the definition lives in a header; a non-inline
 * definition would cause multiple-definition link errors as soon as the header
 * is included from more than one translation unit.
 */
inline bool is_letter_dataset(const std::string & dataset) {
	return (dataset == "Letter_HIGH") or (dataset == "Letter_LOW") or (dataset == "Letter_MED");
}
47 void check_dataset(
const std::string & dataset) {
48 if (not (is_chemical_dataset(dataset) or is_letter_dataset(dataset) or (dataset ==
"CMU-GED") or (dataset ==
"Fingerprint") or (dataset ==
"GREC") or (dataset ==
"Protein"))) {
49 throw ged::Error(std::string(
"Dataset \"") + dataset +
"\" does not exists.");
53 std::string graph_dir(
const std::string & dataset) {
54 std::string root_dir(
"../../../data/datasets/");
55 if ((dataset ==
"AIDS") or (dataset ==
"Fingerprint") or (dataset ==
"GREC") or (dataset ==
"Protein") or (dataset ==
"Mutagenicity")) {
56 return (root_dir + dataset +
"/data/");
58 else if ((dataset ==
"Letter_HIGH")) {
59 return (root_dir +
"Letter/HIGH/");
61 else if ((dataset ==
"Letter_LOW")) {
62 return (root_dir +
"Letter/LOW/");
64 else if ((dataset ==
"Letter_MED")) {
65 return (root_dir +
"Letter/MED/");
67 else if (dataset ==
"CMU-GED") {
68 return (root_dir + dataset +
"/CMU/");
70 else if ((dataset ==
"acyclic") or (dataset ==
"alkane") or (dataset ==
"mao") or (dataset ==
"pah")) {
71 return (root_dir + dataset +
"/");
74 throw ged::Error(std::string(
"Dataset \"") + dataset +
"\" does not exists.");
79 std::string train_collection(
const std::string & dataset) {
80 std::string root_dir(
"../collections/");
81 check_dataset(dataset);
82 if (is_letter_dataset(dataset)) {
83 return (root_dir +
"Letter_train.xml");
85 return root_dir + dataset +
"_train.xml";
88 std::string test_collection(
const std::string & dataset) {
89 std::string root_dir(
"../collections/");
90 check_dataset(dataset);
91 if (is_letter_dataset(dataset)) {
92 return (root_dir +
"Letter_test.xml");
94 return root_dir + dataset +
"_test.xml";
97 std::string size_constrained_collection(
const std::string & dataset, std::size_t max_size_div_10) {
98 std::string suffix(std::to_string((max_size_div_10 * 10) - 9) +
"-" + std::to_string(max_size_div_10 * 10));
99 std::string root_dir(
"../collections/");
100 check_dataset(dataset);
101 return root_dir + dataset + suffix +
".xml";
104 std::string config_prefix(
const std::string & dataset) {
105 check_dataset(dataset);
106 return std::string(
"../ini/" + dataset +
"_");
// init_options(): builds an option string for the given dataset.
// Visible behaviour: validates `dataset` via check_dataset(), then starts the string with
// "--threads <threads> --save ../ini/<dataset>_<config_suffix>.ini".
// NOTE(review): this listing is missing several lines of the body (the conditionals around
// the throw and the two appends below, and the return statement) — confirm against the
// original file before relying on the comments here.
109 std::string init_options(
const std::string & dataset,
const std::string & config_suffix,
const std::string & data_suffix =
"",
bool save_train =
false,
bool load_train =
false, std::size_t threads = 8) {
110 check_dataset(dataset);
111 std::string options(
"--threads ");
112 options += std::to_string(threads) +
" --save ../ini/";
113 options += dataset +
"_" + config_suffix +
".ini";
// NOTE(review): the guarding condition is not visible here; judging by the message it
// presumably fires when both save_train and load_train are set — confirm.
116 throw ged::Error(
"Training data cannot be both saved and loaded.");
// Appends " --save-train ../ini/<dataset>_<data_suffix>.data" (presumably only if save_train — confirm).
118 options +=
" --save-train ../ini/" + dataset +
"_" + data_suffix +
".data";
// Appends " --load-train ../ini/<dataset>_<data_suffix>.data" (presumably only if load_train — confirm).
121 options +=
" --load-train ../ini/" + dataset +
"_" + data_suffix +
".data";
// ground_truth_option(): validates `dataset` via check_dataset() and yields the option
// string " --ground-truth-method IPFP" (note the leading space).
// NOTE(review): lines between the check and the return are missing from this listing, so
// the return below may be conditional and other return values may exist — confirm.
126 std::string ground_truth_option(
const std::string & dataset) {
127 check_dataset(dataset);
131 return std::string(
" --ground-truth-method IPFP");
135 if (is_chemical_dataset(dataset)) {
138 else if (is_letter_dataset(dataset)) {
141 else if (dataset ==
"CMU-GED") {
144 else if (dataset ==
"Fingerprint") {
147 else if (dataset ==
"GREC") {
150 else if (dataset ==
"Protein") {
154 throw ged::Error(std::string(
"Dataset \"") + dataset +
"\" does not exists.");
160 check_dataset(dataset);
161 if ((dataset ==
"Fingerprint") or (dataset ==
"CMU-GED")) {
168 check_dataset(dataset);
169 if (is_letter_dataset(dataset)) {
175 std::unordered_set<std::string> irrelevant_node_attributes(
const std::string & dataset) {
176 check_dataset(dataset);
177 std::unordered_set<std::string> irrelevant_attributes;
178 if ((dataset ==
"AIDS")) {
179 irrelevant_attributes.insert({
"x",
"y",
"symbol"});
181 else if (dataset ==
"Protein") {
182 irrelevant_attributes.insert(
"aaLength");
184 return irrelevant_attributes;
187 std::unordered_set<std::string> irrelevant_edge_attributes(
const std::string & dataset) {
188 check_dataset(dataset);
189 std::unordered_set<std::string> irrelevant_attributes;
190 if ((dataset ==
"GREC")) {
191 irrelevant_attributes.insert({
"angle0",
"angle1"});
193 else if (dataset ==
"Protein") {
194 irrelevant_attributes.insert({
"distance0",
"distance1"});
196 else if (dataset ==
"Fingerprint") {
197 irrelevant_attributes.insert(
"angle");
199 return irrelevant_attributes;
203 if (is_chemical_dataset(dataset) or (dataset ==
"Protein")) {
210 std::vector<ged::GEDGraph::GraphID> graph_ids(env.
load_gxl_graphs(graph_dir(dataset), (train ? train_collection(dataset) : test_collection(dataset)), node_type(dataset), edge_type(dataset), irrelevant_node_attributes(dataset), irrelevant_edge_attributes(dataset)));
212 env.
init(init_type(dataset));
217 std::vector<ged::GEDGraph::GraphID> graph_ids(env.
load_gxl_graphs(graph_dir(dataset), size_constrained_collection(dataset, max_size_div_10), node_type(dataset), edge_type(dataset), irrelevant_node_attributes(dataset), irrelevant_edge_attributes(dataset)));
219 env.
init(init_type(dataset));
/*!
 * @brief Overwrites a vector with the list of dataset names.
 * @param[out] datasets Receives, in this order, "Letter_HIGH", "Mutagenicity", "AIDS",
 * "Protein", "GREC", "Fingerprint". Any previous content is discarded.
 * @note Declared inline because the definition lives in a header; a non-inline
 * definition would cause multiple-definition link errors as soon as the header
 * is included from more than one translation unit.
 */
inline void setup_datasets(std::vector<std::string> & datasets) {
	datasets = {"Letter_HIGH", "Mutagenicity", "AIDS", "Protein", "GREC", "Fingerprint"};
}
/*!
 * @brief Overwrites a vector with the dataset names used for the size-constrained tests.
 * @param[out] datasets Receives, in this order, "Mutagenicity", "AIDS", "Protein".
 * Any previous content is discarded.
 * @note Declared inline because the definition lives in a header; a non-inline
 * definition would cause multiple-definition link errors as soon as the header
 * is included from more than one translation unit.
 */
inline void setup_size_test_datasets(std::vector<std::string> & datasets) {
	datasets = {"Mutagenicity", "AIDS", "Protein"};
}
Selects ged::Fingerprint.
void init(Options::InitType init_type=Options::InitType::EAGER_WITHOUT_SHUFFLED_COPIES)
Initializes the environment.
std::vector< GEDGraph::GraphID > load_gxl_graphs(const std::string &graph_dir, const std::string &collection_file, Options::GXLNodeEdgeType node_type=Options::GXLNodeEdgeType::LABELED, Options::GXLNodeEdgeType edge_type=Options::GXLNodeEdgeType::LABELED, const std::unordered_set< std::string > &irrelevant_node_attributes=std::unordered_set< std::string >(), const std::unordered_set< std::string > &irrelevant_edge_attributes=std::unordered_set< std::string >())
Loads graphs given in the GXL file format.
void set_edit_costs(Options::EditCosts edit_costs, std::initializer_list< double > edit_cost_constants={})
Sets the edit costs to one of the predefined edit costs.
GXLNodeEdgeType
Selects whether nodes or edges of graphs given in GXL file format are labeled or unlabeled.
Unlabeled nodes or edges.
InitType
Selects the initialization type of the environment.
Eager initialization, no shuffled graph copies are constructed.
EditCosts
Selects the edit costs.
Lazy initialization, no shuffled graph copies are constructed.
Provides the API of GEDLIB.