GEDLIB  1.0
graphs_of_given_size.py
Go to the documentation of this file.
1 #//////////////////////////////////////////////////////////////////////////#
2 # #
3 # Copyright (C) 2018 by David B. Blumenthal #
4 # #
5 # This file is part of GEDLIB. #
6 # #
7 # GEDLIB is free software: you can redistribute it and/or modify it #
8 # under the terms of the GNU Lesser General Public License as published #
9 # by the Free Software Foundation, either version 3 of the License, or #
10 # (at your option) any later version. #
11 # #
12 # GEDLIB is distributed in the hope that it will be useful, #
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of #
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15 # GNU Lesser General Public License for more details. #
16 # #
17 # You should have received a copy of the GNU Lesser General Public #
18 # License along with GEDLIB. If not, see <http://www.gnu.org/licenses/>. #
19 # #
20 #//////////////////////////////////////////////////////////////////////////#
21 
22 
42 '''
43 Python script that generates a collection of the graphs whose number of nodes lies within a given range, from a directory containing GXL files.
44 '''
45 
46 
47 import xml.etree.ElementTree as ET
48 import argparse
49 import os.path
50 
def graph_size(dir, gxl_file):
    """Return the number of nodes of the graph stored in a GXL file.

    Args:
        dir: Directory that contains the GXL file.
        gxl_file: Name of the GXL file, joined onto ``dir``.

    Returns:
        The number of ``node`` elements that are direct children of a
        ``graph`` element located directly below the document root.

    Any error raised by ``ET.parse`` (e.g. for a missing or malformed file)
    propagates to the caller unchanged.
    """
    # NOTE: ``dir`` shadows the builtin of the same name; the parameter name
    # is kept so that existing keyword callers keep working.
    root = ET.parse(os.path.join(dir, gxl_file)).getroot()
    # findall() returns a list, so its length is the node count; no manual
    # counting loop is needed.
    return len(root.findall("graph/node"))
58 
# Parse the command line arguments.
parser = argparse.ArgumentParser(description="Creates collection of graphs with given number of nodes from directory containing GXL files.")
parser.add_argument("dataset", help="path to existing dataset file")
parser.add_argument("collection", help="path to sample file to be generated by the script")
parser.add_argument("dir", help="path to directory containing GXL files")
parser.add_argument("min_size", help="minimal number of nodes", type=int)
parser.add_argument("max_size", help="maximal number of nodes", type=int)
args = parser.parse_args()

# Validate the arguments before any file is read or written.
# The collection file is opened for writing further down, so it must not be
# the same path as the dataset file that is read as input.
if args.dataset == args.collection:
    raise Exception("dataset file equals collection file")
if not os.path.isdir(args.dir):
    # The message must use args.dir: the bare name `dir` is the builtin
    # function, and concatenating it to a str raises TypeError instead of
    # reporting the invalid argument.
    raise Exception("Invalid argument \"" + args.dir + "\": not a directory. Usage: python graphs_of_given_size.py <dataset> <collection> <dir> <min-num-nodes> <max-num-nodes>")
71 
# Parse the dataset file: every child of the root element contributes one
# (file, class) pair taken from its "file" and "class" attributes.
dataset = ET.parse(args.dataset).getroot()
graphs = [(graph.attrib["file"], graph.attrib["class"]) for graph in dataset]

# Select graphs with size between min_size and max_size (both inclusive) and
# write them to the collection file.
# The context manager guarantees that the file is closed even if graph_size()
# raises for one of the GXL files.
# NOTE(review): attribute values are written verbatim, without XML escaping.
with open(args.collection, "w") as collection_file:
    collection_file.write("<?xml version=\"1.0\"?>")
    collection_file.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">")
    collection_file.write("\n<GraphCollection>")
    for gxl_file, graph_class in graphs:
        if args.min_size <= graph_size(args.dir, gxl_file) <= args.max_size:
            collection_file.write("\n\t<graph file=\"" + gxl_file + "\" class=\"" + graph_class + "\"/>")
    collection_file.write("\n</GraphCollection>")