43 Python script that generates a collection of those graphs from an existing dataset whose number of nodes lies within a given range, reading the graphs from a directory containing GXL files. 47 import xml.etree.ElementTree
as ET
def graph_size(dir, gxl_file):
    """Return the number of nodes of the graph stored in a GXL file.

    Args:
        dir: Directory containing the GXL file. The name shadows the
            builtin ``dir`` but is kept so existing keyword callers
            continue to work.
        gxl_file: Name of the GXL file, joined onto ``dir``.

    Returns:
        The number of ``node`` elements found under ``graph`` elements
        that are direct children of the document root.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    graph = ET.parse(os.path.join(dir, gxl_file)).getroot()
    # findall() returns a list, so its length is the node count; this always
    # yields an int (0 for a graph without nodes) and is always returned.
    return len(graph.findall("graph/node"))
# Parse the command line arguments.
parser = argparse.ArgumentParser(
    description="Creates collection of graphs with given number of nodes from directory containing GXL files.")
parser.add_argument("dataset", help="path to existing dataset file")
parser.add_argument("collection", help="path to sample file to be generated by the script")
parser.add_argument("dir", help="path to directory containing GXL files")
parser.add_argument("min_size", help="minimal number of nodes", type=int)
parser.add_argument("max_size", help="maximal number of nodes", type=int)
args = parser.parse_args()

# The collection file is opened for writing later on; refuse to run when it
# is the same path as the dataset file that still has to be read.
if args.dataset == args.collection:
    raise Exception("dataset file equals collection file")

if not os.path.isdir(args.dir):
    # Report args.dir: the bare name `dir` is the builtin function, and
    # concatenating it to a string raises TypeError instead of this message.
    raise Exception(
        "Invalid argument \"" + args.dir +
        "\": not a directory. Usage: python graphs_of_given_size.py <dataset> <collection> <dir> <min-num-nodes> <max-num-nodes>")
# Read the dataset file and collect one (GXL file name, class label) pair
# per child element of its root.
dataset = ET.parse(args.dataset).getroot()
graphs = []
for entry in dataset:
    graphs.append((entry.attrib["file"], entry.attrib["class"]))
from xml.sax.saxutils import quoteattr

# Write the graph collection: the XML header, then one <graph> element for
# every dataset graph whose node count lies in [min_size, max_size].
# The `with` block guarantees the file is flushed and closed even if reading
# one of the GXL files fails half-way through.
with open(args.collection, "w") as file:
    file.write("<?xml version=\"1.0\"?>")
    file.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">")
    file.write("\n<GraphCollection>")
    for gxl_file, graph_class in graphs:
        size = graph_size(args.dir, gxl_file)
        if args.min_size <= size <= args.max_size:
            # quoteattr() adds the surrounding quotes and escapes &, < and
            # quote characters, so the output stays well-formed XML even for
            # unusual file names or class labels.
            file.write("\n\t<graph file=" + quoteattr(gxl_file) +
                       " class=" + quoteattr(graph_class) + "/>")
    file.write("\n</GraphCollection>")