00001 """Graphviz's dot language parser.
00002
The dotparser parses Graphviz files written in the dot language and
transforms them into a class representation defined by pydot.
00005
00006 The module needs pyparsing (tested with version 1.2.2) and pydot
00007
00008 Author: Michael Krause <michael@krause-software.de>
00009 Fixes by: Ero Carrera <ero@dkbza.org>
00010 """
00011
00012 from __future__ import division, print_function
00013
00014 __author__ = ['Michael Krause', 'Ero Carrera']
00015 __license__ = 'MIT'
00016
00017 import sys
00018 import pydot
00019 import codecs
00020
00021 from pyparsing import __version__ as pyparsing_version
00022
00023 from pyparsing import (
00024 nestedExpr, Literal, CaselessLiteral, Word, OneOrMore,
00025 Forward, Group, Optional, Combine, nums, restOfLine,
00026 cStyleComment, alphanums, printables, ParseException,
00027 ParseResults, CharsNotIn, QuotedString
00028 )
00029
00030
# True when the interpreter is Python 3 or newer.
PY3 = sys.version_info >= (3, 0, 0)

if PY3:
    # Python 3 has no ``basestring``; alias it to ``str`` so that
    # isinstance() checks against ``basestring`` keep working.
    basestring = str
00035
00036
class P_AttrList:
    """Attribute mapping collected from a flat token sequence.

    The tokens are read left to right as ``name`` optionally followed by
    ``'='`` and a value.  A name that is not followed by ``'='`` and a
    value is stored with ``None``.  The result is kept in ``self.attrs``.
    """

    def __init__(self, toks):
        self.attrs = {}
        total = len(toks)
        pos = 0

        while pos < total:
            name = toks[pos]
            # A value is present only when both '=' and a following token
            # exist.
            has_value = pos + 2 < total and toks[pos + 1] == '='

            if has_value:
                self.attrs[name] = toks[pos + 2]
                pos += 3
            else:
                self.attrs[name] = None
                pos += 1

    def __repr__(self):
        class_name = self.__class__.__name__
        return "%s(%r)" % (class_name, self.attrs)
00056
00057
class DefaultStatement(P_AttrList):
    """Default attributes declared for one statement type.

    ``default_type`` and ``attrs`` are stored exactly as given; the
    token-parsing initializer of ``P_AttrList`` is not invoked.
    """

    def __init__(self, default_type, attrs):
        self.attrs = attrs
        self.default_type = default_type

    def __repr__(self):
        details = (self.__class__.__name__, self.default_type, self.attrs)
        return "%s(%s, %r)" % details
00069
00070
# Module-level accumulator for the top-level graphs built while parsing.
top_graphs = []
00072
00073
def push_top_graph_stmt(str, loc, toks):
    """Parse action that builds the top-level graph(s) from ``toks``.

    Each ``graph``/``digraph`` keyword creates a new ``pydot.Dot`` that is
    appended to the module-level ``top_graphs`` list.  Later tokens set
    its name, merge in the contents of the parsed body, or add nested
    elements.

    Args:
        str: The text being parsed (unused; shadows the builtin).
        loc: Match location (unused).
        toks: Tokens of the matched graph definition(s).

    Returns:
        The only entry of ``top_graphs`` when it holds exactly one graph,
        otherwise the ``top_graphs`` list itself.

    Raises:
        ValueError: If a token of an unexpected kind is encountered.
    """
    attrs = {}
    g = None
    # Set by a ``strict`` keyword and consumed by the next graph keyword.
    strict = False

    for element in toks:
        # Unwrap single-string groups such as ['graph'].
        if (isinstance(element, (ParseResults, tuple, list)) and
                len(element) == 1 and isinstance(element[0], basestring)):
            element = element[0]

        if element == 'strict':
            attrs['strict'] = True
            strict = True

        elif element in ['graph', 'digraph']:
            # Pass the strict flag on before the per-graph state is reset;
            # resetting first would silently drop the ``strict`` keyword.
            graph_kwargs = {'strict': True} if strict else {}
            g = pydot.Dot(graph_type=element, **graph_kwargs)

            # Start from a clean slate so nothing leaks into a later graph.
            strict = False
            attrs = {'type': element}

            top_graphs.append(g)

        elif isinstance(element, basestring):
            g.set_name(element)

        elif isinstance(element, pydot.Subgraph):
            # The parsed body arrives as a Subgraph; fold its contents
            # into the top-level graph.
            g.obj_dict['attributes'].update(element.obj_dict['attributes'])
            g.obj_dict['edges'].update(element.obj_dict['edges'])
            g.obj_dict['nodes'].update(element.obj_dict['nodes'])
            g.obj_dict['subgraphs'].update(element.obj_dict['subgraphs'])
            g.set_parent_graph(g)

        elif isinstance(element, P_AttrList):
            attrs.update(element.attrs)

        elif isinstance(element, (ParseResults, list)):
            add_elements(g, element)

        else:
            raise ValueError("Unknown element statement: %r " % element)

    for g in top_graphs:
        update_parent_graph_hierarchy(g)

    if len(top_graphs) == 1:
        return top_graphs[0]

    return top_graphs
00120
00121
def update_parent_graph_hierarchy(g, parent_graph=None, level=0):
    """Re-point the parent graph of objects referenced by the edges of ``g``.

    Args:
        g: A graph object exposing ``obj_dict``, or a ``pydot.frozendict``
            that is used as the item dictionary directly.
        parent_graph: Graph to install as the parent; defaults to ``g``.
        level: Unused.
    """
    if parent_graph is None:
        parent_graph = g

    container = g if isinstance(g, pydot.frozendict) else g.obj_dict

    # Only the 'edges' entry is inspected.
    if 'edges' not in container:
        return

    for endpoints, edge_dicts in container['edges'].items():
        for edge in edge_dicts:
            if ('parent_graph' in edge and
                    edge['parent_graph'].get_parent_graph() == g):
                if edge['parent_graph'] is not g:
                    edge['parent_graph'].set_parent_graph(parent_graph)

            if len(endpoints) != 2:
                continue

            for vertex in edge['points']:
                if isinstance(vertex, (pydot.Graph, pydot.Subgraph, pydot.Cluster)):
                    vertex.set_parent_graph(parent_graph)
                if isinstance(vertex, pydot.frozendict):
                    if vertex['parent_graph'] is not g:
                        vertex['parent_graph'].set_parent_graph(parent_graph)
00152
00153
def add_defaults(element, defaults):
    """Fill missing or falsy instance attributes of ``element``.

    Every key of ``defaults`` whose current value in ``element.__dict__``
    is absent or falsy is set to the default value.  Truthy values are
    left untouched.
    """
    attributes = element.__dict__
    for name in defaults:
        if attributes.get(name):
            continue
        attributes[name] = defaults[name]
00159
00160
def add_elements(g, toks, defaults_graph=None, defaults_node=None, defaults_edge=None):
    """Add every parsed element in ``toks`` to the graph ``g``.

    Subgraphs, nodes and edges receive the matching defaults and are
    added to ``g``.  Nested ``ParseResults`` are processed recursively
    with the same defaults dictionaries.  A ``DefaultStatement`` is
    recorded as a node named after its type, and only ``edge`` defaults
    are also merged into ``defaults_edge``.  A plain ``P_AttrList``
    updates the graph attributes.

    Raises:
        ValueError: For an unknown default type or an unsupported element.
    """
    defaults_graph = {} if defaults_graph is None else defaults_graph
    defaults_node = {} if defaults_node is None else defaults_node
    defaults_edge = {} if defaults_edge is None else defaults_edge

    for item in toks:
        if isinstance(item, (pydot.Subgraph, pydot.Cluster)):
            add_defaults(item, defaults_graph)
            g.add_subgraph(item)
            continue

        if isinstance(item, pydot.Node):
            add_defaults(item, defaults_node)
            g.add_node(item)
            continue

        if isinstance(item, pydot.Edge):
            add_defaults(item, defaults_edge)
            g.add_edge(item)
            continue

        if isinstance(item, ParseResults):
            for nested in item:
                add_elements(g, [nested], defaults_graph, defaults_node, defaults_edge)
            continue

        # DefaultStatement derives from P_AttrList, so it is tested first.
        if isinstance(item, DefaultStatement):
            kind = item.default_type
            if kind not in ('graph', 'node', 'edge'):
                raise ValueError("Unknown DefaultStatement: %s " % item.default_type)

            g.add_node(pydot.Node(kind, **item.attrs))
            if kind == 'edge':
                defaults_edge.update(item.attrs)
            continue

        if isinstance(item, P_AttrList):
            g.obj_dict['attributes'].update(item.attrs)
            continue

        raise ValueError("Unknown element statement: %r" % item)
00208
00209
def push_graph_stmt(str, loc, toks):
    """Parse action: collect the statements in ``toks`` into a new subgraph.

    An unnamed ``pydot.Subgraph`` is created, filled through
    ``add_elements`` and returned.  ``str`` and ``loc`` are unused.
    """
    container = pydot.Subgraph('')
    add_elements(container, toks)
    return container
00214
00215
def push_subgraph_stmt(str, loc, toks):
    """Parse action: finish the subgraph held in the first token group.

    A three-item group is read as ``(keyword, name, graph)`` and the
    graph is renamed to ``name``; any other group is read as
    ``(keyword, graph)``.  When the keyword is ``'subgraph'`` the graph's
    ``show_keyword`` entry is set to ``True``.  Only the first group is
    considered; an empty ``toks`` yields a fresh unnamed subgraph.
    """
    fallback = pydot.Subgraph('')

    for group in toks:
        if len(group) == 3:
            group[2].set_name(group[1])
            position = 2
        else:
            position = 1

        if group[0] == 'subgraph':
            group[position].obj_dict['show_keyword'] = True
        return group[position]

    return fallback
00231
00232
def push_default_stmt(str, loc, toks):
    """Parse action: turn a ``graph``/``node``/``edge`` default into an object.

    ``toks[0][0]`` is the statement type.  When a second token exists its
    ``attrs`` mapping is used, otherwise an empty dict.

    Returns:
        A ``DefaultStatement`` for the type and attributes.

    Raises:
        ValueError: If the type is not ``graph``, ``node`` or ``edge``.
    """
    default_type = toks[0][0]
    attrs = toks[1].attrs if len(toks) > 1 else {}

    if default_type not in ['graph', 'node', 'edge']:
        raise ValueError("Unknown default statement: %r " % toks)

    return DefaultStatement(default_type, attrs)
00247
00248
def push_attr_list(str, loc, toks):
    """Parse action: wrap ``toks`` in a ``P_AttrList``.

    ``str`` and ``loc`` are unused.
    """
    return P_AttrList(toks)
00252
00253
def get_port(node):
    """Return the port from the first ``(':', port)`` pair of ``node``.

    ``None`` is returned when ``node`` has no second item, when that item
    is not a ``ParseResults``, or when its first entry is not a two-item
    group starting with ``':'``.
    """
    if len(node) <= 1:
        return None

    ports = node[1]
    if not isinstance(ports, ParseResults):
        return None

    first = ports[0]
    if len(first) == 2 and first[0] == ':':
        return first[1]

    return None
00261
00262
def do_node_ports(node):
    """Return the port suffix of ``node`` as one string.

    ``node[1]``, when present, is iterated as pairs; both members of each
    pair are converted to text and concatenated in order.  Without a
    second item the empty string is returned.
    """
    if len(node) <= 1:
        return ''

    return ''.join(str(marker) + str(value) for marker, value in node[1])
00270
00271
def push_edge_stmt(str, loc, toks):
    """Parse action: build the ``pydot.Edge`` objects of an edge statement.

    Attributes from every ``P_AttrList`` token are merged and applied to
    each edge.  The source is ``toks[0]``.  What is created depends on
    the type of ``toks[2][0]``: one edge per member of a nested
    ``ParseResults``, a single edge to a graph or to a node, or a chain
    of edges through every second token when the endpoints are strings.
    Any other endpoint type produces no edges.

    Returns:
        A list of ``pydot.Edge`` objects (possibly empty).
    """
    # ``str`` is shadowed by the first parameter, so the text type is
    # taken from a literal instead.
    text_type = type('')

    attrs = {}
    for tok in toks:
        if isinstance(tok, P_AttrList):
            attrs.update(tok.attrs)

    edges = []

    source = toks[0][0]
    if isinstance(source, pydot.Graph):
        n_prev = pydot.frozendict(source.obj_dict)
    else:
        n_prev = source + do_node_ports(toks[0])

    target = toks[2][0]

    if isinstance(target, ParseResults):
        wrapped_names = [[member.get_name()] for member in target]
        for wrapped in wrapped_names:
            endpoint = wrapped[0] + do_node_ports(wrapped)
            edges.append(pydot.Edge(n_prev, endpoint, **attrs))

    elif isinstance(target, pydot.Graph):
        frozen = pydot.frozendict(target.obj_dict)
        edges.append(pydot.Edge(n_prev, frozen, **attrs))

    elif isinstance(target, pydot.Node):
        name_port = (
            target.get_name() + ":" + target.get_port()
            if target.get_port() is not None
            else target.get_name()
        )
        edges.append(pydot.Edge(n_prev, name_port, **attrs))

    elif isinstance(target, text_type):
        # Every second token from index 2 on is an endpoint; each edge
        # starts where the previous one ended.
        for candidate in tuple(toks)[2::2]:
            if (isinstance(candidate, P_AttrList) or
                    not isinstance(candidate[0], text_type)):
                continue

            endpoint = candidate[0] + do_node_ports(candidate)
            edges.append(pydot.Edge(n_prev, endpoint, **attrs))
            n_prev = endpoint

    # Any other endpoint type is ignored and yields no edges.
    return edges
00320
00321
def push_node_stmt(s, loc, toks):
    """Parse action: build a ``pydot.Node`` from a node statement.

    The attributes come from ``toks[1].attrs`` when there are exactly two
    tokens, otherwise none are applied.  A non-empty list or tuple in
    ``toks[0]`` is reduced to its first item before being converted to
    the node name.  ``s`` and ``loc`` are unused.
    """
    attrs = toks[1].attrs if len(toks) == 2 else {}

    node_name = toks[0]
    if isinstance(node_name, (list, tuple)) and len(node_name) > 0:
        node_name = node_name[0]

    return pydot.Node(str(node_name), **attrs)
00336
00337
# Cached grammar object; graph_definition() builds it on first use and
# returns the same object on later calls.
graphparser = None
00339
00340
def graph_definition():
    """Return the pyparsing grammar for dot files, building it on first use.

    The grammar is stored in the module-level ``graphparser`` and is only
    constructed while that global is falsy; later calls return the cached
    object.  Building it also registers the comment patterns to ignore
    and attaches the ``push_*`` parse actions.
    """
    global graphparser

    if not graphparser:

        # Punctuation.
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")




        semi = Literal(";")
        at = Literal("@")
        minus = Literal("-")

        # Keywords, matched without regard to case.
        strict_ = CaselessLiteral("strict")
        graph_ = CaselessLiteral("graph")
        digraph_ = CaselessLiteral("digraph")
        subgraph_ = CaselessLiteral("subgraph")
        node_ = CaselessLiteral("node")
        edge_ = CaselessLiteral("edge")

        # Token classes.
        identifier = Word(alphanums + "_.").setName("identifier")

        # Quoted strings may span lines and keep their surrounding quotes.
        double_quoted_string = QuotedString('"', multiline=True, unquoteResults=False)

        # Every printable character except the comma.
        noncomma_ = "".join([c for c in printables if c != ","])
        # Runs of characters that are neither in ``noncomma_`` nor a space.
        alphastring_ = OneOrMore(CharsNotIn(noncomma_ + ' '))

        def parse_html(s, loc, toks):
            """Join the first token group and wrap it in angle brackets."""
            return '<%s>' % ''.join(toks[0])

        # Text delimited by (possibly nested) '<' ... '>' pairs.
        opener = '<'
        closer = '>'
        html_text = nestedExpr(
            opener, closer,
            (CharsNotIn(opener + closer))
        ).setParseAction(parse_html).leaveWhitespace()

        ID = (
            identifier | html_text |
            double_quoted_string |
            alphastring_
        ).setName("ID")

        float_number = Combine(
            Optional(minus) +
            OneOrMore(Word(nums + "."))
        ).setName("float_number")

        righthand_id = (float_number | ID).setName("righthand_id")

        # Node ports: ':' locations and '@' angles.
        port_angle = (at + ID).setName("port_angle")

        port_location = (
            OneOrMore(Group(colon + ID)) |
            Group(colon + lparen + ID + comma + ID + rparen)
        ).setName("port_location")

        port = (
            Group(port_location + Optional(port_angle)) |
            Group(port_angle + Optional(port_location))
        ).setName("port")

        # Attribute lists: one or more bracketed groups of "ID [= value]".
        node_id = (ID + Optional(port))
        a_list = OneOrMore(
            ID + Optional(equals + righthand_id) + Optional(comma.suppress())
        ).setName("a_list")

        attr_list = OneOrMore(
            lbrack.suppress() + Optional(a_list) + rbrack.suppress()
        ).setName("attr_list")

        attr_stmt = (Group(graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

        edgeop = (Literal("--") | Literal("->")).setName("edgeop")

        # stmt_list is recursive (a statement may contain a brace-delimited
        # statement list), so it is declared first and defined further down.
        stmt_list = Forward()
        graph_stmt = Group(
            lbrace.suppress() + Optional(stmt_list) +
            rbrace.suppress() + Optional(semi.suppress())
        ).setName("graph_stmt")

        # edge_point refers to subgraph, which is defined after it is used.
        edge_point = Forward()

        edgeRHS = OneOrMore(edgeop + edge_point)
        edge_stmt = edge_point + edgeRHS + Optional(attr_list)

        subgraph = Group(subgraph_ + Optional(ID) + graph_stmt).setName("subgraph")

        edge_point << Group(subgraph | graph_stmt | node_id).setName('edge_point')

        node_stmt = (
            node_id + Optional(attr_list) + Optional(semi.suppress())
        ).setName("node_stmt")

        assignment = (ID + equals + righthand_id).setName("assignment")
        # Alternatives are tried in the order listed.
        stmt = (
            assignment | edge_stmt | attr_stmt |
            subgraph | graph_stmt | node_stmt
        ).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi.suppress()))

        # Top level: one or more [strict] (graph | digraph) [ID] { ... }.
        graphparser = OneOrMore((
            Optional(strict_) + Group((graph_ | digraph_)) +
            Optional(ID) + graph_stmt
        ).setResultsName("graph"))

        # "//" and "#" comments run to the end of the line.
        singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

        # Skip both line comments and /* ... */ comments while parsing.
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)

        # Parse actions turn matched tokens into attribute lists and
        # pydot objects.
        assignment.setParseAction(push_attr_list)
        a_list.setParseAction(push_attr_list)
        edge_stmt.setParseAction(push_edge_stmt)
        node_stmt.setParseAction(push_node_stmt)
        attr_stmt.setParseAction(push_default_stmt)

        subgraph.setParseAction(push_subgraph_stmt)
        graph_stmt.setParseAction(push_graph_stmt)
        graphparser.setParseAction(push_top_graph_stmt)

    return graphparser
00476
00477
def parse_dot_data(data):
    """Parse dot-language source and return the resulting graph(s).

    On Python 3, ``bytes`` input is decoded first: a ``charset`` value
    found in the data selects the encoding, and UTF-8 is the fallback.
    On Python 2, input starting with a UTF-8 byte order mark is decoded
    as UTF-8.  The fallback decodes drop a leading byte order mark so
    that it does not reach the parser as a stray character.

    Args:
        data: The dot source as text or as encoded bytes.

    Returns:
        The single parse result when there is exactly one, a list of the
        results when there are several, or ``None`` when parsing fails.
        On failure the offending line, a caret marking the column and
        the error are printed.
    """
    global top_graphs

    top_graphs = list()

    if PY3:
        if isinstance(data, bytes):
            # Honour an explicit ``charset`` value when one can be located.
            try:
                idx = data.index(b'charset') + 7
                while data[idx] in b' \t\n\r=':
                    idx += 1
                fst = idx
                while data[idx] not in b' \t\n\r];,':
                    idx += 1
                charset = data[fst:idx].strip(b'"\'').decode('ascii')
                data = data.decode(charset)
            except (ValueError, LookupError):
                # No charset found, scan ran off the end, unknown codec or
                # undecodable bytes: fall back to UTF-8.  Unrelated errors
                # such as KeyboardInterrupt are no longer swallowed.
                data = data.decode('utf-8-sig')
    else:
        if data.startswith(codecs.BOM_UTF8):
            data = data.decode('utf-8-sig')

    try:
        graphparser = graph_definition()

        # NOTE(review): plain string comparison of version numbers; looks
        # adequate for the versions in use but is not a general version
        # check - confirm before relying on it.
        if pyparsing_version >= '1.2':
            graphparser.parseWithTabs()

        tokens = graphparser.parseString(data)

        if len(tokens) == 1:
            return tokens[0]
        return list(tokens)

    except ParseException as err:
        print(err.line)
        print(" " * (err.column - 1) + "^")
        print(err)
        return None