libvlfeat: webdoc.py Source File

Go to the documentation of this file.
00001 #!/usr/bin/python
00002 # file: webdoc.py
00003 # author: Andrea Vedaldi
00004 # description: A website formatter utility
00005 
00006 # Copyright (C) 2007-13 Andrea Vedaldi and Brian Fulkerson.
00007 # All rights reserved.
00008 #
00009 # This file is part of the VLFeat library and is made available under
00010 # the terms of the BSD license (see the COPYING file).
00011 
00012 import cProfile
00013 import types
00014 import xml.sax
00015 import xml.sax.saxutils
00016 import re
00017 import os
00018 import sys
00019 import random
00020 import copy
00021 import htmlentitydefs
00022 
00023 from xml.sax.handler import ContentHandler
00024 from xml.sax         import parse
00025 from urlparse        import urlparse
00026 from urlparse        import urlunparse
00027 from optparse        import OptionParser
00028 from doxytag         import Doxytag
00029 
00030 # this is used for syntax highlighting
00031 try:
00032     import pygments
00033     import pygments.lexers
00034     import pygments.formatters
00035     has_pygments = True
00036 except ImportError:
00037     has_pygments = False
00038 
# Document type declaration for XHTML 1.0 Transitional pages.
DOCTYPE_XHTML_TRANSITIONAL = \
    '<!DOCTYPE html PUBLIC ' \
    '"-//W3C//DTD XHTML 1.0 Transitional//EN" ' \
    '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'

# Create a dictionary that maps unicode characters to HTML entities.
# The characters '&', '<' and '>' are deliberately left out of the map.
mapUnicodeToHtmlEntity = { }
for k, v in htmlentitydefs.name2codepoint.items():
    c = unichr(v)
    if c == u'&' or c == u'<' or c == u'>': continue
    mapUnicodeToHtmlEntity [c] = "&%s;"  % k

# This indexes the document nodes by ID
nodeIndex = { }
# Counter used by getUniqueNodeID() to build the numeric ID suffixes
nodeUniqueCount = 0
# Doxygen tag index (its `index` attribute maps tags to URLs);
# None as long as no Doxygen tag file has been loaded
doxygenIndex = None
# Directory prepended to the Doxygen URLs by getDoxygenURL(), if non-empty
doxygenDir = ''
00056 
def getDoxygenURL(tag):
    """
    Return the URL of the Doxygen entry indexed under TAG. The URL is
    prefixed by the publish URL of the 'root' node and by the Doxygen
    directory, each followed by '/', whenever they are non-empty.
    """
    pieces = []
    siteURL = nodeIndex['root'].getPublishURL()
    if siteURL:
        pieces.append(siteURL + '/')
    if doxygenDir:
        pieces.append(doxygenDir + '/')
    pieces.append(doxygenIndex.index[tag])
    return ''.join(pieces)
00064 
def getUniqueNodeID(id = None):
    """
    getUniqueNodeID() generates an ID that is not yet in the node index,
    starting from the base name "id".
    getUniqueNodeID(id) does the same starting from ID; ID itself is
    returned when it is still unused, otherwise a "-<number>" suffix
    taken from the global counter is appended.
    """
    global nodeUniqueCount
    base = "id" if id is None else id
    candidate = base
    while candidate in nodeIndex:
        nodeUniqueCount += 1
        candidate = "%s-%d" % (base, nodeUniqueCount)
    return candidate
00077 
def dumpIndex():
    """
    Print every node stored in the node index, one per line
    (debugging aid).
    """
    for node in nodeIndex.values():
        print(node)
00084 
def ensureDir(dirName):
    """
    Create the directory DIRNAME, together with any missing parent
    directory, if it does not exist yet.

    Nothing is done if DIRNAME is already a directory. An OSError is
    raised if a regular file with that name already exists.
    """
    if os.path.isdir(dirName):
        return
    if os.path.isfile(dirName):
        raise OSError("cannot create the directory '%s' "
                      "because there exists already "
                      "a file with that name" % dirName)
    head, tail = os.path.split(dirName)
    if head and not os.path.isdir(head):
        ensureDir(head)
    # TAIL is empty when DIRNAME ends with a path separator; in that
    # case the recursive call on HEAD has already created the directory.
    if tail:
        os.mkdir(dirName)
00101 
def calcRelURL(toURL, fromURL):
    """
    Calculate the URL that reaches TOURL starting from FROMURL.

    If the two URLs differ in scheme or network location, TOURL is
    returned unchanged. Otherwise the result is a relative path made of
    one '../' for each directory of FROMURL that is not shared with
    TOURL, followed by the remaining part of the TOURL path. The
    parameters, query and fragment of TOURL are carried over.
    """
    fromURL = urlparse(fromURL)
    toURL = urlparse(toURL)
    if fromURL.scheme != toURL.scheme or fromURL.netloc != toURL.netloc:
        return urlunparse(toURL)

    # Split the paths and put the '/' back on every component but the
    # last one (the file name):
    # abs path: ['/', 'dir1/', ..., 'dirN/', 'file']
    # rel path: ['dir1/', ..., 'dirN/', 'file']
    # path with no file: ['dir1/', ..., 'dirN/', '']
    fromPath = fromURL.path.split("/")
    toPath = toURL.path.split("/")
    fromPath[:-1] = [part + u"/" for part in fromPath[:-1]]
    toPath[:-1] = [part + u"/" for part in toPath[:-1]]

    # find the length of the common path (but do not count the file name)
    i = 0
    limit = min(len(fromPath), len(toPath)) - 1
    while i < limit and fromPath[i] == toPath[i]:
        i += 1

    # Each remaining FROMURL directory costs one step upward; empty
    # components (a lone '/', as produced by '//') cost nothing:
    # a/b/c/d.html  --> ../../../d.html
    # a/b//c/d.html --> ../../../d.html
    upward = [u"../" if len(part) > 1 else u"" for part in fromPath[i:-1]]
    relPath = u"".join(upward) + "".join(toPath[i:])

    # Keep the parameters and the query of the target: dropping them
    # would make the relative URL point to a different resource.
    return urlunparse(("", "", relPath,
                       toURL.params, toURL.query, toURL.fragment))
00139 
def walkNodes(rootNode, nodeType = None, nodeBarrier = None):
    """
    Generate the nodes of the tree rooted at ROOTNODE, children before
    their parent. If NODETYPE is given, only the nodes of that type are
    generated. If NODEBARRIER is given, any node of that type is
    skipped together with its whole subtree.
    """
    blocked = nodeBarrier and rootNode.isA(nodeBarrier)
    if not blocked:
        for child in rootNode.getChildren():
            for descendant in walkNodes(child, nodeType, nodeBarrier):
                yield descendant
        if not nodeType or rootNode.isA(nodeType):
            yield rootNode
00148 
def walkAncestors(leafNode, nodeType = None):
    """
    Generate LEAFNODE and then its ancestors, from the nearest to the
    farthest. If NODETYPE is given, only the nodes of that type are
    generated.
    """
    node = leafNode
    while True:
        if not nodeType or node.isA(nodeType):
            yield node
        node = node.getParent()
        if not node:
            return
00156 
00157 # --------------------------------------------------------------------
class DocLocation:
# --------------------------------------------------------------------
    """
    A position in a source document: the URL of the file, a row
    number, and a column number.
    """
    def __init__(self, URL, row, column):
        self.URL, self.row, self.column = URL, row, column

    def __str__(self):
        # Formatted as URL:row:column
        fields = (self.URL, self.row, self.column)
        return "%s:%d:%d" % fields
00172 
00173 # --------------------------------------------------------------------
class DocError(BaseException):
# --------------------------------------------------------------------
    """
    An error consisting of a stack of locations and a message.

    The first location appended is the one where the error occurred;
    the locations appended afterwards are reported as the places the
    failing document was included from.
    """
    def __init__(self, message):
        BaseException.__init__(self, message)
        self.locations = []

    def __str__(self):
        # Use BaseException.__str__ rather than the deprecated
        # `message` attribute to retrieve the text of the error.
        message = BaseException.__str__(self)
        if not self.locations:
            return message
        # Outermost including location first, error location last.
        lines = ["included from %s:\n" % location
                 for location in reversed(self.locations[1:])]
        lines.append("%s: error: %s" % (self.locations[0], message))
        return "".join(lines)

    def appendLocation(self, location):
        """
        Push LOCATION on the stack of locations and return the error
        itself, so that the call can be chained.
        """
        self.locations.append(location)
        return self
00195 
00196 # --------------------------------------------------------------------
class makeGuard(object):
# --------------------------------------------------------------------
    """
    Decorates the method of a DocNode object so that,
    on raising a DocError exception, the location of the node
    is appended to it.

    The location is appended only if the error does not carry one
    already. The value returned by the decorated method is passed back
    to the caller, and any other exception propagates untouched.
    """

    def __init__(self, func):
        self.func = func

    def __call__(self, obj, *args, **keys):
        try:
            return self.func(obj, *args, **keys)
        except DocError as e:
            if not e.locations:
                e.appendLocation(obj.getLocation())
            # bare raise: keep the original traceback
            raise

    def __get__(self, obj, type=None):
        # Descriptor protocol. Accessed through the class (OBJ is
        # None) the guard itself is returned, so that it can be called
        # as Class.method(instance, ...); accessed through an instance
        # the guard is bound to it.
        if obj is None:
            return self
        return types.MethodType(self, obj)
00220 
00221 # --------------------------------------------------------------------
class DocBareNode:
# --------------------------------------------------------------------
    """
    Minimal node of the document tree: it has no parent, no children
    and no attributes. It is the base of simple leaf nodes such as
    chunks of text. Except for isA(), getChildren() and publishIndex(),
    all its methods do nothing and return None.
    """
    def __init__(self):
        pass

    def isA(self, classInfo):
        """
        Tell whether the node is an instance of CLASSINFO.
        """
        return isinstance(self, classInfo)

    def getChildren(self):
        """
        Return a new empty list, as a bare node has no children.
        """
        return list()

    def setParent(self, parent):
        return None

    def getPublishDirName(self):
        return None

    def getPublishFileName(self):
        return None

    def getPublishURL(self):
        return None

    def visit(self, generator):
        return None

    def publish(self, generator, pageNode = None):
        return None

    def publishIndex(self, gen, inPage, activePageNodes, full=False):
        # A bare node never contributes an entry to the index.
        return False
00250 
00251 # --------------------------------------------------------------------
class DocNode(DocBareNode):
# --------------------------------------------------------------------
    """
    A node of the document with a parent, children, attributes, and
    additional meta-information such as the location
    of the XML element that caused this node to be generated.
    """
    def __init__(self, attrs, URL, locator):
        """
        ATTRS is the mapping of the node attributes: its 'id' entry,
        if present, becomes the node ID, otherwise a unique ID is
        generated. URL is recorded as the source of the node and
        LOCATOR, unless None, is asked for the current line and column
        numbers. The node registers itself in the global node index.
        """
        self.parent = None
        self.children = []
        self.attrs = attrs
        self.sourceURL = URL
        self.sourceRow = None
        self.sourceColumn = None
        if 'id' in attrs:
            self.id = attrs['id']
        else:
            self.id = getUniqueNodeID()
        if locator:
            self.sourceRow = locator.getLineNumber()
            self.sourceColumn = locator.getColumnNumber()
        nodeIndex[self.id] = self

    def __str__(self):
        return "%s:%s -> %s" % (self.getLocation(), self.getID(), self.getPublishURL())

    def dump(self):
        """
        Recursively dump the tree of nodes, for debugging purposes.
        """
        depth = self.getDepth()
        # indentation, a blank, then the node description
        print("%s %s" % (" " * depth, self))
        for x in self.children: x.dump()

    def getID(self):
        """
        Return the node ID.
        """
        return self.id

    def getParent(self):
        """
        Return the node parent.
        """
        return self.parent

    def getChildren(self):
        """
        Return the list of node children.
        """
        return self.children

    def getAttributes(self):
        """
        Return the dictionary of node attributes.
        """
        return self.attrs

    def getDepth(self):
        """
        Return the depth of the node in the tree (0 for a node
        without parent).
        """
        if self.parent:
            return self.parent.getDepth() + 1
        else:
            return 0

    def setParent(self, parent):
        """
        Set the parent of the node.
        """
        self.parent = parent

    def adopt(self, orfan):
        """
        Adds the node ORFAN to the node children and make the node
        the parent of ORFAN.
        """
        self.children.append(orfan)
        orfan.setParent(self)

    def findAncestors(self, nodeType = None):
        """
        Return the list of the node ancestors of type NODETYPE,
        nearest first. If NODETYPE is None, returns all ancestors.
        """
        if nodeType is None:
            nodeType = DocNode
        if self.parent:
            if self.parent.isA(nodeType):
                found = [self.parent]
            else:
                found = []
            found = found + self.parent.findAncestors(nodeType)
            return found
        return []

    def findChildren(self, nodeType = None):
        """
        Returns the node children of type NODETYPE. If NODETYPE is
        None, returns all the children that are DocNode instances.
        """
        if nodeType is None:
            nodeType = DocNode
        return [x for x in self.children if x.isA(nodeType)]

    def getLocation(self):
        """
        Get the location (file, row number, and column number)
        where this node was instantiated. Any field that is unknown
        for this node is taken from the location of its parent.
        """
        location = DocLocation(self.sourceURL,
                               self.sourceRow,
                               self.sourceColumn)
        if self.parent:
            parentLocation = self.parent.getLocation()
            if location.URL is None: location.URL = parentLocation.URL
            if location.row is None: location.row = parentLocation.row
            if location.column is None: location.column = parentLocation.column
        return location

    def getPublishDirName(self):
        """
        Returns the publish dir name of the parent, or None if the
        node has no parent.
        """
        if self.parent:
            return self.parent.getPublishDirName()
        return None

    def getPublishFileName(self):
        """
        Returns NONE.
        """
        return None

    def getPublishURL(self):
        """
        Returns NONE.
        """
        return None

    def visit(self, generator):
        """
        Recursively calls VISIT() on its children.
        """
        for c in self.getChildren():
            c.visit(generator)
        return None

    def publish(self, generator, pageNode = None):
        """
        Recursively calls PUBLISH() on its children.
        """
        for c in self.getChildren():
            c.publish(generator, pageNode)
        return None

    publish = makeGuard(publish)

    def publishIndex(self, gen, inPage, activePageNodes, full=False):
        """
        Recursively calls PUBLISHINDEX() on its children. Returns
        True if at least one of the children returned a true value.
        """
        hasIndexedChildren = False
        for c in self.getChildren():
            hasIndexedChildren = c.publishIndex(gen, inPage, activePageNodes, full) \
                or hasIndexedChildren
        return hasIndexedChildren

    def publishTableOfContents(self, gen, pageNode):
        """
        Create a TOC corresponding to the H1, H2, ... tags in a DocPage.

        The headings are searched in the subtrees of the children of
        PAGENODE, without descending into nested DocPage nodes, and
        are written to GEN as nested <ul> lists linking to the heading
        IDs.
        """
        gen.putString("<div class='toc'>\n")
        gen.putString("<h3>Table of Contents</h3>")
        previousLevel = 0
        for q in pageNode.getChildren():
            for x in walkNodes(q, DocHtmlElement, DocPage):
                if x.tag not in ['h1', 'h2', 'h3', 'h4', 'h5']: continue
                level = int(x.tag[1]) # e.g. h2 -> level = 2
                title = "".join([y.text for y in walkNodes(x, DocHtmlText)])
                while previousLevel < level:
                    gen.putString("<ul>")
                    previousLevel += 1
                while previousLevel > level:
                    gen.putString("</ul>\n")
                    previousLevel -= 1
                gen.putString('<li class="level%d">'
                             '<a href="#%s">%s</a>'
                             '</li>\n' % (level, x.id, title))
            # close the lists left open by the headings of this child
            while previousLevel > 0:
                gen.putString("</ul>\n")
                previousLevel -= 1
        gen.putString("</div><!-- Table of contents -->\n")

    def expandAttr(self, value, pageNode):
        """
        Expand an attribute by substituting any directive with its value.

        A directive has the form %name:argument; and is one of:

          %pathto:ID;   relative URL from PAGENODE to the node ID
          %env:NAME;    value of the environment variable NAME
          %dox:TAG;     relative URL from PAGENODE to the Doxygen
                        entry TAG

        Directives that cannot be resolved produce a warning. Any
        other directive raises a DocError.
        """
        xvalue = ""
        cursor = 0
        for m in re.finditer(r"%[-\w._#:]+;", value):
            # copy verbatim the text preceding the directive
            if cursor < m.start():
                xvalue += value[cursor : m.start()]
            cursor = m.end()
            # strip the leading '%' and the trailing ';'
            directive = value[m.start()+1 : m.end()-1]
            mo = re.match('pathto:(.*)', directive)
            if mo:
                toNodeID = mo.group(1)
                toNodeURL = None
                if toNodeID in nodeIndex:
                    toNodeURL = nodeIndex[toNodeID].getPublishURL()
                if toNodeURL is None:
                    print("%s: warning: could not cross-reference '%s'" % (self.getLocation(), toNodeID))
                    # fall back to using the ID itself as the URL
                    toNodeURL = toNodeID
                fromPageURL = pageNode.getPublishURL()
                xvalue += calcRelURL(toNodeURL, fromPageURL)
                continue
            mo = re.match('env:(.*)', directive)
            if mo:
                envName = mo.group(1)
                if envName in os.environ:
                    xvalue += os.environ[envName]
                else:
                    print("%s: warning: the environment variable '%s' not defined" % (self.getLocation(), envName))
                continue
            mo = re.match('dox:(.*)', directive)
            if mo:
                if doxygenIndex is None:
                    # NOTE(review): `opts` is not defined in this part
                    # of the file; presumably a module-level global
                    # holding the command line options -- confirm.
                    if opts.verb > 1:
                        print("%s: warning: no Doxygen tag file loaded, skipping this directive." % self.getLocation())
                    continue
                if not mo.group(1) in doxygenIndex.index:
                    print("%s: warning: the ID %s was not found in the Doxygen tag file." % (self.getLocation(), mo.group(1)))
                    continue
                toNodeURL = getDoxygenURL(mo.group(1))
                fromPageURL = pageNode.getPublishURL()
                xvalue += calcRelURL(toNodeURL, fromPageURL)
                continue
            raise DocError("unknown directive '%s' found while expanding an attribute" % directive)
        # copy verbatim the text following the last directive
        if cursor < len(value): xvalue += value[cursor:]
        #print "EXPAND: ", value, " -> ", xvalue
        return xvalue
00496 
00497 # --------------------------------------------------------------------
class DocInclude(DocNode):
# --------------------------------------------------------------------
    """
    Node carrying the path found in the mandatory 'src' attribute
    (printed as a <web:include> element).
    """
    def __init__(self, attrs, URL, locator):
        DocNode.__init__(self, attrs, URL, locator)
        if "src" not in attrs:
            raise DocError("include missing 'src' attribute")
        self.filePath = attrs["src"]

    def __str__(self):
        prefix = DocNode.__str__(self)
        quotedPath = xml.sax.saxutils.quoteattr(self.filePath)
        return prefix + ":<web:include src=%s>" % quotedPath
00509 
00510 # --------------------------------------------------------------------
class DocDir(DocNode):
# --------------------------------------------------------------------
    """
    Node carrying the directory name found in the mandatory 'name'
    attribute (printed as a <web:dir> element). The name is appended
    to the publish directory of the parent node.
    """
    def __init__(self, attrs, URL, locator):
        DocNode.__init__(self, attrs, URL, locator)
        if "name" not in attrs:
            raise DocError("dir tag missing 'name' attribute")
        self.dirName = attrs["name"]

    def __str__(self):
        prefix = DocNode.__str__(self)
        quotedName = xml.sax.saxutils.quoteattr(self.dirName)
        return prefix + ":<web:dir name=%s>" % quotedName

    def getPublishDirName(self):
        # parent directory + own name + path separator
        parentDirName = self.parent.getPublishDirName()
        return parentDirName + self.dirName + os.sep

    def visit(self, generator):
        # The children are visited from inside the directory.
        generator.changeDir(self.dirName)
        DocNode.visit(self, generator)
        generator.parentDir()
00530 
00531 # --------------------------------------------------------------------
class DocGroup(DocNode):
# --------------------------------------------------------------------
    """
    Plain DocNode that is printed as a <web:group> element.
    """
    def __init__(self, attrs, URL, locator):
        DocNode.__init__(self, attrs, URL, locator)

    def __str__(self):
        prefix = DocNode.__str__(self)
        return prefix + ":<web:group>"
00539 
00540 # --------------------------------------------------------------------
class DocCDATAText(DocBareNode):
# --------------------------------------------------------------------
    """
    A bare node holding a chunk of text that is published verbatim.
    """
    def __init__(self, text):
        DocBareNode.__init__(self)
        self.text = text

    def __str__(self):
        # A bare node has no location, ID or URL, so DocNode.__str__
        # cannot be applied to it; describe the text only.
        return "CDATA text:" + self.text

    def publish(self, gen, pageNode = None):
        """
        Write the text to GEN as it is.
        """
        gen.putString(self.text)
00552 
00553 # --------------------------------------------------------------------
class DocCDATA(DocNode):
# --------------------------------------------------------------------
    """
    Node whose published children are wrapped in a CDATA section. It is
    created with no attributes, no source URL and no locator.
    """
    def __init__(self):
        DocNode.__init__(self, {}, None, None)

    def __str__(self):
        prefix = DocNode.__str__(self)
        return prefix + ":CDATA"

    def publish(self, gen, pageNode = None):
        # opening marker, children, closing marker
        gen.putString("<![CDATA[")
        DocNode.publish(self, gen, pageNode)
        gen.putString("]]>")

    publish = makeGuard(publish)
00568 
00569 # --------------------------------------------------------------------
class DocHtmlText(DocBareNode):
# --------------------------------------------------------------------
    """
    A bare node holding a chunk of text in which directives of the
    form %name; or %name:options; are expanded upon publishing.
    """
    def __init__(self, text):
        DocBareNode.__init__(self)
        self.text = text

    def __str__(self):
        # A bare node has no location, ID or URL, so DocNode.__str__
        # cannot be applied to it; describe the text only.
        return "text:'" + \
            self.text.encode('utf-8').encode('string_escape') + "'"

    def publish(self, gen, pageNode = None):
        """
        Write the text to GEN, replacing each directive as follows:

          %content;          the content of PAGENODE
          %pagestyle;        the DocPageStyle nodes of PAGENODE
          %pagescript;       the DocPageScript nodes of PAGENODE
          %pagetitle;        the title of PAGENODE
          %path;             the chain of pages leading to PAGENODE
                             (%path:plain; omits the links)
          %navigation;       the index of the site
          %tableofcontents;  the table of contents of PAGENODE
          %env:NAME;         the environment variable NAME

        Unknown directives are dropped with a warning. The text
        between directives is written with gen.putXMLString().
        """
        # find occurrences of %directive; in the text node and do the
        # appropriate substitutions
        cursor = 0
        for m in re.finditer(r"%(\w+)(?::([-\w._#]+))?;", self.text):
            if cursor < m.start():
                gen.putXMLString(self.text[cursor : m.start()])
            cursor = m.end()
            directive = m.group(1)
            if m.group(2):
                options = [x.strip().lower() for x in m.group(2).split(',')]
            else:
                options = []

            if directive == "content":
                pageNode.publish(gen, pageNode)

            elif directive == "pagestyle":
                for q in pageNode.getChildren():
                    for s in walkNodes(q, DocPageStyle, DocPage):
                        s.expand(gen, pageNode)

            elif directive == "pagescript":
                for q in pageNode.getChildren():
                    for s in walkNodes(q, DocPageScript, DocPage):
                        s.expand(gen, pageNode)

            elif directive == "pagetitle":
                gen.putString(pageNode.title)

            elif directive == "path":
                ancPages = [x for x in walkAncestors(pageNode, DocPage)]
                plain = False
                for option in options:
                    if option == "plain":
                        plain = True
                    else:
                        print("warning: ignoring unknown option '%s' while expanding 'path'" % option)
                # from the outermost page down to this page
                for i, p in enumerate(reversed(ancPages)):
                    if plain:
                        if i > 0: gen.putString(" > ")
                        gen.putString(p.title)
                    else:
                        if i > 0: gen.putString("<span class='separator'>></span>")
                        gen.putString("<span class='page'><a href=")
                        gen.putXMLAttr(
                            pageNode.expandAttr("%%pathto:%s;" % p.getID(), pageNode))
                        gen.putString(">%s</a></span>" % p.title)

            elif directive == "navigation":
                gen.putString("<ul>\n")
                # get the branch of DocPage nodes from the site root to this page
                activePageNodes = [x for x in walkAncestors(pageNode, DocPage)]
                # find the root site node and publish the contents
                siteNode = next(walkAncestors(pageNode, DocSite))
                siteNode.publishIndex(gen, pageNode, activePageNodes, True)
                gen.putString("</ul>\n")

            elif directive == "tableofcontents":
                pageNode.publishTableOfContents(gen, pageNode)

            elif directive == "env":
                envName = m.group(2)
                if envName in os.environ:
                    gen.putString(os.environ[envName])
                else:
                    print("warning: environment variable '%s' not defined" % envName)
            else:
                print("warning: ignoring unknown directive '%s'" % directive)
        if cursor < len(self.text):
            gen.putXMLString(self.text[cursor:])
00653 
00654 
00655 # --------------------------------------------------------------------
class DocCodeText(DocBareNode):
# --------------------------------------------------------------------
    """
    A bare node holding a chunk of source code text. It is not
    published by itself: DocCode collects the text of its DocCodeText
    children.
    """
    def __init__(self, text):
        DocBareNode.__init__(self)
        self.text = text

    def __str__(self):
        # A bare node has no location, ID or URL, so DocNode.__str__
        # cannot be applied to it; describe the text only.
        return "text:'" + \
            self.text.encode('utf-8').encode('string_escape') + "'"
00665 
00666 # --------------------------------------------------------------------
class DocCode(DocNode):
# --------------------------------------------------------------------
    """
    A block of source code (printed as a <web:precode> element). The
    'type' attribute names the language used for syntax highlighting
    and defaults to "plain", which disables highlighting.
    """
    def __init__(self, attrs, URL = None, locator = None):
        DocNode.__init__(self, attrs, URL, locator)
        self.type = "plain"
        if "type" in attrs: self.type = attrs["type"]

    def __str__(self):
        description = "<web:precode"
        for k, v in self.attrs.items():
            description += " " + k + "='" + xml.sax.saxutils.escape(v) + "'"
        # close the tag once, after all the attributes
        description += "> type = " + self.type
        return DocNode.__str__(self) + ":" + description

    def publish(self, gen, pageNode = None):
        """
        Write to GEN the text of the DocCodeText children, highlighted
        with Pygments when it is available and the type is not
        "plain", wrapped in <pre> otherwise (or when Pygments has no
        lexer for the type). The children are then published as usual.
        """
        code = "".join([n.text for n in self.getChildren()
                        if n.isA(DocCodeText)])
        if has_pygments and not self.type == "plain":
            try:
                lexer = pygments.lexers.get_lexer_by_name(self.type)
                gen.putString(pygments.highlight(code,
                                                 lexer,
                                                 pygments.formatters.HtmlFormatter()))
            except pygments.util.ClassNotFound:
                print("warning: could not find a syntax highlighter for '%s'" % self.type)
                gen.putString("<pre>" + code + "</pre>")
        else:
            # NOTE(review): the code is written without escaping; this
            # assumes the text is already safe to embed in the page --
            # confirm against the code creating DocCodeText nodes.
            gen.putString("<pre>" + code + "</pre>")
        DocNode.publish(self, gen, pageNode)

    publish = makeGuard(publish)
00700 
00701 # --------------------------------------------------------------------
class DocHtmlElement(DocNode):
# --------------------------------------------------------------------
    """
    An HTML element, published as the tag TAG with the node attributes
    (after directive expansion) and the published children as content.
    """
    def __init__(self, tag, attrs, URL = None, locator = None):
        DocNode.__init__(self, attrs, URL, locator)
        self.tag = tag

    def __str__(self):
        description = "<html:" + self.tag
        for k, v in self.attrs.items():
            description += " " + k + "='" + xml.sax.saxutils.escape(v) + "'"
        description += ">"
        return DocNode.__str__(self) + ":" + description

    def getPublishURL(self):
        """
        Return the URL of the nearest DocPage ancestor followed by
        '#' and the node ID, or None if there is no such ancestor.
        """
        anc = self.findAncestors(DocPage)
        if len(anc) == 0: return None
        return anc[0].getPublishURL() + "#" + self.id

    def publish(self, gen, pageNode = None):
        """
        Write the element to GEN. <br> is written as an empty
        element. The content of <code> is wrapped in a link when its
        text is the ID of a known node.
        """
        gen.putString("<")
        gen.putString(self.tag)
        # make sure headings have an id (for ToCs)
        if self.tag in ['h1', 'h2', 'h3', 'h4', 'h5'] and \
           "id" not in self.attrs:
            self.attrs["id"] = self.id
        for name, value in self.attrs.items():
            gen.putString(" ")
            gen.putString(name)
            gen.putString("=")
            gen.putXMLAttr(self.expandAttr(value, pageNode))
        if self.tag == 'br':
            # workaround for browser that do not like <br><br/>
            gen.putString("/>")
        elif self.tag == 'code':
            # expand tags such as <code>vl_function</code> as links
            gen.putString(">")
            text = "".join([y.text for y in walkNodes(self, DocHtmlText)])
            ok = text in nodeIndex
            if ok:
                url = self.expandAttr("%%pathto:%s;" % text, pageNode)
                # quote the URL so that the attribute is well formed
                gen.putString("<a href=" + xml.sax.saxutils.quoteattr(url) + ">")
            DocNode.publish(self, gen, pageNode)
            if ok: gen.putString("</a>")
            gen.putString("</")
            gen.putString(self.tag)
            gen.putString(">")
        else:
            gen.putString(">")
            DocNode.publish(self, gen, pageNode)
            gen.putString("</")
            gen.putString(self.tag)
            gen.putString(">")

    publish = makeGuard(publish)
00754 
00755 # --------------------------------------------------------------------
class DocTemplate(DocNode):
# --------------------------------------------------------------------
    """
    A DocNode subclass that adds nothing to DocNode but its own type.
    """
    def __init__(self, attrs, URL, locator):
        DocNode.__init__(self, attrs, URL, locator)
00760 
00761 # --------------------------------------------------------------------
class DocPageStyle(DocNode):
# --------------------------------------------------------------------
    """
    A style sheet of a page. It produces no output when published in
    place; its markup is generated by expand() instead.
    """
    def __init__(self, attrs, URL, locator):
        DocNode.__init__(self, attrs, URL, locator)

    def publish(self, gen, pageNode = None):
        """
        Do nothing.
        """
        return None

    def expand(self, gen, pageNode = None):
        """
        Write the style sheet to GEN: a <link> element if the node
        has a 'href' attribute, a <style> element wrapping the
        published children otherwise. The 'type' attribute defaults
        to "text/css".
        """
        sa = self.getAttributes()
        if "href" in sa:
            gen.putString("<link rel=\"stylesheet\" type=")
            if "type" in sa:
                gen.putXMLAttr(self.expandAttr(sa["type"], pageNode))
                # separate the type from the attribute that follows
                gen.putString(" ")
            else:
                gen.putString("\"text/css\" ")
            gen.putString("href=")
            gen.putXMLAttr(self.expandAttr(sa["href"], pageNode))
            gen.putString("></link>\n")
        else:
            gen.putString("<style rel=\"stylesheet\" type=")
            if "type" in sa:
                gen.putXMLAttr(self.expandAttr(sa["type"], pageNode))
            else:
                gen.putString("\"text/css\" ")
            # close the opening tag whatever the origin of the type
            gen.putString(">")
            DocNode.publish(self, gen, pageNode)
            gen.putString("</style>\n")

    expand = makeGuard(expand)
00792 
00793 # --------------------------------------------------------------------
class DocPageScript(DocNode):
# --------------------------------------------------------------------
    """
    Document node built by DocHandler.startElement for a "pagescript"
    element.

    Regular publishing skips the node (publish() emits nothing); the
    <script> element is written by expand().
    """

    def __init__(self, attrs, URL, locator):
        DocNode.__init__(self, attrs, URL, locator)

    def publish(self, gen, pageNode = None):
        """Emit nothing and return None; output is produced by expand()."""
        return None

    def expand(self, gen, pageNode = None):
        """
        Write a <script> element for this node to the generator GEN.

        The 'type' attribute defaults to "text/javascript"; a 'src'
        attribute is copied when present. The children of the node are
        published as the content of the element.
        """
        attributes = self.getAttributes()
        emit = gen.putString

        emit("<script type=")
        if not attributes.has_key("type"):
            emit("\"text/javascript\" ")
        else:
            gen.putXMLAttr(self.expandAttr(attributes["type"], pageNode))
            emit(" ")

        if attributes.has_key("src"):
            emit("src=")
            gen.putXMLAttr(self.expandAttr(attributes["src"], pageNode))

        emit(">")
        DocNode.publish(self, gen, pageNode)
        emit("</script>\n")

    expand = makeGuard(expand)
00818 
00819 # --------------------------------------------------------------------
class DocPage(DocNode):
# --------------------------------------------------------------------
    """
    Document node built by DocHandler.startElement for a "page" element.

    Each page is written to its own HTML file by visit(), which
    publishes the template registered in nodeIndex under
    self.templateID with this page as the page being published.
    """

    # Number of DocPage instances created so far; used for default names.
    counter = 0

    def __init__(self, attrs, URL, locator):
        """
        Initialise the page from its XML attributes.

        Recognised attributes are 'src', 'name', 'id', 'title' and
        'hide' ('yes' hides the page from the index). Any other
        attribute raises DocError.
        """
        DocNode.__init__(self, attrs, URL, locator)
        DocPage.counter = 1 + DocPage.counter
        self.templateID = "template.default"
        self.name  = "page%d" % DocPage.counter
        self.title = "untitled"
        self.hide = False

        for k, v in self.attrs.items():
            if k == 'src':
                # NOTE(review): 'src' is stored as the page title, just
                # like 'title'; if both are given the winner depends on
                # dictionary order -- confirm that this is intended.
                self.title = v
            elif k == 'name':
                self.name = v
            elif k == 'id':
                pass
            elif k == 'title':
                self.title = v
            elif k == 'hide':
                self.hide = (v.lower() == 'yes')
            else:
                raise DocError(
                    "web:page cannot have '%s' attribute" % k)

    def __str__(self):
        return DocNode.__str__(self) + ":<web:page name='%s' title='%s'>" \
            % (xml.sax.saxutils.escape(self.name),
               xml.sax.saxutils.escape(self.title))

    def getPublishFileName(self):
        """Return the name of the HTML file generated for this page."""
        return self.name + ".html"

    def getPublishURL(self):
        """Return the URL of the page: site URL + directory + file name."""
        siteNode = self.findAncestors(DocSite)[0]
        return siteNode.getPublishURL() + \
            self.getPublishDirName() + \
            self.getPublishFileName()

    def visit(self, generator):
        """
        Write the HTML file of this page, then visit the children.

        The output file is closed (and removed from the generator file
        stack) even if publishing the template raises, so that an error
        does not leave a dangling open file behind.
        """
        generator.open(self.getPublishFileName())
        try:
            templateNode = nodeIndex[self.templateID]
            templateNode.publish(generator, self)
        finally:
            generator.close()
        DocNode.visit(self, generator)

    def publish(self, generator, pageNode = None):
        """
        Publish the content of the page, but only when PAGENODE is this
        very page. Always returns None.
        """
        if pageNode is self:
            # this is the page being published, so go on
            if opts.verb: print("Publishing '%s'" % self.getPublishURL())
            DocNode.publish(self, generator, pageNode)
        # otherwise this page has been encountered recursively
        # during publishing
        return None

    def publishIndex(self, gen, inPage, activePageNodes, full=False):
        """
        Write the <li> index entry of this page, and recursively of its
        children, to the generator GEN.

        INPAGE is the page in which the index is embedded (links are
        expanded relative to it). ACTIVEPAGENODES lists the active
        pages; the page found at position 0 is tagged as the active
        leaf. Children are listed only for active pages unless FULL is
        true. Returns False when the page is hidden (nothing is
        written) and True otherwise.
        """
        if self.hide: return False
        active = (self in activePageNodes)
        activeLeaf = active and (activePageNodes.index(self) == 0)
        gen.putString("<li")
        # An element may carry only one 'class' attribute, so the two
        # class names are merged into a single attribute value.
        if activeLeaf:
            gen.putString(" class='active activeLeaf'")
        elif active:
            gen.putString(" class='active'")
        gen.putString("><a href=")
        gen.putXMLAttr(
            self.expandAttr("%%pathto:%s;" % self.getID(), inPage))
        gen.putString(">")
        gen.putXMLString(self.title)
        gen.putString("</a>\n")
        # Generate recursively the index of the children
        # This may or may not produce results; if not we need to backtrack,
        # so we save the position of the generator.
        pos = gen.tell()
        gen.putString("<ul>\n")
        if active or full:
            notEmpty = DocNode.publishIndex(self, gen, inPage, activePageNodes, full)
        else:
            notEmpty = False
        if notEmpty:
            gen.putString("</ul>")
        else:
            # No child entry was produced: rewind over the "<ul>".
            gen.seek(pos)
        gen.putString("</li>\n")
        return True
00908 
00909 # --------------------------------------------------------------------
class DocSite(DocNode):
# --------------------------------------------------------------------
    """
    Document node built by DocHandler.startElement for a "site" element.

    It stores the base URL of the website and the directory where the
    generated files are written, and starts the publishing process.
    """

    def __init__(self, attrs, URL, locator):
        DocNode.__init__(self, attrs, URL, locator)
        # Defaults; start() overrides both through the setters below.
        self.outDir = "html"
        self.siteURL = "http://www.foo.org/"

    def __str__(self):
        prefix = DocNode.__str__(self)
        return prefix + ":<web:site>"

    # -- base URL of the website ------------------------------------
    def setPublishURL(self, url):
        self.siteURL = url

    def getPublishURL(self):
        return self.siteURL

    def getPublishDirName(self):
        # The site contributes no directory component.
        return ""

    # -- output directory --------------------------------------------
    def setOutDir(self, outDir):
        self.outDir = outDir

    def getOutDir(self):
        return self.outDir

    def publish(self):
        """Create a Generator rooted at the output directory and visit the site with it."""
        self.visit(Generator(self.outDir))

    publish = makeGuard(publish)
00940 
00941 # --------------------------------------------------------------------
class Generator:
# --------------------------------------------------------------------
    """
    Output writer used while publishing the website.

    It keeps a stack of open output files (all put*/tell/seek calls act
    on the most recently opened one) and a stack of output directories
    (new files are created in the most recently entered one).
    """

    def __init__(self, rootDir):
        """Make sure ROOTDIR exists and use it as the initial directory."""
        ensureDir(rootDir)
        self.fileStack = []
        self.dirStack = [rootDir]
        #print "CD ", rootDir

    def open(self, filePath):
        """
        Create FILEPATH (relative to the current directory), make it
        the current output file and write the XHTML DOCTYPE to it.
        """
        filePath = os.path.join(self.dirStack[-1], filePath)
        fid = open(filePath, "w")
        self.fileStack.append(fid)
        fid.write(DOCTYPE_XHTML_TRANSITIONAL)
        #print "OPEN ", filePath

    def putString(self, str):
        """
        Write STR verbatim (UTF-8 encoded) to the current file.

        Raises DocError if the text cannot be encoded or written.
        """
        fid = self.fileStack[-1]
        try:
            fid.write(str.encode('utf-8'))
        except (UnicodeError, IOError) as e:
            # UnicodeError also covers the UnicodeDecodeError raised by
            # Python 2 when a non-ASCII byte string is encoded.
            # The built-in str() is shadowed by the parameter, hence
            # the explicit e.__str__() below.
            print(str)
            raise DocError("writing text:"  + e.__str__())

    def putXMLString(self, str):
        """
        Write STR to the current file as XML character data: '&', '<'
        and '>' are escaped and the characters listed in
        mapUnicodeToHtmlEntity are replaced by HTML entities.

        Raises DocError if the text cannot be encoded or written.
        """
        fid = self.fileStack[-1]
        xstr = xml.sax.saxutils.escape(str, mapUnicodeToHtmlEntity)
        try:
            fid.write(xstr.encode('utf-8'))
        except (UnicodeError, IOError) as e:
            raise DocError("writing XML-escaped string:"  + e.__str__())

    def putXMLAttr(self, str):
        """
        Write STR to the current file as a quoted XML attribute value
        (quotes included).

        Raises DocError if the text cannot be encoded or written.
        """
        fid = self.fileStack[-1]
        xstr = xml.sax.saxutils.quoteattr(str)
        try:
            fid.write(xstr.encode('utf-8'))
        except (UnicodeError, IOError) as e:
            raise DocError("writing XML-escaped attribute:"  + e.__str__())

    def close(self):
        """Close the current file and return to the previous one."""
        self.fileStack.pop().close()
        #print "CLOSE"

    def changeDir(self, dirName):
        """Enter the sub-directory DIRNAME, creating it if needed."""
        currentDir = self.dirStack[-1]
        newDir = os.path.join(currentDir, dirName)
        ensureDir(newDir)
        self.dirStack.append(newDir)
        #print "CD ", newDir

    def parentDir(self):
        """Leave the directory entered by the last changeDir()."""
        self.dirStack.pop()
        #print "CD .."

    def tell(self):
        """Return the write position in the current file."""
        fid = self.fileStack[-1]
        return fid.tell()

    def seek(self, pos):
        """Move the write position of the current file to POS."""
        fid = self.fileStack[-1]
        fid.seek(pos)
01011 
01012 # --------------------------------------------------------------------
class DocHandler(ContentHandler):
# --------------------------------------------------------------------
    """
    SAX handler that builds the tree of document nodes.

    The same object is registered as content handler, entity resolver
    and lexical handler of every parser created by load(). Because
    <web:include> starts a nested parse from within startElement(), the
    handler keeps a stack of the files being parsed and a parallel
    stack of their SAX locators. Once the outermost element is closed
    the tree is available as self.rootNode.
    """

    def __init__(self):
        ContentHandler.__init__(self)
        self.rootNode = None
        self.stack = []           # document nodes currently open
        self.locatorStack = []    # SAX locators, one per file being parsed
        self.filePathStack = []   # paths of the files being parsed
        self.inDTD = False        # True between startDTD() and endDTD()

    def resolveEntity(self, publicid, systemid):
        """
        Resolve XML entities by mapping to a local copy of the (X)HTML
        DTDs.

        Only the last path component of SYSTEMID is used; the file is
        opened from the 'dtd/xhtml1' directory next to this script.
        """
        return open(os.path.join(
                os.path.dirname(__file__),
                'dtd/xhtml1',
                systemid[systemid.rfind('/')+1:]), "rb")

    def lookupFile(self, filePath):
        """
        Return an existing path for FILEPATH, or None if there is none.

        FILEPATH is tried as given first. If it does not exist and does
        not start with '/', it is then tried relative to the directory
        of each file in the parsing stack, outermost file first.
        """
        if not filePath:
            # An empty name can never denote a file; bail out instead
            # of indexing into the empty string below.
            return None
        if os.path.exists(filePath):
            return filePath
        if filePath.startswith('/'):
            return None
        for path in self.filePathStack:
            qualFilePath = os.path.join(os.path.dirname(path), filePath)
            if os.path.exists(qualFilePath):
                return qualFilePath
        return None

    def makeError(self, message):
        """
        Return (without raising) a DocError for MESSAGE, annotated with
        the current location in every file of the parsing stack,
        innermost file first.
        """
        e = DocError(message)
        for i in range(len(self.filePathStack)-1, -1, -1):
            URL = self.filePathStack[i]
            locator = self.locatorStack[i]
            e.appendLocation(DocLocation(URL,
                                         locator.getLineNumber(),
                                         locator.getColumnNumber()))
        return e

    def startElement(self, name, attrs):
        """
        SAX interface: starting of XML element.
        The function creates a new document node, i.e. a specialized
        class of DocNode for the type of XML element encountered. It then
        appends it as the head of the parsing stack for further processing.

        An "include" element creates no node: the file named by its
        'src' attribute is parsed in place (type "webdoc", the default)
        or inserted as character data (type "text"). A DocError is
        raised if 'src' is missing, the file cannot be found or the
        type is not one of the two.
        """
        # convert attrs to a dictionary (implicitly copies as required by the doc)
        attrs = dict(attrs.items())

        URL = self.getCurrentFileName()
        locator = self.getCurrentLocator()

        # The <web:include> element is not parsed recursively; instead
        # it simply switches to parsing the specified file.
        if name == "include":
            if "src" not in attrs:
                raise self.makeError("<web:include> lacks the 'src' attribute")
            filePath = attrs["src"]
            qualFilePath = self.lookupFile(filePath)
            if qualFilePath is None:
                raise self.makeError("the file '%s' could not be found while expanding <web:include>" % filePath)
            if opts.verb: print("Parsing '%s'" % qualFilePath)
            includeType = attrs.get("type", "webdoc")
            if includeType == "webdoc":
                self.load(qualFilePath)
            elif includeType == "text":
                # Read the file and close it right away rather than
                # leaving the handle to the garbage collector.
                with open(qualFilePath, 'r') as textFile:
                    text = textFile.read()
                self.characters(text)
            else:
                raise self.makeError("'%s' is not a valid <web:include> type" % includeType)
            return

        if len(self.stack) == 0:
            parent = None
        else:
            parent = self.stack[-1]
        node = None

        if name == "site":
            node = DocSite(attrs, URL, locator)
        elif name == "page":
            node = DocPage(attrs, URL, locator)
        elif name == "dir":
            node = DocDir(attrs, URL, locator)
        elif name == "template":
            node = DocTemplate(attrs, URL, locator)
        elif name == "pagestyle":
            node = DocPageStyle(attrs, URL, locator)
        elif name == "pagescript":
            node = DocPageScript(attrs, URL, locator)
        elif name == "group":
            node = DocGroup(attrs, URL, locator)
        elif name == "precode":
            node = DocCode(attrs, URL, locator)
        else:
            # Any other element is treated as plain (X)HTML.
            node = DocHtmlElement(name, attrs, URL, locator)

        if parent: parent.adopt(node)
        self.stack.append(node)

    def endElement(self, name):
        """
        SAX interface: closing of XML element.

        Pops the node of the element from the parsing stack; the node
        that leaves the stack empty becomes the root node. "include"
        elements never pushed a node, so they are ignored.
        """
        if name == "include":
            return
        node = self.stack.pop()
        if len(self.stack) == 0:
            self.rootNode = node

    def load(self, qualFilePath):
        """
        Parse the file QUALFILEPATH with a new SAX parser that reports
        to this handler.

        Raises DocError (with location information) on XML parsing
        errors.
        """
        self.filePathStack.append(qualFilePath)
        parser = xml.sax.make_parser()
        parser.setContentHandler(self)
        parser.setEntityResolver(self)
        parser.setProperty(xml.sax.handler.property_lexical_handler, self)
        try:
            parser.parse(qualFilePath)
        except xml.sax.SAXParseException as e:
            raise self.makeError("XML parsing error: %s" % e.getMessage())

    def setDocumentLocator(self, locator):
        """SAX interface: This is called when a new file is parsed to set the locator object."""
        self.locatorStack.append(locator)

    def getCurrentLocator(self):
        """Return the locator of the file being parsed, or None if there is none."""
        if len(self.locatorStack) > 0:
            return self.locatorStack[-1]
        else:
            return None

    def characters(self, content):
        """
        SAX interface: characters.

        Wraps CONTENT in a text node whose class depends on the node
        currently open (CDATA section, code block or anything else)
        and attaches it to that node.
        """
        parent = self.stack[-1]
        if parent.isA(DocCDATA):
            node = DocCDATAText(content)
        elif parent.isA(DocCode):
            node = DocCodeText(content)
        else:
            node = DocHtmlText(content)
        parent.adopt(node)

    def ignorableWhitespace(self, ws):
        """SAX interface: ignorable whitespace is kept as ordinary text."""
        self.characters(ws)

    def getCurrentFileName(self):
        """Return the path of the file being parsed."""
        return self.filePathStack[-1]

    def endDocument(self):
        """SAX interface: the current file is done; drop its locator and path."""
        self.locatorStack.pop()
        self.filePathStack.pop()

    def startCDATA(self):
        """Lexical handler: open a DocCDATA node for a CDATA section."""
        node = DocCDATA()
        self.stack[-1].adopt(node)
        self.stack.append(node)

    def endCDATA(self):
        """Lexical handler: close the node of the current CDATA section."""
        node = self.stack.pop()
        if len(self.stack) == 0:
            self.rootNode = node

    def comment(self, body):
        """
        Lexical handler: copy an XML comment to the document verbatim.
        Comments found inside the DTD are dropped.
        """
        if self.inDTD: return
        node = DocCDATAText("<!--" + body + "-->")
        self.stack[-1].adopt(node)

    def startEntity(self, name): pass
    def endEntity(self, name): pass

    def startDTD(self, name, public_id, system_id):
        """Lexical handler: the DTD begins."""
        self.inDTD = True

    def endDTD(self):
        """Lexical handler: the DTD ends."""
        self.inDTD = False
01199 
01200 # --------------------------------------------------------------------
def start(filePath, opts):
# --------------------------------------------------------------------
    """
    Parse the website description FILEPATH and publish the website.

    OPTS is the option object produced by the command line parser; the
    fields read here are verb, outdir, siteurl, doxytag, doxydir and
    indexfile. The function does not return: it terminates the process
    with sys.exit(0) on success and sys.exit(-1) after printing the
    error on failure.
    """
    global doxygenIndex
    global doxygenDir

    if not has_pygments and opts.verb:
        print("Warning: Pygments module not found: syntax coloring disabled.")

    handler = DocHandler()
    try:
        handler.load(filePath)
    except DocError as e:
        print(e)
        sys.exit(-1)

    # configure site
    handler.rootNode.setOutDir(opts.outdir)
    handler.rootNode.setPublishURL(opts.siteurl)

    # load doxygen tag file
    if opts.doxytag:
        if opts.verb: print("Loading the Doxygen tag file %s" % opts.doxytag)
        try:
            doxygenIndex = Doxytag(opts.doxytag)
            doxygenDir = opts.doxydir
        except Exception as e:
            print("Error parsing the Doxygen tag file %s" % opts.doxytag)
            print(e)
            sys.exit(-1)

    if opts.verb > 2:
        print("== All pages ==")
        for x in walkNodes(handler.rootNode, DocPage):
            print(x)

    if opts.verb: print("Publishing website...")
    try:
        handler.rootNode.publish()
    except DocError as e:
        print(e)
        sys.exit(-1)

    if opts.indexfile:
        if opts.verb: print("Storing the website index to %s" % opts.indexfile)
        try:
            # The 'with' block closes the index file on both the normal
            # and the error path.
            with open(opts.indexfile, 'w+') as f:
                siteurl = nodeIndex['root'].getPublishURL()
                # One "id|relative-url" line per indexed node, sorted by
                # node ID.
                for nodeID in sorted(nodeIndex):
                    x = nodeIndex[nodeID]
                    if (x.isA(DocHtmlElement) or x.isA(DocPage)) and x.attrs.has_key('id'):
                        url = x.getPublishURL()
                        if not url: continue
                        f.write('%s|%s\n' % (x.attrs['id'],
                                             calcRelURL(url,siteurl)))
                # Followed by one line per Doxygen tag, if a tag file
                # was loaded.
                if doxygenIndex:
                    for tag in sorted(doxygenIndex.index):
                        url = getDoxygenURL(tag)
                        f.write('%s|%s\n' % (tag,
                                             calcRelURL(url,siteurl)))
        except Exception as e:
            print("Error writing the website index file")
            print(e)
            sys.exit(-1)

    sys.exit(0)
01265 
01266 # --------------------------------------------------------------------
if __name__ == '__main__':
# --------------------------------------------------------------------
    # Command line entry point: parse the options, check that an input
    # file was given and hand over to start(), optionally under the
    # profiler.
    usage = """webdoc [OPTIONS...] <DOC.XML>

--outdir    Set output directory
--verbose   Be verbose
--doxytag   Doxygen tag file
--doxydir   Doxygen documentation location
--profile   Collect and print profiling information
--siteurl   Set the base URL of the website
--indexfile Store the website index here
"""
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-v", "--verbose",
        dest    = "verb",
        default = 0,
        action  = "count",
        help    = "print more debuging information")
    parser.add_option(
        "-o", "--outdir",
        dest    = "outdir",
        default = "html",
        action  = "store",
        help    = "write output to this directory")
    parser.add_option(
        "", "--doxytag",
        dest    = "doxytag",
        default = None,
        action  = "store",
        help    = "use this doxygen tag file")
    parser.add_option(
        "", "--doxydir",
        dest    = "doxydir",
        default = ".",
        action  = "store",
        help    = "find doxygen documentation here")
    parser.add_option(
        "", "--profile",
        dest    = "profile",
        default = False,
        action  = "store_true",
        help    = "run the profiler")
    parser.add_option(
        "", "--siteurl",
        dest = "siteurl",
        default = "",
        action = "store",
        help = "set the base URL of the website")
    parser.add_option(
        "", "--indexfile",
        dest = "indexfile",
        default = None,
        action = "store",
        help = "store the website index here")

    # 'opts' is deliberately a module-level name: DocPage.publish and
    # DocHandler.startElement read opts.verb from the module globals.
    (opts, args) = parser.parse_args()

    # Without this check a missing input file surfaces as a bare
    # IndexError traceback on args[0]; parser.error() prints the usage
    # and exits instead.
    if not args:
        parser.error("missing the <DOC.XML> input file")

    if opts.profile:
        cProfile.run('start(args[0], opts)')
    else:
        start(args[0], opts)