00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 """
00013 MDOC formats the help block of a MATLAB M-file based on a simple set
00014 of rules. Paragraphs, verbatim sections, lists and other structures
00015 are automatically instantiated by looking at blank lines, indentation
00016 and a few decoration symbols.
00017
00018 The documentation starts at a conventional indentation level N (by
00019 default 2). A block of non-empty lines prefixed by N characters is
00020 considered a paragraph. For instance
00021
00022 | Bla bla bla
00023 | bla bla bla.
00024 |
00025 | Bla bla.
00026
00027 generates two paragraphs. If there are more than N white spaces,
00028 then the block is taken verbatim instead (and rendered in <pre> HTML
00029 tags). For instance
00030
00031 | Bla bla bla
00032 | Code Code Code
00033 |
00034 | Code Code Code
00035
00036 generates one paragraph followed by one verbatim section.
00037 """
00038
00039 import xml.dom.minidom
00040 import sys
00041 import os
00042 import re
00043
# Module metadata. The long description reuses the module docstring.
__mpname__ = 'MDocFormatter'
__version__ = '0.1'
__date__ = '2008-01-01'
__description__ = 'MDoc formatting module'
__long_description__ = __doc__
__license__ = 'BSD'
__author__ = 'Andrea Vedaldi'
00051
00052
class Terminal(object):
    """Base class of all terminal symbols (one per input line)."""

    def isa(self, classinfo):
        """Return True if this terminal is an instance of CLASSINFO."""
        return isinstance(self, classinfo)


class E(Terminal):
    """End of the token stream."""
    pass


class B(Terminal):
    """Blank line."""
    content = ""


class L(Terminal):
    """Non-blank line; INDENT is the width of its leading whitespace."""
    indent = 0
    # Text of the line. Declared here so that every token exposes the
    # attribute even before the lexer fills it in.
    content = ""


class PL(L):
    """Plain line of text."""
    pass


class BL(L):
    """Line introducing a bullet list item."""
    # Bullet decoration, including the whitespace that follows it.
    bullet = None
    # Column at which the text of the item starts.
    inner_indent = 0
    # Text of the item without the bullet decoration.
    inner_content = ""


class DL(L):
    """Line introducing a description list item (contains `::')."""
    # Text found after the `::' separator.
    inner_content = ""
00081
00082
def lex(line):
    """
    Convert the string LINE into the terminal symbol describing it.

    The candidates are tried in a fixed order and the first one that
    fits wins: blank line (B), description item (DL, any line holding
    `::'), bullet item (BL, a line opened by `-', `*' or `#') and
    finally plain line (PL). None is returned when no candidate fits.
    """
    # Blank line
    if re.match(r"\s*\n?$", line):
        return B()

    # Description item: TERM::REST
    found = re.match(r"(\s*)(.*)::(.*)\n?$", line)
    if found:
        lead, term, rest = found.groups()
        token = DL()
        token.indent = len(lead)
        token.content = term
        token.inner_content = rest
        return token

    # Bullet item: the decoration keeps the whitespace that follows it
    found = re.match(r"(\s*)([-\*#]\s*)(\S.*)\n?$", line)
    if found:
        lead, decoration, body = found.groups()
        token = BL()
        token.indent = len(lead)
        token.inner_content = body
        token.bullet = decoration
        token.inner_indent = token.indent + len(decoration)
        token.content = decoration + body
        return token

    # Plain line
    found = re.match(r"(\s*)(\S.*)\n?$", line)
    if found:
        lead, body = found.groups()
        token = PL()
        token.indent = len(lead)
        token.content = body
        return token

    return None
00122
00123
class Lexer(object):

    """
    l = Lexer(LINES) parses the array of strings LINES. Lexer has a
    head pointing to the current line. The head starts just before
    the first line and can be controlled by the following methods:

    l.next() advances the head and fetches the next terminal. Past
             the last line it returns an E terminal.
    l.back() moves back the head (never before the start position).
    l.getpos() returns the head position.
    l.seek(POS) sets the head position to POS.
    l.rewrite(T) replaces the terminal under the head with T.
    """
    def __init__(self, lines):
        self.tokens = [lex(line) for line in lines]
        # The head starts before the first token, so that the first
        # call to next() returns token 0.
        self.pos = -1

    def next(self):
        self.pos += 1
        if self.pos >= len(self.tokens):
            return E()
        return self.tokens[self.pos]

    def seek(self, pos):
        self.pos = pos

    def back(self):
        if self.pos >= 0:
            self.pos -= 1

    def rewrite(self, str):
        # The parameter name is kept for backward compatibility with
        # callers passing it by keyword.
        self.tokens[self.pos] = str

    def getpos(self):
        return self.pos

    def __str__(self):
        # One line per token: index, terminal class name and content.
        return "".join(
            "%5d) %s %s\n" % (i, t.__class__.__name__, t.content)
            for i, t in enumerate(self.tokens))
00166
00167
class Formatter(object):

    """
    f = Formatter(LINES) parses the array of strings LINES. The
    indentation of the first line (number of leading spaces) is taken
    as the base indentation level of the documentation.

    f = Formatter(LINES, FUNCS) takes the dictionary of functions
    FUNCS. Function names must be uppercase. The dictionary entries
    are used to cross link functions in the generated documentation.

    Formatter(LINES, FUNCS, LINKTYPE) produces links of the specified
    type. Use 'a' for HTML anchors and 'wiki' for MediaWiki style
    links. With any other value links are rendered as plain text.

    f.toDOM() processes the data to construct an XML (HTML)
    representation of them.
    """
    def __init__(self, lines, funcs=None, linktype='a'):
        # Base indentation: leading spaces of the first line (0 when
        # there are no lines at all).
        self.indentinit = 0
        if lines:
            lineone = lines[0]
            self.indentinit = len(lineone) - len(lineone.lstrip(' '))

        self.tokens = Lexer(lines)
        self.xmldoc = xml.dom.minidom.Document()
        # A fresh dictionary per instance instead of a shared mutable
        # default argument.
        self.funcs = {} if funcs is None else funcs
        self.linktype = linktype

    def toTextNode(self, s):
        "Returns a DOM text node for S (byte strings are ISO-8859-1)."
        # Byte strings are decoded; text that is already unicode is
        # used as is.
        if isinstance(s, bytes):
            s = s.decode('iso-8859-1')
        return self.xmldoc.createTextNode(s)

    def addAttr(self, tag, attr, val):
        "Sets the attribute ATTR of the element TAG to VAL."
        x = self.xmldoc.createAttribute(attr)
        x.nodeValue = val
        tag.setAttributeNode(x)

    def addText(self, tag, s):
        "Appends the text S to the element TAG."
        txt = self.toTextNode(s)
        tag.appendChild(txt)

    def _makeLink(self, href, text):
        "Returns the node rendering a link to HREF labelled TEXT."
        if self.linktype == 'a':
            atag = self.xmldoc.createElement(u"a")
            self.addText(atag, text)
            atag.setAttribute(u"href", u"%s" % (href,))
            return atag
        if self.linktype == 'wiki':
            return self.toTextNode("[[%s|%s]]" % (href, text))
        # Unknown link type: keep the label rather than dropping it.
        return self.toTextNode(text)

    def addFancyText(self, tag, s):
        "Adds text while transforming function references to links."
        xs = []
        # Index of the last character of S already emitted.
        last = -1
        matches = re.finditer(r'(?:'
                              r'(?P<function>[A-Z][A-Z0-9_]*)'
                              r'\([^\)]*\)'
                              r')|(?:'
                              r'<a href="matlab:vl_help\(\''
                              r'(?P<page>[a-zA-Z0-9_]*)'
                              r'\'\)">'
                              r'(?P<text>[^<]*)'
                              r'</a>'
                              r')', s)

        for i in matches:
            func_name = i.group("function")
            page_name = i.group("page")

            if func_name and func_name.upper() in self.funcs:
                # FUNCTION(...) where FUNCTION is a known function.
                func_href = self.funcs[func_name.upper()]

                # Text preceding the function name.
                xs.append(self.toTextNode(s[last+1:i.start()]))
                xs.append(self._makeLink(func_href, func_name))

                # Only the name is consumed; the argument list that
                # follows is emitted as regular text.
                last = i.start() + len(func_name) - 1

            elif page_name:
                # <a href="matlab:vl_help('PAGE')">TEXT</a>
                page_href = "%%dox:%s;" % page_name

                # Text preceding the anchor.
                xs.append(self.toTextNode(s[last+1:i.start()]))
                xs.append(self._makeLink(page_href, i.group('text')))

                # The whole anchor is consumed.
                last = i.end() - 1

        # Text following the last link.
        xs.append(self.toTextNode(s[last+1:]))
        for x in xs:
            tag.appendChild(x)

    def parse_Terminal(self, T):
        """If the next terminal on the stream is of type T, the terminal
        is extracted and returned. Otherwise the stream is left
        untouched and the function returns None."""
        pos = self.tokens.getpos()
        t = self.tokens.next()
        if t.isa(T):
            return t
        self.tokens.seek(pos)
        return None

    def parse_DIV(self, indent):
        """Parse a DIV(N) symbol. A DIV(N) is a sequence of blank
        lines (B) or other blocks at indentation level N, such as
        paragraphs P(N), verbatim sections V(N), bullet lists UL(N)
        and description lists DL(N). Returns the list of DOM nodes
        of the blocks, or None if there is none."""
        xs = []
        while True:
            # Blank lines separate blocks and produce no output.
            if self.parse_Terminal(B):
                continue

            x = self.parse_P(indent)
            if x:
                xs.append(x)
                continue

            x = self.parse_V(indent)
            if x:
                xs.append(x)
                continue

            x = self.parse_UL(indent)
            if x:
                xs.append(x)
                continue

            x = self.parse_DL(indent)
            if x:
                xs.append(x)
                continue

            break
        if len(xs) == 0:
            return None
        return xs

    def parse_P(self, indent):
        """Parse a paragraph P(N): a plain line PL at indentation N
        followed by any number of lines L at the same indentation.
        Returns a <p> element or None."""
        content = "\n"

        # Introduced by PL
        x = self.parse_Terminal(PL)
        if not x:
            return None
        if x.indent != indent:
            self.tokens.back()
            return None
        content += x.content + "\n"

        # Continued by zero or more L
        while True:
            x = self.parse_Terminal(L)
            if x:
                if x.indent == indent:
                    content += x.content + "\n"
                    continue
                self.tokens.back()
            break

        ptag = self.xmldoc.createElement("p")
        self.addFancyText(ptag, content)
        return ptag

    def parse_V(self, indent):
        """Parse a verbatim section V(N): lines L indented more than
        N, possibly interleaved with blank lines. Returns a <pre>
        element or None (in which case the stream is left untouched)."""
        content = "\n"
        good = False
        pos = self.tokens.getpos()
        while True:
            x = self.parse_Terminal(L)
            if x:
                if x.indent > indent:
                    # Keep the indentation relative to the block.
                    content += " "*(x.indent - indent) + x.content + "\n"
                    good = True
                    continue
                self.tokens.back()
            x = self.parse_Terminal(B)
            if x:
                content += "\n"
                continue
            break
        if good:
            ptag = self.xmldoc.createElement("pre")
            # Remove a potential blank line at the end
            if content[-2:] == "\n\n":
                content = content[:-1]
            self.addText(ptag, content)
            return ptag
        self.tokens.seek(pos)
        return None

    def parse_UL(self, indent):
        """Parse a bullet list UL(N): one or more items ULI(N).
        Returns a <ul> element or None."""
        xs = []
        while True:
            x = self.parse_ULI(indent)
            if x:
                xs.append(x)
                continue
            break
        if len(xs) == 0:
            return None
        ultag = self.xmldoc.createElement("ul")
        for x in xs:
            ultag.appendChild(x)
        return ultag

    def parse_ULI(self, indent):
        """Parse a bullet list item ULI(N): a bullet line BL at
        indentation N, continued by lines L aligned with the text of
        the item and then by an optional DIV at that same level.
        Returns an <li> element or None."""
        content = "\n"

        # Introduced by BL
        x = self.parse_Terminal(BL)
        if not x:
            return None
        if x.indent != indent:
            self.tokens.back()
            return None
        content += x.inner_content + "\n"
        # From now on the reference level is the column of the text
        # following the bullet.
        indent = x.inner_indent

        # Continued by zero or more L
        while True:
            x = self.parse_Terminal(L)
            if x:
                if x.indent == indent:
                    content += x.content + "\n"
                    continue
                self.tokens.back()
            break
        litag = self.xmldoc.createElement(u"li")
        ptag = self.xmldoc.createElement(u"p")
        self.addFancyText(ptag, content)
        litag.appendChild(ptag)

        # Continued by DIV
        xs = self.parse_DIV(indent)
        if xs:
            for x in xs:
                litag.appendChild(x)

        return litag

    def parse_DL(self, indent):
        """Parse a description list DL(N): one or more items DI(N).
        Returns a <dl> element or None."""
        xs = []
        while True:
            x = self.parse_DI(indent)
            if x:
                xs += x
                continue
            break
        if len(xs) == 0:
            return None
        dltag = self.xmldoc.createElement(u"dl")
        for x in xs:
            dltag.appendChild(x)
        return dltag

    def parse_DI(self, indent):
        """Parse a description item DI(N): a description line DL at
        indentation N, optionally followed by a DIV indented more
        than N. Returns the list of the generated <dt> and <dd>
        elements or None."""
        content = "\n"
        xs = []

        # Introduced by DL
        x = self.parse_Terminal(DL)
        if not x:
            return None
        if x.indent != indent:
            self.tokens.back()
            return None
        content += x.content + "\n"

        # The text after `::' is added to the description term dt
        dttag = self.xmldoc.createElement(u"dt")
        dttxt = self.toTextNode(content)
        dttag.appendChild(dttxt)
        c = x.inner_content.strip()
        if len(c) > 0:
            deftag = self.xmldoc.createElement(u"span")
            self.addAttr(deftag, "class", "defaults")
            self.addText(deftag, c)
            dttag.appendChild(deftag)
        xs.append(dttag)

        # Continued by DIV: peek at the next terminal to discover the
        # indentation level of the description body.
        t = self.tokens.next()
        self.tokens.back()
        if t.isa(L) and t.indent > indent:
            body = self.parse_DIV(t.indent)
            # parse_DIV returns None when it finds no block.
            if body:
                ddtag = self.xmldoc.createElement(u"dd")
                for node in body:
                    ddtag.appendChild(node)
                xs.append(ddtag)

        return xs

    def toDOM(self):
        """Parse the whole token stream and return the XML document,
        whose root is a <div class="documentation"> element."""
        xmf = self.xmldoc.createElement("div")
        xmf.setAttribute(u"class", u"documentation")

        self.xmldoc.appendChild(xmf)

        # parse_DIV returns None when the input holds no block (for
        # instance only blank lines); the root is then left empty.
        xs = self.parse_DIV(self.indentinit)
        for x in (xs or []):
            xmf.appendChild(x)

        return self.xmldoc
00545
00546
# Demo: format a sample help block and write the resulting XML to the
# standard output. The indentation inside the sample text is
# significant: it is what selects paragraphs, verbatim sections, nested
# lists and description bodies.
if __name__ == '__main__':
    text=""" Lorem Ipsum is simply dummy text of the printing and typesetting
 industry. Lorem Ipsum has been the industry's standard dummy text
 ever since the 1500s, when an unknown printer took a galley of type
 and scrambled it to make a type specimen book. It has survived not
 only five centuries, but also the leap into electronic typesetting,
 remaining essentially unchanged. It was popularised in the 1960s with
 the release of Letraset sheets containing Lorem Ipsum passages, and
 more recently with desktop publishing software like Aldus PageMaker
 including versions of Lorem Ipsum.

 Also <a href="matlab:vl_help('fisher')">Fisher vectors</a>.

 These are links BL(), BL(A,B) and BLA(A,A) (as long as the dictionary
 cites them).

 Mimamama
   verbatim1
   verbatim2
   verbatim3

   verbatim4
   verbatim5
 Lorem Ipsum is simply dummy text of the printing and typesetting
 industry. Lorem Ipsum has been the industry's standard dummy text
 ever since the 1500s, when an unknown printer took a galley of type
 and scrambled it to make a type specimen book. It has survived not
 only five centuries, but also the leap into electronic typesetting,
 remaining essentially unchanged. It was popularised in the 1960s with
 the release of Letraset sheets containing Lorem Ipsum passages, and
 more recently with desktop publishing software like Aldus PageMaker
 including versions of Lorem Ipsum.

 - outer1 /
   outer1 line 2 /
   outer1 line 3 /

   outer1 new paragarph

   - inner1
   - inner2
   - inner3
     continued on next line
       continued with verbatim

       more verbatim after blank
   - inner4
 - outer again
 - outer
 bla

 - list2
 - list4
 - BL()
 - BL(A,B)

 Test descrition::
     Lorem Ipsum is simply dummy text of the printing
     and typesetting industry. Lorem Ipsum has been the industry's
     standard dummy text ever since the 1500s, when an unknown printer
     took a galley of type and scrambled it to make a type specimen
     book. It has survived not only five centuries, but also the leap
     into electronic typesetting, remaining essentially unchanged. It
     was popularised in the 1960s with the release of Letraset sheets
     containing Lorem Ipsum passages, and more recently with desktop
     publishing software like Aldus PageMaker including versions of
     Lorem Ipsum.

 Ancora::
     Bli bli bli
     Blu blu blu

     - list
     - lust
     - last

     Bli bla

   Verbatimmo
"""
    lines = text.splitlines()
    formatter = Formatter(lines, {'BL':'http://www.google.com'}, 'a')
    # toxml() with an explicit encoding returns encoded bytes; write
    # them through the binary layer of stdout when there is one, and
    # through stdout itself otherwise.
    output = getattr(sys.stdout, "buffer", sys.stdout)
    output.write(formatter.toDOM().toxml("UTF-8"))
    output.write(b"\n")