openrtm_aist_python: ezt.py Source File

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 """ezt.py -- easy templating
00003 
00004 ezt templates are very similar to standard HTML files.  But additionally
00005 they contain directives sprinkled in between.  With these directives
00006 it is possible to generate the dynamic content from the ezt templates.
00007 
00008 These directives are enclosed in square brackets.  If you are a 
00009 C-programmer, you might be familiar with the #ifdef directives of the
00010 C preprocessor 'cpp'.  ezt provides a similar concept for HTML.  Additionally 
00011 EZT has a 'for' directive, which allows to iterate (repeat) certain 
00012 subsections of the template according to sequence of data items
00013 provided by the application.
00014 
00015 The HTML rendering is performed by the method generate() of the Template
00016 class.  Building template instances can either be done using external
00017 EZT files (convention: use the suffix .ezt for such files):
00018 
00019     >>> template = Template("../templates/log.ezt")
00020 
00021 or by calling the parse() method of a template instance directly with 
00022 a EZT template string:
00023 
00024     >>> template = Template()
00025     >>> template.parse('''<html><head>
00026     ... <title>[title_string]</title></head>
00027     ... <body><h1>[title_string]</h1>
00028     ...    [for a_sequence] <p>[a_sequence]</p>
00029     ...    [end] <hr>
00030     ...    The [person] is [if-any state]in[else]out[end].
00031     ... </body>
00032     ... </html>
00033     ... ''')
00034 
00035 The application should build a dictionary 'data' and pass it together
00036 with the output fileobject to the templates generate method:
00037 
00038     >>> data = {'title_string' : "A Dummy Page",
00039     ...         'a_sequence' : ['list item 1', 'list item 2', 'another element'],
00040     ...         'person': "doctor",
00041     ...         'state' : None }
00042     >>> import sys
00043     >>> template.generate(sys.stdout, data)
00044     <html><head>
00045     <title>A Dummy Page</title></head>
00046     <body><h1>A Dummy Page</h1>
00047      <p>list item 1</p>
00048      <p>list item 2</p>
00049      <p>another element</p>
00050      <hr>
00051     The doctor is out.
00052     </body>
00053     </html>
00054 
00055 Template syntax error reporting should be improved.  Currently it is 
00056 very sparse (template line numbers would be nice):
00057 
00058     >>> Template().parse("[if-any where] foo [else] bar [end unexpected args]")
00059     Traceback (innermost last):
00060       File "<stdin>", line 1, in ?
00061       File "ezt.py", line 220, in parse
00062         self.program = self._parse(text)
00063       File "ezt.py", line 275, in _parse
00064         raise ArgCountSyntaxError(str(args[1:]))
00065     ArgCountSyntaxError: ['unexpected', 'args']
00066     >>> Template().parse("[if unmatched_end]foo[end]")
00067     Traceback (innermost last):
00068       File "<stdin>", line 1, in ?
00069       File "ezt.py", line 206, in parse
00070         self.program = self._parse(text)
00071       File "ezt.py", line 266, in _parse
00072         raise UnmatchedEndError()
00073     UnmatchedEndError
00074 
00075 
00076 Directives
00077 ==========
00078 
00079  Several directives allow the use of dotted qualified names referring to objects
00080  or attributes of objects contained in the data dictionary given to the 
00081  .generate() method.
00082 
00083  Simple directives
00084  -----------------
00085 
00086    [QUAL_NAME]
00087 
00088    This directive is simply replaced by the value of identifier from the data 
00089    dictionary.  QUAL_NAME might be a dotted qualified name referring to some
00090    instance attribute of objects contained in the data dictionary.
00091    Numbers are converted to string though.
00092 
00093    [include "filename"]  or [include QUAL_NAME]
00094 
00095    This directive is replaced by content of the named include file.
00096 
00097  Block directives
00098  ----------------
00099 
00100    [for QUAL_NAME] ... [end]
00101    
00102    The text within the [for ...] directive and the corresponding [end]
00103    is repeated for each element in the sequence referred to by the qualified
00104    name in the for directive.  Within the for block this identifiers now 
00105    refers to the actual item indexed by this loop iteration.
00106 
00107    [if-any QUAL_NAME [QUAL_NAME2 ...]] ... [else] ... [end]
00108 
00109    Test if any QUAL_NAME value is not None or an empty string or list.  
00110    The [else] clause is optional.  CAUTION: Numeric values are converted to
00111    string, so if QUAL_NAME refers to a numeric value 0, the then-clause is
00112    substituted!
00113 
00114    [if-index INDEX_FROM_FOR odd] ... [else] ... [end]
00115    [if-index INDEX_FROM_FOR even] ... [else] ... [end]
00116    [if-index INDEX_FROM_FOR first] ... [else] ... [end]
00117    [if-index INDEX_FROM_FOR last] ... [else] ... [end]
00118    [if-index INDEX_FROM_FOR NUMBER] ... [else] ... [end]
00119 
00120    These five directives work similar to [if-any], but are only useful 
00121    within a [for ...]-block (see above).  The odd/even directives are 
00122    for example useful to choose different background colors for adjacent rows 
00123    in a table.  Similar the first/last directives might be used to
00124    remove certain parts (for example "Diff to previous" doesn't make sense,
00125    if there is no previous).
00126 
00127    [is QUAL_NAME STRING] ... [else] ... [end]
00128    [is QUAL_NAME QUAL_NAME] ... [else] ... [end]
00129 
00130    The [is ...] directive is similar to the other conditional directives
00131    above.  But it allows to compare two value references or a value reference
00132    with some constant string.
00133  
00134 """
00135 #
00136 # Copyright (C) 2001-2002 Greg Stein. All Rights Reserved.
00137 #
00138 # Redistribution and use in source and binary forms, with or without 
00139 # modification, are permitted provided that the following conditions are 
00140 # met:
00141 #
00142 # * Redistributions of source code must retain the above copyright 
00143 #   notice, this list of conditions and the following disclaimer. 
00144 #
00145 # * Redistributions in binary form must reproduce the above copyright 
00146 #   notice, this list of conditions and the following disclaimer in the 
00147 #   documentation and/or other materials provided with the distribution. 
00148 #
00149 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
00150 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00151 # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
00152 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE 
00153 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
00154 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
00155 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
00156 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
00157 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
00158 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
00159 # POSSIBILITY OF SUCH DAMAGE.
00160 #
00161 #
00162 # This software is maintained by Greg and is available at:
00163 #    http://viewcvs.sourceforge.net/
00164 # it is also used by the following projects:
00165 #    http://edna.sourceforge.net/
00166 #
00167 
00168 import string
00169 import re
00170 from types import StringType, IntType, FloatType
00171 import os
00172 
#
# This regular expression matches three alternatives:
#   expr: DIRECTIVE | BRACKET | COMMENT
#   DIRECTIVE: '[' ITEM (spaces ITEM)* ']'
#   ITEM: STRING | NAME
#   STRING: '"' (not-slash-or-dquote | '\' anychar)* '"'
#   NAME: (alphanum | '_' | '-' | '.')+
#   BRACKET: '[[]'
#   COMMENT: '[#' not-rbracket* ']'
#
# When used with the split() method, the return value is composed of the
# non-matching text and the two paren groups (DIRECTIVE and BRACKET), i.e.
# it always has the shape: TEXT (DIRECTIVE BRACKET TEXT)*. The DIRECTIVE
# group holds the text *between* the square brackets. A COMMENT match is not
# placed into a group, so both groups come back as None and the comment text
# itself is simply dropped.
#
_item = r'(?:"(?:[^\\"]|\\.)*"|[-\w.]+)'
_re_parse = re.compile(r'\[(%s(?: +%s)*)\]|(\[\[\])|\[#[^\]]*\]' % (_item, _item))

# splits the inside of a DIRECTIVE into its individual ITEMs
_re_args = re.compile(r'"(?:[^\\"]|\\.)*"|[-\w.]+')

# block commands and their argument counts
_block_cmd_specs = { 'if-index':2, 'for':1, 'is':2 }
_block_cmds = _block_cmd_specs.keys()

# two regular expressions for compressing whitespace. the first is used to
# compress any whitespace including a newline into a single newline. the
# second regex is used to compress runs of whitespace into a single space.
_re_newline = re.compile('[ \t\r\f\v]*\n\\s*')
_re_whitespace = re.compile(r'\s\s+')

# this regex is used to substitute arguments into a value. we split the value,
# replace the relevant pieces, and then put it all back together. splitting
# will produce a list of: TEXT ( splitter TEXT )*. splitter will be '%' or
# an integer.
_re_subst = re.compile('%(%|[0-9]+)')
00208 
class Template:
  """An EZT template: parsed once into a 'program', then rendered on demand.

  The program is a list whose items are either plain strings (written
  verbatim) or (function, argument) 2-tuples, where function is one of the
  _cmd_* methods below and is invoked as function(argument, fp, ctx).
  """

  def __init__(self, fname=None, compress_whitespace=1):
    """Create a template, parsing the file 'fname' right away if given.

    compress_whitespace -> when true, whitespace runs in the literal template
    text are collapsed while parsing (see _parse).
    """
    self.compress_whitespace = compress_whitespace
    if fname:
      self.parse_file(fname)

  def parse_file(self, fname):
    "fname -> a string object with pathname of file containing an EZT template."

    self.program = self._parse(_FileReader(fname))

  def parse(self, text_or_reader):
    """Parse the template specified by text_or_reader.

    The argument should be a string containing the template, or it should
    specify a subclass of ezt.Reader which can read templates.
    """
    if not isinstance(text_or_reader, Reader):
      # assume the argument is a plain text string
      text_or_reader = _TextReader(text_or_reader)
    self.program = self._parse(text_or_reader)

  def generate(self, fp, data):
    """Render the parsed template to the file object 'fp'.

    data -> the dictionary the template's references are looked up in.
    A template must have been parsed first (self.program is read here).
    """
    ctx = _context()
    ctx.data = data
    ctx.for_index = { }
    self._execute(self.program, fp, ctx)

  def _parse(self, reader, for_names=None, file_args=()):
    """reader -> object providing the template source as its 'text' attribute
    and a read_other(name) method used to resolve [include] directives.
    for_names -> list of the [for] names active at this point (used when
    parsing an included file from inside a for block).
    file_args -> prepared references passed as arguments to an included file.

    This is a private helper function doing the real work for method parse.
    It returns the parsed template as a 'program'.  This program is a sequence
    made out of strings or (function, argument) 2-tuples.

    Raises ArgCountSyntaxError, UnmatchedEndError or UnclosedBlocksError on
    malformed templates.

    Note: comment directives [# ...] are automatically dropped by _re_parse.
    """

    # parse the template program into: (TEXT DIRECTIVE BRACKET)* TEXT
    parts = _re_parse.split(reader.text)

    program = [ ]
    # one entry per open block: [cmd, program position, args, true-section]
    stack = [ ]
    if not for_names:
      for_names = [ ]

    for i, piece in enumerate(parts):
      which = i % 3  # discriminate between: TEXT DIRECTIVE BRACKET
      if which == 0:
        # TEXT. append if non-empty.
        if piece:
          if self.compress_whitespace:
            piece = _re_whitespace.sub(' ', _re_newline.sub('\n', piece))
          program.append(piece)
      elif which == 2:
        # BRACKET directive. append '[' if present.
        if piece:
          program.append('[')
      elif piece:
        # DIRECTIVE is present.
        args = _re_args.findall(piece)
        cmd = args[0]
        if cmd == 'else':
          if len(args) > 1:
            raise ArgCountSyntaxError(str(args[1:]))
          ### check: don't allow for 'for' cmd
          ### note: an [else] outside of any block surfaces as an IndexError
          # everything emitted since the block opened is the true-section;
          # move it off the program and park it on the stack entry.
          idx = stack[-1][1]
          true_section = program[idx:]
          del program[idx:]
          stack[-1][3] = true_section
        elif cmd == 'end':
          if len(args) > 1:
            raise ArgCountSyntaxError(str(args[1:]))
          # note: true-section may be None
          try:
            cmd, idx, args, true_section = stack.pop()
          except IndexError:
            raise UnmatchedEndError()
          else_section = program[idx:]
          # 'if-any' -> self._cmd_if_any, etc.
          func = getattr(self, '_cmd_' + cmd.replace('-', '_'))
          program[idx:] = [ (func, (args, true_section, else_section)) ]
          if cmd == 'for':
            for_names.pop()
        elif cmd in _block_cmd_specs:
          # require the exact argument count: too few arguments would
          # otherwise fail below with a bare IndexError.
          if len(args) != _block_cmd_specs[cmd] + 1:
            raise ArgCountSyntaxError(str(args[1:]))
          ### this assumes arg1 is always a ref
          args[1] = _prepare_ref(args[1], for_names, file_args)

          # handle arg2 for the 'is' command
          if cmd == 'is':
            args[2] = _prepare_ref(args[2], for_names, file_args)
          elif cmd == 'for':
            for_names.append(args[1][0])

          # remember the cmd, current pos, args, and a section placeholder
          stack.append([cmd, len(program), args[1:], None])
        elif cmd == 'include':
          if len(args) < 2:
            raise ArgCountSyntaxError(str(args))
          if args[1][0] == '"':
            # constant filename: parse the included file now and splice its
            # program in; remaining arguments become its arg0, arg1, ...
            include_filename = args[1][1:-1]
            f_args = [ _prepare_ref(arg, for_names, file_args)
                       for arg in args[2:] ]
            program.extend(self._parse(reader.read_other(include_filename),
                                       for_names,
                                       f_args))
          else:
            # filename comes from the data: defer parsing to generate time
            if len(args) != 2:
              raise ArgCountSyntaxError(str(args))
            program.append((self._cmd_include,
                            (_prepare_ref(args[1], for_names, file_args),
                             reader)))
        elif cmd == 'if-any':
          f_args = [ _prepare_ref(arg, for_names, file_args)
                     for arg in args[1:] ]
          stack.append(['if-any', len(program), f_args, None])
        else:
          # implied PRINT command
          if len(args) > 1:
            # first item is the format, the rest are its %N arguments
            f_args = [ _prepare_ref(arg, for_names, file_args)
                       for arg in args ]
            program.append((self._cmd_format, (f_args[0], f_args[1:])))
          else:
            program.append((self._cmd_print,
                            _prepare_ref(args[0], for_names, file_args)))

    if stack:
      ### would be nice to say which blocks...
      raise UnclosedBlocksError()
    return program

  def _execute(self, program, fp, ctx):
    """This private helper function takes a 'program' sequence as created
    by the method '_parse' and executes it step by step.  strings are written
    to the file object 'fp' and functions are called.
    """
    for step in program:
      if isinstance(step, str):
        fp.write(step)
      else:
        step[0](step[1], fp, ctx)

  def _cmd_print(self, valref, fp, ctx):
    "Write the value of valref to fp; stream-like values are copied through."
    value = _get_value(valref, ctx)

    # if the value has a 'read' attribute, then it is a stream: copy it
    if hasattr(value, 'read'):
      while 1:
        chunk = value.read(16384)
        if not chunk:
          break
        fp.write(chunk)
    else:
      fp.write(value)

  def _cmd_format(self, args, fp, ctx):
    """args -> (valref, fmt_args): the format reference and its arguments.

    Writes the format value with each %N replaced by the value of argument N
    ('<undef>' when there is no such argument) and %% replaced by '%'.
    """
    valref, fmt_args = args
    fmt = _get_value(valref, ctx)
    # split() yields TEXT (splitter TEXT)*: odd positions are the splitters
    for i, piece in enumerate(_re_subst.split(fmt)):
      if i % 2 == 1 and piece != '%':
        idx = int(piece)
        if idx < len(fmt_args):
          piece = _get_value(fmt_args[idx], ctx)
        else:
          piece = '<undef>'
      fp.write(piece)

  def _cmd_include(self, args, fp, ctx):
    """args -> (valref, reader): the filename reference and the reader of the
    including template, which resolves the name via read_other().
    """
    valref, reader = args
    fname = _get_value(valref, ctx)
    ### note: we don't have the set of for_names to pass into this parse.
    ### I don't think there is anything to do but document it.
    self._execute(self._parse(reader.read_other(fname)), fp, ctx)

  def _cmd_if_any(self, args, fp, ctx):
    "If any value is a non-empty string or non-empty list, then T else F."
    (valrefs, t_section, f_section) = args
    value = 0
    for valref in valrefs:
      if _get_value(valref, ctx):
        value = 1
        break
    self._do_if(value, t_section, f_section, fp, ctx)

  def _cmd_if_index(self, args, fp, ctx):
    """Test the current index of an active [for] loop.

    The condition is one of 'even', 'odd', 'first', 'last'; anything else is
    converted with int() and compared to the (0-based) index.
    """
    ((valref, value), t_section, f_section) = args
    seq, idx = ctx.for_index[valref[0]]
    if value == 'even':
      value = idx % 2 == 0
    elif value == 'odd':
      value = idx % 2 == 1
    elif value == 'first':
      value = idx == 0
    elif value == 'last':
      value = idx == len(seq)-1
    else:
      value = idx == int(value)
    self._do_if(value, t_section, f_section, fp, ctx)

  def _cmd_is(self, args, fp, ctx):
    "Compare two values case-insensitively; T if they are equal, else F."
    ((left_ref, right_ref), t_section, f_section) = args
    value = _get_value(right_ref, ctx)
    value = _get_value(left_ref, ctx).lower() == value.lower()
    self._do_if(value, t_section, f_section, fp, ctx)

  def _do_if(self, value, t_section, f_section, fp, ctx):
    """Execute t_section if value is true, otherwise f_section.

    When the block had no [else], the parser stored its only section as the
    else-section and left the true-section as None; swap them back first.
    """
    if t_section is None:
      t_section = f_section
      f_section = None
    if value:
      section = t_section
    else:
      section = f_section
    if section is not None:
      self._execute(section, fp, ctx)

  def _cmd_for(self, args, fp, ctx):
    """Execute the block once per item of the referenced sequence.

    Raises NeedSequenceError if the referenced value is a string.
    """
    ((valref,), unused, section) = args
    seq = _get_value(valref, ctx)
    if isinstance(seq, str):
      raise NeedSequenceError()
    refname = valref[0]
    # publish [sequence, current index]; _get_value and _cmd_if_index read it
    ctx.for_index[refname] = idx = [ seq, 0 ]
    for item in seq:
      self._execute(section, fp, ctx)
      idx[1] = idx[1] + 1
    del ctx.for_index[refname]
00440 
def boolean(value):
  """Return a value suitable for [if-any bool_var] usage in a template.

  Any true value maps to the string 'yes', any false value to None.
  """
  if value:
    return 'yes'
  return None
00446 
00447 
def _prepare_ref(refname, for_names, file_args):
  """refname -> a string containing a dotted identifier. example:"foo.bar.bang"
  for_names -> a list of active for sequences.
  file_args -> prepared references passed to the current include file; a
  refname of the form argN (0 <= N < len(file_args)) resolves to entry N.

  Returns a `value reference', a 3-tuple made out of (refname, start, rest),
  for fast access later.  A string constant is returned as
  (None, text-without-quotes, None).
  """
  # is the reference a string constant?
  if refname[0] == '"':
    return None, refname[1:-1], None

  # if this is an include-argument, then just return the prepared ref
  if refname[:3] == 'arg':
    try:
      idx = int(refname[3:])
    except ValueError:
      pass
    else:
      # NAME may contain '-', so "arg-1" parses as a negative index; it must
      # not wrap around to the end of file_args (or fail on an empty one).
      if 0 <= idx < len(file_args):
        return file_args[idx]

  parts = refname.split('.')
  start = parts[0]
  rest = parts[1:]
  while rest and (start in for_names):
    # check if the next part is also a "for name"
    name = start + '.' + rest[0]
    if name in for_names:
      start = name
      del rest[0]
    else:
      break
  return refname, start, rest
00481 
def _get_value(valref, ctx):
  """valref -> a prepared `value reference' (refname, start, rest), as built
  by _prepare_ref.
  ctx -> an execution context instance.

  Does a name space lookup within the template name space.  Active
  for blocks take precedence over data dictionary members with the
  same name.

  Integers and floats are returned as strings and None as ''; any other
  value is returned unchanged.  Raises UnknownReference if the name or one
  of its dotted attributes cannot be found.
  """
  refname, start, rest = valref
  if rest is None:
    # it was a string constant
    return start
  if start in ctx.for_index:
    # inside a for block: the name refers to the current item
    seq, idx = ctx.for_index[start]
    ob = seq[idx]
  elif start in ctx.data:
    ob = ctx.data[start]
  else:
    raise UnknownReference(refname)

  # walk the rest of the dotted reference
  for attr in rest:
    try:
      ob = getattr(ob, attr)
    except AttributeError:
      raise UnknownReference(refname)

  # make sure we return a string instead of some various Python types
  if isinstance(ob, (int, float)):
    return str(ob)
  if ob is None:
    return ''

  # string or a sequence
  return ob
00516 
00517 
class _context:
  """A container for the execution context.

  Template.generate() stores the data dictionary as 'data' and the state of
  the active for blocks as 'for_index' on an instance of this class.
  """
00520 
00521 
class Reader:
  "Abstract class which allows EZT to detect Reader objects."

class _FileReader(Reader):
  """Reads templates from the filesystem."""
  def __init__(self, fname):
    "Read the whole file 'fname' into self.text and remember its directory."
    fp = open(fname, 'rb')
    try:
      self.text = fp.read()
    finally:
      # close explicitly rather than relying on garbage collection
      fp.close()
    self._dir = os.path.dirname(fname)
  def read_other(self, relative):
    "Return a reader for 'relative', resolved against this file's directory."
    return _FileReader(os.path.join(self._dir, relative))

class _TextReader(Reader):
  """'Reads' a template from provided text."""
  def __init__(self, text):
    self.text = text
  def read_other(self, relative):
    "Plain text has no base location, so includes cannot be resolved."
    raise BaseUnavailableError()
00539 
00540 
class EZTException(Exception):
  """Parent class of all EZT exceptions."""

class ArgCountSyntaxError(EZTException):
  """A bracket directive got the wrong number of arguments."""

class UnknownReference(EZTException):
  """The template references an object not contained in the data dictionary."""

class NeedSequenceError(EZTException):
  """The object dereferenced by a [for] directive is a string, not a sequence
  (tuple or list)."""

class UnclosedBlocksError(EZTException):
  """A block directive was never closed; this may be simply a missing [end]."""

class UnmatchedEndError(EZTException):
  """An [end] has no open block; this may be caused by a misspelled if
  directive."""

class BaseUnavailableError(EZTException):
  """Base location is unavailable, which disables includes."""
00561 
00562 
00563 # --- standard test environment ---
def test_parse():
  """Check how _re_parse.split() carves up templates.

  The result has the shape TEXT (DIRECTIVE BRACKET TEXT)*; the DIRECTIVE
  group captures the text between the square brackets, not the brackets
  themselves, and the BRACKET group is None for ordinary directives.
  """
  assert _re_parse.split('[a]') == ['', 'a', None, '']
  assert _re_parse.split('[a] [b]') == \
         ['', 'a', None, ' ', 'b', None, '']
  assert _re_parse.split('[a c] [b]') == \
         ['', 'a c', None, ' ', 'b', None, '']
  assert _re_parse.split('x [a] y [b] z') == \
         ['x ', 'a', None, ' y ', 'b', None, ' z']
  assert _re_parse.split('[a "b" c "d"]') == \
         ['', 'a "b" c "d"', None, '']
  assert _re_parse.split(r'["a \"b[foo]" c.d f]') == \
         ['', '"a \\"b[foo]" c.d f', None, '']
00576 
def _test(argv):
  """Run the doctests embedded in the ezt module.

  argv -> command line arguments; "-v" switches on verbose doctest output.
  Returns the result of doctest.testmod(), whose first item is the number
  of failed examples.
  """
  import doctest
  import ezt
  verbose = "-v" in argv
  return doctest.testmod(ezt, verbose=verbose)
00581 
if __name__ == "__main__":
  # invoke unit test for this module; the process exit status is the number
  # of failed doctest examples (0 means everything passed).
  import sys
  sys.exit(_test(sys.argv)[0])