recognizers.py
00001 """ANTLR3 runtime package"""
00002 
00003 # begin[licence]
00004 #
00005 # [The "BSD licence"]
00006 # Copyright (c) 2005-2008 Terence Parr
00007 # All rights reserved.
00008 #
00009 # Redistribution and use in source and binary forms, with or without
00010 # modification, are permitted provided that the following conditions
00011 # are met:
00012 # 1. Redistributions of source code must retain the above copyright
00013 #    notice, this list of conditions and the following disclaimer.
00014 # 2. Redistributions in binary form must reproduce the above copyright
00015 #    notice, this list of conditions and the following disclaimer in the
00016 #    documentation and/or other materials provided with the distribution.
00017 # 3. The name of the author may not be used to endorse or promote products
00018 #    derived from this software without specific prior written permission.
00019 #
00020 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00021 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00022 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00023 # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00024 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00025 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00026 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00027 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00028 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00029 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030 #
00031 # end[licence]
00032 
00033 import sys
00034 import inspect
00035 
00036 from rve_interface_gen.antlr3 import runtime_version, runtime_version_str
00037 from constants import DEFAULT_CHANNEL, HIDDEN_CHANNEL, EOF, \
00038      EOR_TOKEN_TYPE, INVALID_TOKEN_TYPE
00039 from exceptions import RecognitionException, MismatchedTokenException, \
00040      MismatchedRangeException, MismatchedTreeNodeException, \
00041      NoViableAltException, EarlyExitException, MismatchedSetException, \
00042      MismatchedNotSetException, FailedPredicateException, \
00043      BacktrackingFailed, UnwantedTokenException, MissingTokenException
00044 from tokens import CommonToken, EOF_TOKEN, SKIP_TOKEN
00045 from compat import set, frozenset, reversed
00046 
00047 
00048 class RecognizerSharedState(object):
00049     """
00050     The set of fields needed by an abstract recognizer to recognize input
00051     and recover from errors etc...  As a separate state object, it can be
00052     shared among multiple grammars; e.g., when one grammar imports another.
00053 
00054     These fields are publicly visible but the actual state pointer per
00055     parser is protected.
00056     """
00057 
00058     def __init__(self):
00059         # Track the set of token types that can follow any rule invocation.
00060         # Stack grows upwards.
00061         self.following = []
00062 
00063         # This is true when we see an error and before having successfully
00064         # matched a token.  Prevents generation of more than one error message
00065         # per error.
00066         self.errorRecovery = False
00067 
00068         # The index into the input stream where the last error occurred.
00069         # This is used to prevent infinite loops where an error is found
00070         # but no token is consumed during recovery...another error is found,
00071         # ad nauseam.  This is a failsafe mechanism to guarantee that at least
00072         # one token/tree node is consumed for two errors.
00073         self.lastErrorIndex = -1
00074 
00075         # If 0, no backtracking is going on.  Safe to exec actions etc...
00076         # If >0 then it's the level of backtracking.
00077         self.backtracking = 0
00078 
00079         # An array[size num rules] of Map<Integer,Integer> that tracks
00080         # the stop token index for each rule.  ruleMemo[ruleIndex] is
00081         # the memoization table for ruleIndex.  For key ruleStartIndex, you
00082         # get back the stop token for the associated rule or MEMO_RULE_FAILED.
00083         #
00084         # This is only used if rule memoization is on (which it is by default).
00085         self.ruleMemo = None
00086 
00087         ## Did the recognizer encounter a syntax error?  Track how many.
00088         self.syntaxErrors = 0
00089 
00090 
00091         # LEXER FIELDS (must be in same state object to avoid casting
00092         # constantly in generated code and Lexer object) :(
00093 
00094 
00095         ## The goal of all lexer rules/methods is to create a token object.
00096         # This is an instance variable as multiple rules may collaborate to
00097         # create a single token.  nextToken will return this object after
00098         # matching lexer rule(s).  If you subclass to allow multiple token
00099         # emissions, then set this to the last token to be matched or
00100         # something non-None so that the auto token emit mechanism will not
00101         # emit another token.
00102         self.token = None
00103 
00104         ## What character index in the stream did the current token start at?
00105         # Needed, for example, to get the text for current token.  Set at
00106         # the start of nextToken.
00107         self.tokenStartCharIndex = -1
00108 
00109         ## The line on which the first character of the token resides
00110         self.tokenStartLine = None
00111 
00112         ## The character position of first character within the line
00113         self.tokenStartCharPositionInLine = None
00114 
00115         ## The channel number for the current token
00116         self.channel = None
00117 
00118         ## The token type for the current token
00119         self.type = None
00120 
00121         ## You can set the text for the current token to override what is in
00122         # the input char buffer.  Use setText() or set this instance var directly.
00123         self.text = None
00124         
00125 
00126 class BaseRecognizer(object):
00127     """
00128     @brief Common recognizer functionality.
00129     
00130     A generic recognizer that can handle recognizers generated from
00131     lexer, parser, and tree grammars.  This is all the parsing
00132     support code essentially; most of it is error recovery stuff and
00133     backtracking.
00134     """
00135 
00136     MEMO_RULE_FAILED = -2
00137     MEMO_RULE_UNKNOWN = -1
00138 
00139     # copies from Token object for convenience in actions
00140     DEFAULT_TOKEN_CHANNEL = DEFAULT_CHANNEL
00141 
00142     # for convenience in actions
00143     HIDDEN = HIDDEN_CHANNEL
00144 
00145     # overridden by generated subclasses
00146     tokenNames = None
00147 
00148     # The antlr_version attribute has been introduced in 3.1. If it is not
00149     # overwritten in the generated recognizer, we assume a default of 3.0.1.
00150     antlr_version = (3, 0, 1, 0)
00151     antlr_version_str = "3.0.1"
00152 
00153     def __init__(self, state=None):
00154         # Input stream of the recognizer. Must be initialized by a subclass.
00155         self.input = None
00156 
00157         ## The state of a lexer, parser, or tree parser is collected into a state
00158         # object so the state can be shared.  This sharing is needed to
00159         # have one grammar import others and share same error variables
00160         # and other state variables.  It's a kind of explicit multiple
00161         # inheritance via delegation of methods and shared state.
00162         if state is None:
00163             state = RecognizerSharedState()
00164         self._state = state
00165 
00166         if self.antlr_version > runtime_version:
00167             raise RuntimeError(
00168                 "ANTLR version mismatch: "
00169                 "The recognizer has been generated by V%s, but this runtime "
00170                 "is V%s. Please use the V%s runtime or higher."
00171                 % (self.antlr_version_str,
00172                    runtime_version_str,
00173                    self.antlr_version_str))
00174         elif (self.antlr_version < (3, 1, 0, 0) and
00175               self.antlr_version != runtime_version):
00176             # FIXME: make the runtime compatible with 3.0.1 codegen
00177             # and remove this block.
00178             raise RuntimeError(
00179                 "ANTLR version mismatch: "
00180                 "The recognizer has been generated by V%s, but this runtime "
00181                 "is V%s. Please use the V%s runtime."
00182                 % (self.antlr_version_str,
00183                    runtime_version_str,
00184                    self.antlr_version_str))
00185 
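    # Hedged illustration (an assumption, not runtime code): the state-sharing
    # comment in __init__ above is what makes composite grammars work.  Two
    # recognizers can be handed the same RecognizerSharedState so they share
    # error-recovery and backtracking variables; the generated classes below
    # are hypothetical:
    #
    #   shared = RecognizerSharedState()
    #   main = MyMainParser(token_stream, state=shared)
    #   delegate = MyImportedParser(token_stream, state=shared)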
00186     # this one only exists to shut up pylint :(
00187     def setInput(self, input):
00188         self.input = input
00189 
00190         
00191     def reset(self):
00192         """
00193         Reset the parser's state; subclasses must rewind the input stream.
00194         """
00195         
00196         # whack everything related to error recovery
00197         if self._state is None:
00198             # no shared state work to do
00199             return
00200         
00201         self._state.following = []
00202         self._state.errorRecovery = False
00203         self._state.lastErrorIndex = -1
00204         self._state.syntaxErrors = 0
00205         # whack everything related to backtracking and memoization
00206         self._state.backtracking = 0
00207         if self._state.ruleMemo is not None:
00208             self._state.ruleMemo = {}
00209 
00210 
00211     def match(self, input, ttype, follow):
00212         """
00213         Match current input symbol against ttype.  Attempt
00214         single token insertion or deletion error recovery.  If
00215         that fails, throw MismatchedTokenException.
00216 
00217         To turn off single token insertion or deletion error
00218         recovery, override recoverFromMismatchedToken() and have it
00219         throw an exception. See TreeParser.recoverFromMismatchedToken().
00220         This way any error in a rule will cause an exception and
00221         immediate exit from rule.  Rule would recover by resynchronizing
00222         to the set of symbols that can follow rule ref.
00223         """
00224         
00225         matchedSymbol = self.getCurrentInputSymbol(input)
00226         if self.input.LA(1) == ttype:
00227             self.input.consume()
00228             self._state.errorRecovery = False
00229             return matchedSymbol
00230 
00231         if self._state.backtracking > 0:
00232             # FIXME: need to return matchedSymbol here as well. damn!!
00233             raise BacktrackingFailed
00234 
00235         matchedSymbol = self.recoverFromMismatchedToken(input, ttype, follow)
00236         return matchedSymbol
00237 
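    # Hedged sketch (assumption): as the match() docstring suggests, single
    # token insertion/deletion can be disabled by overriding
    # recoverFromMismatchedToken() in a generated subclass so any mismatch
    # raises immediately; MyGeneratedParser is a hypothetical class name:
    #
    #   class StrictParser(MyGeneratedParser):
    #       def recoverFromMismatchedToken(self, input, ttype, follow):
    #           # bail out instead of inserting or deleting a token
    #           raise MismatchedTokenException(ttype, input)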
00238 
00239     def matchAny(self, input):
00240         """Match the wildcard '.' by consuming the current input symbol."""
00241 
00242         self._state.errorRecovery = False
00243         self.input.consume()
00244 
00245 
00246     def mismatchIsUnwantedToken(self, input, ttype):
00247         return input.LA(2) == ttype
00248 
00249 
00250     def mismatchIsMissingToken(self, input, follow):
00251         if follow is None:
00252             # we have no information about the follow; we can only consume
00253             # a single token and hope for the best
00254             return False
00255         
00256         # compute what can follow this grammar element reference
00257         if EOR_TOKEN_TYPE in follow:
00258             viableTokensFollowingThisRule = self.computeContextSensitiveRuleFOLLOW()
00259             follow = follow | viableTokensFollowingThisRule
00260 
00261             if len(self._state.following) > 0:
00262                 # remove EOR if we're not the start symbol
00263                 follow = follow - set([EOR_TOKEN_TYPE])
00264 
00265         # if the current token is consistent with what could come after the set
00266         # then we know we're missing a token; error recovery is free to
00267         # "insert" the missing token
00268         if input.LA(1) in follow or EOR_TOKEN_TYPE in follow:
00269             return True
00270 
00271         return False
00272 
00273 
00274     def reportError(self, e):
00275         """Report a recognition problem.
00276             
00277         This method sets errorRecovery to indicate the parser is recovering,
00278         not parsing.  Once in recovery mode, no errors are generated.
00279         To get out of recovery mode, the parser must successfully match
00280         a token (after a resync).  So it will go:
00281 
00282         1. error occurs
00283         2. enter recovery mode, report error
00284         3. consume until token found in resynch set
00285         4. try to resume parsing
00286         5. next match() will reset errorRecovery mode
00287 
00288         If you override, make sure to update syntaxErrors if you care about
00289         that.
00290         
00291         """
00292         
00293         # if we've already reported an error and have not matched a token
00294         # yet successfully, don't report any errors.
00295         if self._state.errorRecovery:
00296             return
00297 
00298         self._state.syntaxErrors += 1 # don't count spurious
00299         self._state.errorRecovery = True
00300 
00301         self.displayRecognitionError(self.tokenNames, e)
00302 
00303 
00304     def displayRecognitionError(self, tokenNames, e):
00305         hdr = self.getErrorHeader(e)
00306         msg = self.getErrorMessage(e, tokenNames)
00307         self.emitErrorMessage(hdr+" "+msg)
00308 
00309 
00310     def getErrorMessage(self, e, tokenNames):
00311         """
00312         What error message should be generated for the various
00313         exception types?
00314         
00315         Not very object-oriented code, but I like having all error message
00316         generation within one method rather than spread among all of the
00317         exception classes. This also makes it much easier for the exception
00318         handling because the exception classes do not have to have pointers back
00319         to this object to access utility routines and so on. Also, changing
00320         the message for an exception type would be difficult because you
00321         would have to subclass the exception, but then somehow get ANTLR
00322         to make those kinds of exception objects instead of the default.
00323         This looks weird, but trust me--it makes the most sense in terms
00324         of flexibility.
00325 
00326         For grammar debugging, you will want to override this to add
00327         more information such as the stack frame with
00328         getRuleInvocationStack(e, this.getClass().getName()) and,
00329         for no viable alts, the decision description and state etc...
00330 
00331         Override this to change the message generated for one or more
00332         exception types.
00333         """
00334 
00335         if isinstance(e, UnwantedTokenException):
00336             tokenName = "<unknown>"
00337             if e.expecting == EOF:
00338                 tokenName = "EOF"
00339 
00340             else:
00341                 tokenName = self.tokenNames[e.expecting]
00342 
00343             msg = "extraneous input %s expecting %s" % (
00344                 self.getTokenErrorDisplay(e.getUnexpectedToken()),
00345                 tokenName
00346                 )
00347 
00348         elif isinstance(e, MissingTokenException):
00349             tokenName = "<unknown>"
00350             if e.expecting == EOF:
00351                 tokenName = "EOF"
00352 
00353             else:
00354                 tokenName = self.tokenNames[e.expecting]
00355 
00356             msg = "missing %s at %s" % (
00357                 tokenName, self.getTokenErrorDisplay(e.token)
00358                 )
00359 
00360         elif isinstance(e, MismatchedTokenException):
00361             tokenName = "<unknown>"
00362             if e.expecting == EOF:
00363                 tokenName = "EOF"
00364             else:
00365                 tokenName = self.tokenNames[e.expecting]
00366 
00367             msg = "mismatched input " \
00368                   + self.getTokenErrorDisplay(e.token) \
00369                   + " expecting " \
00370                   + tokenName
00371 
00372         elif isinstance(e, MismatchedTreeNodeException):
00373             tokenName = "<unknown>"
00374             if e.expecting == EOF:
00375                 tokenName = "EOF"
00376             else:
00377                 tokenName = self.tokenNames[e.expecting]
00378 
00379             msg = "mismatched tree node: %s expecting %s" \
00380                   % (e.node, tokenName)
00381 
00382         elif isinstance(e, NoViableAltException):
00383             msg = "no viable alternative at input " \
00384                   + self.getTokenErrorDisplay(e.token)
00385 
00386         elif isinstance(e, EarlyExitException):
00387             msg = "required (...)+ loop did not match anything at input " \
00388                   + self.getTokenErrorDisplay(e.token)
00389 
00390         elif isinstance(e, MismatchedSetException):
00391             msg = "mismatched input " \
00392                   + self.getTokenErrorDisplay(e.token) \
00393                   + " expecting set " \
00394                   + repr(e.expecting)
00395 
00396         elif isinstance(e, MismatchedNotSetException):
00397             msg = "mismatched input " \
00398                   + self.getTokenErrorDisplay(e.token) \
00399                   + " expecting set " \
00400                   + repr(e.expecting)
00401 
00402         elif isinstance(e, FailedPredicateException):
00403             msg = "rule " \
00404                   + e.ruleName \
00405                   + " failed predicate: {" \
00406                   + e.predicateText \
00407                   + "}?"
00408 
00409         else:
00410             msg = str(e)
00411 
00412         return msg
00413     
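    # Hedged sketch (assumption): as suggested in the docstring above, a
    # generated subclass can override getErrorMessage() for grammar debugging,
    # e.g. appending the rule invocation stack; MyGeneratedParser is
    # hypothetical:
    #
    #   class DebuggingParser(MyGeneratedParser):
    #       def getErrorMessage(self, e, tokenNames):
    #           msg = BaseRecognizer.getErrorMessage(self, e, tokenNames)
    #           return "%s (rule stack: %s)" % (msg, self.getRuleInvocationStack())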
00414 
00415     def getNumberOfSyntaxErrors(self):
00416         """
00417         Get number of recognition errors (lexer, parser, tree parser).  Each
00418         recognizer tracks its own number.  So parser and lexer each have
00419         a separate count.  It does not count the spurious errors found between
00420         an error and the next valid token match.
00421 
00422         See also reportError()
00423         """
00424         return self._state.syntaxErrors
00425 
00426 
00427     def getErrorHeader(self, e):
00428         """
00429         What is the error header, normally line/character position information?
00430         """
00431         
00432         return "line %d:%d" % (e.line, e.charPositionInLine)
00433 
00434 
00435     def getTokenErrorDisplay(self, t):
00436         """
00437         How should a token be displayed in an error message? The default
00438         is to display just the text, but during development you might
00439         want to have a lot of information spit out.  Override in that case
00440         to use t.toString() (which, for CommonToken, dumps everything about
00441         the token). This is better than forcing you to override a method in
00442         your token objects because you don't have to go modify your lexer
00443         so that it creates a new Java type.
00444         """
00445         
00446         s = t.text
00447         if s is None:
00448             if t.type == EOF:
00449                 s = "<EOF>"
00450             else:
00451                 s = "<" + str(t.type) + ">"  # t.type is an int token type
00452 
00453         return repr(s)
00454     
00455 
00456     def emitErrorMessage(self, msg):
00457         """Override this method to change where error messages go"""
00458         sys.stderr.write(msg + '\n')
00459 
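    # Hedged sketch (assumption): because emitErrorMessage() is the single
    # exit point for error text, a subclass can redirect it, e.g. to collect
    # messages for later inspection instead of writing to stderr;
    # MyGeneratedParser is hypothetical:
    #
    #   class CollectingParser(MyGeneratedParser):
    #       def __init__(self, *args, **kwargs):
    #           MyGeneratedParser.__init__(self, *args, **kwargs)
    #           self.error_messages = []
    #
    #       def emitErrorMessage(self, msg):
    #           self.error_messages.append(msg)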
00460 
00461     def recover(self, input, re):
00462         """
00463         Recover from an error found on the input stream.  This is
00464         for NoViableAlt and mismatched symbol exceptions.  If you enable
00465         single token insertion and deletion, this will usually not
00466         handle mismatched symbol exceptions but there could be a mismatched
00467         token that the match() routine could not recover from.
00468         """
00469         
00470         # PROBLEM? what if input stream is not the same as last time
00471         # perhaps make lastErrorIndex a member of input
00472         if self._state.lastErrorIndex == input.index():
00473             # uh oh, another error at same token index; must be a case
00474             # where LT(1) is in the recovery token set so nothing is
00475             # consumed; consume a single token so at least to prevent
00476             # an infinite loop; this is a failsafe.
00477             input.consume()
00478 
00479         self._state.lastErrorIndex = input.index()
00480         followSet = self.computeErrorRecoverySet()
00481         
00482         self.beginResync()
00483         self.consumeUntil(input, followSet)
00484         self.endResync()
00485 
00486 
00487     def beginResync(self):
00488         """
00489         A hook to listen in on the token consumption during error recovery.
00490         The DebugParser subclasses this to fire events to the listener.
00491         """
00492 
00493         pass
00494 
00495 
00496     def endResync(self):
00497         """
00498         A hook to listen in on the token consumption during error recovery.
00499         The DebugParser subclasses this to fire events to the listener.
00500         """
00501 
00502         pass
00503 
00504 
00505     def computeErrorRecoverySet(self):
00506         """
00507         Compute the error recovery set for the current rule.  During
00508         rule invocation, the parser pushes the set of tokens that can
00509         follow that rule reference on the stack; this amounts to
00510         computing FIRST of what follows the rule reference in the
00511         enclosing rule. This local follow set only includes tokens
00512         from within the rule; i.e., the FIRST computation done by
00513         ANTLR stops at the end of a rule.
00514 
00515         EXAMPLE
00516 
00517         When you find a "no viable alt exception", the input is not
00518         consistent with any of the alternatives for rule r.  The best
00519         thing to do is to consume tokens until you see something that
00520         can legally follow a call to r *or* any rule that called r.
00521         You don't want the exact set of viable next tokens because the
00522         input might just be missing a token--you might consume the
00523         rest of the input looking for one of the missing tokens.
00524 
00525         Consider grammar:
00526 
00527         a : '[' b ']'
00528           | '(' b ')'
00529           ;
00530         b : c '^' INT ;
00531         c : ID
00532           | INT
00533           ;
00534 
00535         At each rule invocation, the set of tokens that could follow
00536         that rule is pushed on a stack.  Here are the various "local"
00537         follow sets:
00538 
00539         FOLLOW(b1_in_a) = FIRST(']') = ']'
00540         FOLLOW(b2_in_a) = FIRST(')') = ')'
00541         FOLLOW(c_in_b) = FIRST('^') = '^'
00542 
00543         Upon erroneous input "[]", the call chain is
00544 
00545         a -> b -> c
00546 
00547         and, hence, the follow context stack is:
00548 
00549         depth  local follow set     after call to rule
00550           0         <EOF>                    a (from main())
00551           1          ']'                     b
00552           2          '^'                     c
00553 
00554         Notice that ')' is not included, because b would have to have
00555         been called from a different context in rule a for ')' to be
00556         included.
00557 
00558         For error recovery, we cannot consider FOLLOW(c)
00559         (context-sensitive or otherwise).  We need the combined set of
00560         all context-sensitive FOLLOW sets--the set of all tokens that
00561         could follow any reference in the call chain.  We need to
00562         resync to one of those tokens.  Note that FOLLOW(c)='^' and if
00563         we resync'd to that token, we'd consume until EOF.  We need to
00564         sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
00565         In this case, for input "[]", LA(1) is in this set so we would
00566         not consume anything and after printing an error rule c would
00567         return normally.  It would not find the required '^' though.
00568         At this point, it gets a mismatched token error and throws an
00569         exception (since LA(1) is not in the viable following token
00570         set).  The rule exception handler tries to recover, but finds
00571         the same recovery set and doesn't consume anything.  Rule b
00572         exits normally returning to rule a.  Now it finds the ']' (and
00573         with the successful match exits errorRecovery mode).
00574 
00575         So, you can see that the parser walks up the call chain looking
00576         for the token that was a member of the recovery set.
00577 
00578         Errors are not generated in errorRecovery mode.
00579 
00580         ANTLR's error recovery mechanism is based upon original ideas:
00581 
00582         "Algorithms + Data Structures = Programs" by Niklaus Wirth
00583 
00584         and
00585 
00586         "A note on error recovery in recursive descent parsers":
00587         http://portal.acm.org/citation.cfm?id=947902.947905
00588 
00589         Later, Josef Grosch had some good ideas:
00590 
00591         "Efficient and Comfortable Error Recovery in Recursive Descent
00592         Parsers":
00593         ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
00594 
00595         Like Grosch I implemented local FOLLOW sets that are combined
00596         at run-time upon error to avoid overhead during parsing.
00597         """
00598         
00599         return self.combineFollows(False)
00600 
00601         
00602     def computeContextSensitiveRuleFOLLOW(self):
00603         """
00604         Compute the context-sensitive FOLLOW set for current rule.
00605         This is set of token types that can follow a specific rule
00606         reference given a specific call chain.  You get the set of
00607         viable tokens that can possibly come next (lookahead depth 1)
00608         given the current call chain.  Contrast this with the
00609         definition of plain FOLLOW for rule r:
00610 
00611          FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
00612 
00613         where x in T* and alpha, beta in V*; T is set of terminals and
00614         V is the set of terminals and nonterminals.  In other words,
00615         FOLLOW(r) is the set of all tokens that can possibly follow
00616         references to r in *any* sentential form (context).  At
00617         runtime, however, we know precisely which context applies as
00618         we have the call chain.  We may compute the exact (rather
00619         than covering superset) set of following tokens.
00620 
00621         For example, consider grammar:
00622 
00623         stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
00624              | "return" expr '.'
00625              ;
00626         expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
00627         atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
00628              | '(' expr ')'
00629              ;
00630 
00631         The FOLLOW sets are all inclusive whereas context-sensitive
00632         FOLLOW sets are precisely what could follow a rule reference.
00633         For input "i=(3);", here is the derivation:
00634 
00635         stat => ID '=' expr ';'
00636              => ID '=' atom ('+' atom)* ';'
00637              => ID '=' '(' expr ')' ('+' atom)* ';'
00638              => ID '=' '(' atom ')' ('+' atom)* ';'
00639              => ID '=' '(' INT ')' ('+' atom)* ';'
00640              => ID '=' '(' INT ')' ';'
00641 
00642         At the "3" token, you'd have a call chain of
00643 
00644           stat -> expr -> atom -> expr -> atom
00645 
00646         What can follow that specific nested ref to atom?  Exactly ')'
00647         as you can see by looking at the derivation of this specific
00648         input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
00649 
00650         You want the exact viable token set when recovering from a
00651         token mismatch.  Upon token mismatch, if LA(1) is member of
00652         the viable next token set, then you know there is most likely
00653         a missing token in the input stream.  "Insert" one by just not
00654         throwing an exception.
00655         """
00656 
00657         return self.combineFollows(True)
00658 
00659 
00660     def combineFollows(self, exact):
00661         followSet = set()
00662         for idx, localFollowSet in reversed(list(enumerate(self._state.following))):
00663             followSet |= localFollowSet
00664             if exact:
00665                 # can we see end of rule?
00666                 if EOR_TOKEN_TYPE in localFollowSet:
00667                     # Only leave EOR in set if at top (start rule); this lets
00668                     # us know if we have to include follow(start rule); i.e., EOF
00669                     if idx > 0:
00670                         followSet.remove(EOR_TOKEN_TYPE)
00671                         
00672                 else:
00673                     # can't see end of rule, quit
00674                     break
00675 
00676         return followSet
00677 
00678 
00679     def recoverFromMismatchedToken(self, input, ttype, follow):
00680         """Attempt to recover from a single missing or extra token.
00681 
00682         EXTRA TOKEN
00683 
00684         LA(1) is not what we are looking for.  If LA(2) has the right token,
00685         however, then assume LA(1) is some extra spurious token.  Delete it
00686         and use LA(2) as if we were doing a normal match(), which advances the
00687         input.
00688 
00689         MISSING TOKEN
00690 
00691         If current token is consistent with what could come after
00692         ttype then it is ok to 'insert' the missing token, else throw
00693         an exception.  For example, input 'i=(3;' is clearly missing the
00694         ')'.  When the parser returns from the nested call to expr, it
00695         will have call chain:
00696 
00697           stat -> expr -> atom
00698 
00699         and it will be trying to match the ')' at this point in the
00700         derivation:
00701 
00702              => ID '=' '(' INT ')' ('+' atom)* ';'
00703                                 ^
00704         match() will see that ';' doesn't match ')' and report a
00705         mismatched token error.  To recover, it sees that LA(1)==';'
00706         is in the set of tokens that can follow the ')' token
00707         reference in rule atom.  It can assume that you forgot the ')'.
00708         """
00709 
00710         e = None
00711 
00712         # if next token is what we are looking for then "delete" this token
00713         if self.mismatchIsUnwantedToken(input, ttype):
00714             e = UnwantedTokenException(ttype, input)
00715 
00716             self.beginResync()
00717             input.consume() # simply delete extra token
00718             self.endResync()
00719 
00720             # report after consuming so AW sees the token in the exception
00721             self.reportError(e)
00722 
00723             # we want to return the token we're actually matching
00724             matchedSymbol = self.getCurrentInputSymbol(input)
00725 
00726             # move past ttype token as if all were ok
00727             input.consume()
00728             return matchedSymbol
00729 
00730         # can't recover with single token deletion, try insertion
00731         if self.mismatchIsMissingToken(input, follow):
00732             inserted = self.getMissingSymbol(input, e, ttype, follow)
00733             e = MissingTokenException(ttype, input, inserted)
00734 
00735             # report after inserting so AW sees the token in the exception
00736             self.reportError(e)
00737             return inserted
00738 
00739         # even that didn't work; must throw the exception
00740         e = MismatchedTokenException(ttype, input)
00741         raise e
00742 
00743 
00744     def recoverFromMismatchedSet(self, input, e, follow):
00745         """Not currently used"""
00746 
00747         if self.mismatchIsMissingToken(input, follow):
00748             self.reportError(e)
00749             # we don't know how to conjure up a token for sets yet
00750             return self.getMissingSymbol(input, e, INVALID_TOKEN_TYPE, follow)
00751 
00752         # TODO do single token deletion like above for Token mismatch
00753         raise e
00754 
00755 
00756     def getCurrentInputSymbol(self, input):
00757         """
00758         Match needs to return the current input symbol, which gets put
00759         into the label for the associated token ref; e.g., x=ID.  Token
00760         and tree parsers need to return different objects. Rather than test
00761         for input stream type or change the IntStream interface, I use
00762         a simple method to ask the recognizer to tell me what the current
00763         input symbol is.
00764 
00765         This is ignored for lexers.
00766         """
00767         
00768         return None
00769 
00770 
00771     def getMissingSymbol(self, input, e, expectedTokenType, follow):
00772         """Conjure up a missing token during error recovery.
00773 
00774         The recognizer attempts to recover from single missing
00775         symbols. But, actions might refer to that missing symbol.
00776         For example, x=ID {f($x);}. The action clearly assumes
00777         that there has been an identifier matched previously and that
00778         $x points at that token. If that token is missing, but
00779         the next token in the stream is what we want, we assume that
00780         this token is missing and we keep going. Because we
00781         have to return some token to replace the missing token,
00782         we have to conjure one up. This method gives the user control
00783         over the tokens returned for missing tokens. Mostly,
00784         you will want to create something special for identifier
00785         tokens. For literals such as '{' and ',', the default
00786         action in the parser or tree parser works. It simply creates
00787         a CommonToken of the appropriate type. The text will be the token.
00788         If you change what tokens must be created by the lexer,
00789         override this method to create the appropriate tokens.
00790         """
00791 
00792         return None
00793 
00794 
00795 ##     def recoverFromMissingElement(self, input, e, follow):
00796 ##         """
00797 ##         This code is factored out from mismatched token and mismatched set
00798 ##         recovery.  It handles "single token insertion" error recovery for
00799 ##         both.  No tokens are consumed to recover from insertions.  Return
00800 ##         true if recovery was possible else return false.
00801 ##         """
00802         
00803 ##         if self.mismatchIsMissingToken(input, follow):
00804 ##             self.reportError(e)
00805 ##             return True
00806 
00807 ##         # nothing to do; throw exception
00808 ##         return False
00809 
00810 
00811     def consumeUntil(self, input, tokenTypes):
00812         """
00813         Consume tokens until one matches the given token or token set
00814 
00815         tokenTypes can be a single token type or a set of token types
00816         
00817         """
00818         
00819         if not isinstance(tokenTypes, (set, frozenset)):
00820             tokenTypes = frozenset([tokenTypes])
00821 
00822         ttype = input.LA(1)
00823         while ttype != EOF and ttype not in tokenTypes:
00824             input.consume()
00825             ttype = input.LA(1)
00826 
00827 
00828     def getRuleInvocationStack(self):
00829         """
00830         Return List<String> of the rules in your parser instance
00831         leading up to a call to this method.  You could override if
00832         you want more details such as the file/line info of where
00833         in the parser java code a rule is invoked.
00834 
00835         This is very useful for error messages and for context-sensitive
00836         error recovery.
00837 
00838         You must be careful if you subclass a generated recognizer.
00839         The default implementation will only search the module of self
00840         for rules, but the subclass will not contain any rules.
00841         You probably want to override this method to look like
00842 
00843         def getRuleInvocationStack(self):
00844             return self._getRuleInvocationStack(<class>.__module__)
00845 
00846         where <class> is the class of the generated recognizer, e.g.
00847         the superclass of self.
00848         """
00849 
00850         return self._getRuleInvocationStack(self.__module__)
00851 
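    # Hedged example of the override pattern described in the docstring above;
    # MyGeneratedParser stands in for the actual generated recognizer class:
    #
    #   class MyParserSubclass(MyGeneratedParser):
    #       def getRuleInvocationStack(self):
    #           # search the module that really contains the rule methods
    #           return self._getRuleInvocationStack(MyGeneratedParser.__module__)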
00852 
00853     def _getRuleInvocationStack(cls, module):
00854         """
00855         A more general version of getRuleInvocationStack where you can
00856         pass in, for example, a RecognitionException to get its rule
00857         stack trace.  This routine is shared with all recognizers, hence
00858         it is defined as a classmethod.
00859 
00860         TODO: move to a utility class or something; weird having lexer call
00861         this
00862         """
00863 
00864         # mmmhhh,... perhaps look at the first argument
00865         # (f_locals[co_varnames[0]]?) and test if it's a (sub)class of
00866         # requested recognizer...
00867         
00868         rules = []
00869         for frame in reversed(inspect.stack()):
00870             code = frame[0].f_code
00871             codeMod = inspect.getmodule(code)
00872             if codeMod is None:
00873                 continue
00874 
00875             # skip frames not in requested module
00876             if codeMod.__name__ != module:
00877                 continue
00878 
00879             # skip some unwanted names
00880             if code.co_name in ('nextToken', '<module>'):
00881                 continue
00882 
00883             rules.append(code.co_name)
00884 
00885         return rules
00886         
00887     _getRuleInvocationStack = classmethod(_getRuleInvocationStack)
00888     
00889 
00890     def getBacktrackingLevel(self):
00891         return self._state.backtracking
00892 
00893     def setBacktrackingLevel(self, n):
00894         self._state.backtracking = n
00895 
00896 
00897     def failed(self):
00898         """Return whether or not a backtracking attempt failed."""
00899 
00900         return self._state.failed
00901 
00902 
00903     def getGrammarFileName(self):
00904         """For debugging and other purposes, might want the grammar name.
00905         
00906         Have ANTLR generate an implementation for this method.
00907         """
00908 
00909         return self.grammarFileName
00910 
00911 
00912     def getSourceName(self):
00913         raise NotImplementedError
00914 
00915     
00916     def toStrings(self, tokens):
00917         """A convenience method for use most often with template rewrites.
00918 
00919         Convert a List<Token> to List<String>
00920         """
00921 
00922         if tokens is None:
00923             return None
00924 
00925         return [token.text for token in tokens]
00926 
00927 
00928     def getRuleMemoization(self, ruleIndex, ruleStartIndex):
00929         """
00930         Given a rule number and a start token index number, return
00931         MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
00932         the start index.  If this rule has parsed input starting from the
00933         start index before, then return where the rule stopped parsing.
00934         It returns the index of the last token matched by the rule.
00935         """
00936         
00937         if ruleIndex not in self._state.ruleMemo:
00938             self._state.ruleMemo[ruleIndex] = {}
00939 
00940         return self._state.ruleMemo[ruleIndex].get(
00941             ruleStartIndex, self.MEMO_RULE_UNKNOWN
00942             )
00943 
00944 
00945     def alreadyParsedRule(self, input, ruleIndex):
00946         """
00947         Has this rule already parsed input at the current index in the
00948         input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
00949         If we attempted but failed to parse properly before, return
00950         MEMO_RULE_FAILED.
00951 
00952         This method has a side-effect: if we have seen this input for
00953         this rule and successfully parsed before, then seek ahead to
00954         1 past the stop token matched for this rule last time.
00955         """
00956 
00957         stopIndex = self.getRuleMemoization(ruleIndex, input.index())
00958         if stopIndex == self.MEMO_RULE_UNKNOWN:
00959             return False
00960 
00961         if stopIndex == self.MEMO_RULE_FAILED:
00962             raise BacktrackingFailed
00963 
00964         else:
00965             input.seek(stopIndex + 1)
00966 
00967         return True
00968 
00969 
00970     def memoize(self, input, ruleIndex, ruleStartIndex, success):
00971         """
00972         Record whether or not this rule parsed the input at this position
00973         successfully.
00974         """
00975 
00976         if success:
00977             stopTokenIndex = input.index() - 1
00978         else:
00979             stopTokenIndex = self.MEMO_RULE_FAILED
00980         
00981         if ruleIndex in self._state.ruleMemo:
00982             self._state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex
00983 
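    # Hedged sketch (an assumption, not actual generated code) of how a
    # generated rule method typically drives the memoization hooks above
    # while backtracking; the rule index 4 is a placeholder:
    #
    #   def rule_sketch(self):
    #       if self._state.backtracking > 0 and self.alreadyParsedRule(self.input, 4):
    #           return
    #       rule_start = self.input.index()
    #       success = False
    #       try:
    #           pass  # ... match the rule's alternatives here ...
    #           success = True
    #       finally:
    #           if self._state.backtracking > 0:
    #               self.memoize(self.input, 4, rule_start, success)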
00984 
00985     def traceIn(self, ruleName, ruleIndex, inputSymbol):
00986         sys.stdout.write("enter %s %s" % (ruleName, inputSymbol))
00987         
00988         if self._state.backtracking > 0:
00989             sys.stdout.write(" backtracking=%s" % self._state.backtracking)
00990 
00991         sys.stdout.write('\n')
00992 
00993 
00994     def traceOut(self, ruleName, ruleIndex, inputSymbol):
00995         sys.stdout.write("exit %s %s" % (ruleName, inputSymbol))
00996         
00997         if self._state.backtracking > 0:
00998             sys.stdout.write(" backtracking=%s" % self._state.backtracking)
00999 
01000         if self._state.failed:
01001             sys.stdout.write(" failed")
01002         else:
01003             sys.stdout.write(" succeeded")
01004 
01005         sys.stdout.write('\n')
01006 
01007 
01008 class TokenSource(object):
01009     """
01010     @brief Abstract baseclass for token producers.
01011     
01012     A source of tokens must provide a sequence of tokens via nextToken()
01013     and also must reveal its source of characters; CommonToken's text is
01014     computed from a CharStream; it only stores indices into the char stream.
01015 
01016     Errors from the lexer are never passed to the parser.  Either you want
01017     to keep going or you do not upon token recognition error.  If you do not
01018     want to continue lexing then you do not want to continue parsing.  Just
01019     throw an exception not under RecognitionException and Python will naturally
01020     toss you all the way out of the recognizers.  If you want to continue
01021     lexing then you should not throw an exception to the parser--it has already
01022     requested a token.  Keep lexing until you get a valid one.  Just report
01023     errors and keep going, looking for a valid token.
01024     """
01025     
01026     def nextToken(self):
01027         """Return a Token object from your input stream (usually a CharStream).
01028         
01029         Do not fail/return upon lexing error; keep chewing on the characters
01030         until you get a good one; errors are not passed through to the parser.
01031         """
01032 
01033         raise NotImplementedError
01034     
01035 
01036     def __iter__(self):
01037         """The TokenSource is an iterator.
01038 
01039         The iteration will not include the final EOF token; see also the note
01040         for the next() method.
01041 
01042         """
01043         
01044         return self
01045 
01046     
01047     def next(self):
01048         """Return next token or raise StopIteration.
01049 
01050         Note that this will raise StopIteration when hitting the EOF token,
01051         so EOF will not be part of the iteration.
01052         
01053         """
01054 
01055         token = self.nextToken()
01056         if token is None or token.type == EOF:
01057             raise StopIteration
01058         return token
01059 
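# Hedged usage sketch (assumption): because TokenSource implements the
# iterator protocol, a generated lexer can be iterated directly to collect
# its tokens (EOF is not yielded).  MyLexer is hypothetical and
# ANTLRStringStream is assumed to be exported by the package as in the
# standard antlr3 runtime:
#
#   from rve_interface_gen.antlr3 import ANTLRStringStream
#   lexer = MyLexer(ANTLRStringStream("x = 1;"))
#   tokens = list(lexer)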
01060     
01061 class Lexer(BaseRecognizer, TokenSource):
01062     """
01063     @brief Baseclass for generated lexer classes.
01064     
01065     A lexer is a recognizer that draws input symbols from a character stream.
01066     Lexer grammars result in a subclass of this object. A Lexer object
01067     uses simplified match() and error recovery mechanisms in the interest
01068     of speed.
01069     """
01070 
01071     def __init__(self, input, state=None):
01072         BaseRecognizer.__init__(self, state)
01073         TokenSource.__init__(self)
01074         
01075         # Where is the lexer drawing characters from?
01076         self.input = input
01077 
01078 
01079     def reset(self):
01080         BaseRecognizer.reset(self) # reset all recognizer state variables
01081 
01082         if self.input is not None:
01083             # rewind the input
01084             self.input.seek(0)
01085 
01086         if self._state is None:
01087             # no shared state work to do
01088             return
01089         
01090         # whack Lexer state variables
01091         self._state.token = None
01092         self._state.type = INVALID_TOKEN_TYPE
01093         self._state.channel = DEFAULT_CHANNEL
01094         self._state.tokenStartCharIndex = -1
01095         self._state.tokenStartLine = -1
01096         self._state.tokenStartCharPositionInLine = -1
01097         self._state.text = None
01098 
01099 
01100     def nextToken(self):
01101         """
01102         Return a token from this source; i.e., match a token on the char
01103         stream.
01104         """
01105         
01106         while True:
01107             self._state.token = None
01108             self._state.channel = DEFAULT_CHANNEL
01109             self._state.tokenStartCharIndex = self.input.index()
01110             self._state.tokenStartCharPositionInLine = self.input.charPositionInLine
01111             self._state.tokenStartLine = self.input.line
01112             self._state.text = None
01113             if self.input.LA(1) == EOF:
01114                 return EOF_TOKEN
01115 
01116             try:
01117                 self.mTokens()
01118                 
01119                 if self._state.token is None:
01120                     self.emit()
01121                     
01122                 elif self._state.token == SKIP_TOKEN:
01123                     continue
01124 
01125                 return self._state.token
01126 
01127             except NoViableAltException, re:
01128                 self.reportError(re)
01129                 self.recover(re) # throw out current char and try again
01130 
01131             except RecognitionException, re:
01132                 self.reportError(re)
01133                 # match() routine has already called recover()
01134 
01135 
01136     def skip(self):
01137         """
01138         Instruct the lexer to skip creating a token for current lexer rule
01139         and look for another token.  nextToken() knows to keep looking when
01140         a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
01141         if token is None at the end of any token rule, it creates one for you
01142         and emits it.
01143         """
01144         
01145         self._state.token = SKIP_TOKEN
01146 
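    # Hedged illustration (assumption about grammar usage, not runtime code):
    # a lexer rule for the Python target would typically call skip() from an
    # action to drop whitespace, e.g. in the grammar:
    #
    #   WS : (' ' | '\t' | '\r' | '\n')+ { self.skip() } ;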
01147 
01148     def mTokens(self):
01149         """This is the lexer entry point that sets instance var 'token'"""
01150 
01151         # abstract method
01152         raise NotImplementedError
01153     
01154 
01155     def setCharStream(self, input):
01156         """Set the char stream and reset the lexer"""
01157         self.input = None
01158         self.reset()
01159         self.input = input
01160 
01161 
01162     def getSourceName(self):
01163         return self.input.getSourceName()
01164 
01165 
01166     def emit(self, token=None):
01167         """
01168         The standard method called to automatically emit a token at the
01169         outermost lexical rule.  The token object should point into the
01170         char buffer start..stop.  If there is a text override in 'text',
01171         use that to set the token's text.  Override this method to emit
01172         custom Token objects.
01173 
01174         If you are building trees, then you should also override
01175         Parser or TreeParser.getMissingSymbol().
01176         """
01177 
01178         if token is None:
01179             token = CommonToken(
01180                 input=self.input,
01181                 type=self._state.type,
01182                 channel=self._state.channel,
01183                 start=self._state.tokenStartCharIndex,
01184                 stop=self.getCharIndex()-1
01185                 )
01186             token.line = self._state.tokenStartLine
01187             token.text = self._state.text
01188             token.charPositionInLine = self._state.tokenStartCharPositionInLine
01189 
01190         self._state.token = token
01191         
01192         return token
01193 
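    # Hedged sketch (assumption): emit() can be overridden to attach extra
    # data to every token the lexer produces; MyGeneratedLexer is a
    # hypothetical generated class:
    #
    #   class AnnotatingLexer(MyGeneratedLexer):
    #       def emit(self, token=None):
    #           token = Lexer.emit(self, token)
    #           token.source_name = self.getSourceName()  # extra annotation
    #           return token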
01194 
01195     def match(self, s):
01196         if isinstance(s, basestring):
01197             for c in s:
01198                 if self.input.LA(1) != ord(c):
01199                     if self._state.backtracking > 0:
01200                         raise BacktrackingFailed
01201 
01202                     mte = MismatchedTokenException(c, self.input)
01203                     self.recover(mte)
01204                     raise mte
01205 
01206                 self.input.consume()
01207 
01208         else:
01209             if self.input.LA(1) != s:
01210                 if self._state.backtracking > 0:
01211                     raise BacktrackingFailed
01212 
01213                 mte = MismatchedTokenException(unichr(s), self.input)
01214                 self.recover(mte) # don't really recover; just consume in lexer
01215                 raise mte
01216         
01217             self.input.consume()
01218             
01219 
01220     def matchAny(self):
01221         self.input.consume()
01222 
01223 
01224     def matchRange(self, a, b):
01225         if self.input.LA(1) < a or self.input.LA(1) > b:
01226             if self._state.backtracking > 0:
01227                 raise BacktrackingFailed
01228 
01229             mre = MismatchedRangeException(unichr(a), unichr(b), self.input)
01230             self.recover(mre)
01231             raise mre
01232 
01233         self.input.consume()
01234 
01235 
01236     def getLine(self):
01237         return self.input.line
01238 
01239 
01240     def getCharPositionInLine(self):
01241         return self.input.charPositionInLine
01242 
01243 
01244     def getCharIndex(self):
01245         """What is the index of the current character of lookahead?"""
01246         
01247         return self.input.index()
01248 
01249 
01250     def getText(self):
01251         """
01252         Return the text matched so far for the current token or any
01253         text override.
01254         """
01255         if self._state.text is not None:
01256             return self._state.text
01257         
01258         return self.input.substring(
01259             self._state.tokenStartCharIndex,
01260             self.getCharIndex()-1
01261             )
01262 
01263 
01264     def setText(self, text):
01265         """
01266         Set the complete text of this token; it wipes any previous
01267         changes to the text.
01268         """
01269         self._state.text = text
01270 
01271 
01272     text = property(getText, setText)
01273 
01274 
01275     def reportError(self, e):
01276         ## TODO: not thought about recovery in lexer yet.
01277 
01278         ## # if we've already reported an error and have not matched a token
01279         ## # yet successfully, don't report any errors.
01280         ## if self.errorRecovery:
01281         ##     #System.err.print("[SPURIOUS] ");
01282         ##     return;
01283         ## 
01284         ## self.errorRecovery = True
01285 
01286         self.displayRecognitionError(self.tokenNames, e)
01287 
01288 
01289     def getErrorMessage(self, e, tokenNames):
01290         msg = None
01291         
01292         if isinstance(e, MismatchedTokenException):
01293             msg = "mismatched character " \
01294                   + self.getCharErrorDisplay(e.c) \
01295                   + " expecting " \
01296                   + self.getCharErrorDisplay(e.expecting)
01297 
01298         elif isinstance(e, NoViableAltException):
01299             msg = "no viable alternative at character " \
01300                   + self.getCharErrorDisplay(e.c)
01301 
01302         elif isinstance(e, EarlyExitException):
01303             msg = "required (...)+ loop did not match anything at character " \
01304                   + self.getCharErrorDisplay(e.c)
01305             
01306         elif isinstance(e, MismatchedNotSetException):
01307             msg = "mismatched character " \
01308                   + self.getCharErrorDisplay(e.c) \
01309                   + " expecting set " \
01310                   + repr(e.expecting)
01311 
01312         elif isinstance(e, MismatchedSetException):
01313             msg = "mismatched character " \
01314                   + self.getCharErrorDisplay(e.c) \
01315                   + " expecting set " \
01316                   + repr(e.expecting)
01317 
01318         elif isinstance(e, MismatchedRangeException):
01319             msg = "mismatched character " \
01320                   + self.getCharErrorDisplay(e.c) \
01321                   + " expecting set " \
01322                   + self.getCharErrorDisplay(e.a) \
01323                   + ".." \
01324                   + self.getCharErrorDisplay(e.b)
01325 
01326         else:
01327             msg = BaseRecognizer.getErrorMessage(self, e, tokenNames)
01328 
01329         return msg
01330 
01331 
01332     def getCharErrorDisplay(self, c):
01333         if c == EOF:
01334             c = '<EOF>'
01335         return repr(c)
01336 
01337 
01338     def recover(self, re):
01339         """
01340         Lexers can normally match any char in its vocabulary after matching
01341         a token, so do the easy thing and just kill a character and hope
01342         it all works out.  You can instead use the rule invocation stack
01343         to do sophisticated error recovery if you are in a fragment rule.
01344         """
01345 
01346         self.input.consume()
01347 
01348 
01349     def traceIn(self, ruleName, ruleIndex):
01350         inputSymbol = "%s line=%d:%s" % (self.input.LT(1),
01351                                          self.getLine(),
01352                                          self.getCharPositionInLine()
01353                                          )
01354         
01355         BaseRecognizer.traceIn(self, ruleName, ruleIndex, inputSymbol)
01356 
01357 
01358     def traceOut(self, ruleName, ruleIndex):
01359         inputSymbol = "%s line=%d:%s" % (self.input.LT(1),
01360                                          self.getLine(),
01361                                          self.getCharPositionInLine()
01362                                          )
01363 
01364         BaseRecognizer.traceOut(self, ruleName, ruleIndex, inputSymbol)
01365 
01366 
01367 
01368 class Parser(BaseRecognizer):
01369     """
01370     @brief Baseclass for generated parser classes.
01371     """
01372     
01373     def __init__(self, lexer, state=None):
01374         BaseRecognizer.__init__(self, state)
01375 
01376         self.setTokenStream(lexer)
01377 
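    # Hedged usage sketch (assumption): a generated parser is normally driven
    # by a CommonTokenStream built on a generated lexer.  MyLexer, MyParser
    # and the start rule prog() are hypothetical; the stream classes are
    # assumed to be exported by the package as in the standard antlr3 runtime:
    #
    #   from rve_interface_gen.antlr3 import ANTLRStringStream, CommonTokenStream
    #   lexer = MyLexer(ANTLRStringStream("x = 1;"))
    #   parser = MyParser(CommonTokenStream(lexer))
    #   parser.prog()  # invoke whatever start rule the grammar defines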
01378 
01379     def reset(self):
01380         BaseRecognizer.reset(self) # reset all recognizer state variables
01381         if self.input is not None:
01382             self.input.seek(0) # rewind the input
01383 
01384 
01385     def getCurrentInputSymbol(self, input):
01386         return input.LT(1)
01387 
01388 
01389     def getMissingSymbol(self, input, e, expectedTokenType, follow):
01390         if expectedTokenType == EOF:
01391             tokenText = "<missing EOF>"
01392         else:
01393             tokenText = "<missing " + self.tokenNames[expectedTokenType] + ">"
01394         t = CommonToken(type=expectedTokenType, text=tokenText)
01395         current = input.LT(1)
01396         if current.type == EOF:
01397             current = input.LT(-1)
01398 
01399         if current is not None:
01400             t.line = current.line
01401             t.charPositionInLine = current.charPositionInLine
01402         t.channel = DEFAULT_CHANNEL
01403         return t
01404 
01405 
01406     def setTokenStream(self, input):
01407         """Set the token stream and reset the parser"""
01408         
01409         self.input = None
01410         self.reset()
01411         self.input = input
01412 
01413 
01414     def getTokenStream(self):
01415         return self.input
01416 
01417 
01418     def getSourceName(self):
01419         return self.input.getSourceName()
01420 
01421 
01422     def traceIn(self, ruleName, ruleIndex):
01423         BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1))
01424 
01425 
01426     def traceOut(self, ruleName, ruleIndex):
01427         BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1))
01428 
01429 
01430 class RuleReturnScope(object):
01431     """
01432     Rules can return start/stop info as well as possible trees and templates.
01433     """
01434 
01435     def getStart(self):
01436         """Return the start token or tree."""
01437         return None
01438     
01439 
01440     def getStop(self):
01441         """Return the stop token or tree."""
01442         return None
01443 
01444     
01445     def getTree(self):
01446         """Has a value potentially if output=AST."""
01447         return None
01448 
01449 
01450     def getTemplate(self):
01451         """Has a value potentially if output=template."""
01452         return None
01453 
01454 
01455 class ParserRuleReturnScope(RuleReturnScope):
01456     """
01457     Rules that return more than a single value must return an object
01458     containing all the values.  Besides the properties defined in
01459     RuleLabelScope.predefinedRulePropertiesScope there may be user-defined
01460     return values.  This class simply defines the minimum properties that
01461     are always defined and methods to access the others that might be
01462     available depending on output option such as template and tree.
01463 
01464     Note that text is not an actual property of the return value; it is computed
01465     from start and stop using the input stream's toString() method.  I
01466     could add a ctor to this so that we can pass in and store the input
01467     stream, but I'm not sure we want to do that.  It would seem to be undefined
01468     to get the .text property anyway if the rule matches tokens from multiple
01469     input streams.
01470 
01471     I do not use getters for fields of objects that are used simply to
01472     group values such as this aggregate.  The getters/setters are there to
01473     satisfy the superclass interface.
01474     """
01475 
01476     def __init__(self):
01477         self.start = None
01478         self.stop = None
01479 
01480     
01481     def getStart(self):
01482         return self.start
01483 
01484 
01485     def getStop(self):
01486         return self.stop
01487 

