dfa.py
Go to the documentation of this file.
00001 """ANTLR3 runtime package"""
00002 
00003 # begin[licence]
00004 #
00005 # [The "BSD licence"]
00006 # Copyright (c) 2005-2008 Terence Parr
00007 # All rights reserved.
00008 #
00009 # Redistribution and use in source and binary forms, with or without
00010 # modification, are permitted provided that the following conditions
00011 # are met:
00012 # 1. Redistributions of source code must retain the above copyright
00013 #    notice, this list of conditions and the following disclaimer.
00014 # 2. Redistributions in binary form must reproduce the above copyright
00015 #    notice, this list of conditions and the following disclaimer in the
00016 #    documentation and/or other materials provided with the distribution.
00017 # 3. The name of the author may not be used to endorse or promote products
00018 #    derived from this software without specific prior written permission.
00019 #
00020 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00021 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00022 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00023 # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00024 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00025 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00026 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00027 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00028 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00029 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030 #
00031 # end[licensc]
00032 
00033 from constants import EOF
00034 from exceptions import NoViableAltException, BacktrackingFailed
00035 
00036 
00037 class DFA(object):
00038     """@brief A DFA implemented as a set of transition tables.
00039 
00040     Any state that has a semantic predicate edge is special; those states
00041     are generated with if-then-else structures in a specialStateTransition()
00042     which is generated by cyclicDFA template.
00043     
00044     """
00045     
00046     def __init__(
00047         self,
00048         recognizer, decisionNumber,
00049         eot, eof, min, max, accept, special, transition
00050         ):
00051         ## Which recognizer encloses this DFA?  Needed to check backtracking
00052         self.recognizer = recognizer
00053 
00054         self.decisionNumber = decisionNumber
00055         self.eot = eot
00056         self.eof = eof
00057         self.min = min
00058         self.max = max
00059         self.accept = accept
00060         self.special = special
00061         self.transition = transition
00062 
00063 
00064     def predict(self, input):
00065         """
00066         From the input stream, predict what alternative will succeed
00067         using this DFA (representing the covering regular approximation
00068         to the underlying CFL).  Return an alternative number 1..n.  Throw
00069          an exception upon error.
00070         """
00071         mark = input.mark()
00072         s = 0 # we always start at s0
00073         try:
00074             for _ in xrange(50000):
00075                 #print "***Current state = %d" % s
00076                 
00077                 specialState = self.special[s]
00078                 if specialState >= 0:
00079                     #print "is special"
00080                     s = self.specialStateTransition(specialState, input)
00081                     if s == -1:
00082                         self.noViableAlt(s, input)
00083                         return 0
00084                     input.consume()
00085                     continue
00086 
00087                 if self.accept[s] >= 1:
00088                     #print "accept state for alt %d" % self.accept[s]
00089                     return self.accept[s]
00090 
00091                 # look for a normal char transition
00092                 c = input.LA(1)
00093 
00094                 #print "LA = %d (%r)" % (c, unichr(c) if c >= 0 else 'EOF')
00095                 #print "range = %d..%d" % (self.min[s], self.max[s])
00096 
00097                 if c >= self.min[s] and c <= self.max[s]:
00098                     # move to next state
00099                     snext = self.transition[s][c-self.min[s]]
00100                     #print "in range, next state = %d" % snext
00101                     
00102                     if snext < 0:
00103                         #print "not a normal transition"
00104                         # was in range but not a normal transition
00105                         # must check EOT, which is like the else clause.
00106                         # eot[s]>=0 indicates that an EOT edge goes to another
00107                         # state.
00108                         if self.eot[s] >= 0: # EOT Transition to accept state?
00109                             #print "EOT trans to accept state %d" % self.eot[s]
00110                             
00111                             s = self.eot[s]
00112                             input.consume()
00113                             # TODO: I had this as return accept[eot[s]]
00114                             # which assumed here that the EOT edge always
00115                             # went to an accept...faster to do this, but
00116                             # what about predicated edges coming from EOT
00117                             # target?
00118                             continue
00119 
00120                         #print "no viable alt"
00121                         self.noViableAlt(s, input)
00122                         return 0
00123 
00124                     s = snext
00125                     input.consume()
00126                     continue
00127 
00128                 if self.eot[s] >= 0:
00129                     #print "EOT to %d" % self.eot[s]
00130                     
00131                     s = self.eot[s]
00132                     input.consume()
00133                     continue
00134 
00135                 # EOF Transition to accept state?
00136                 if c == EOF and self.eof[s] >= 0:
00137                     #print "EOF Transition to accept state %d" \
00138                     #  % self.accept[self.eof[s]]
00139                     return self.accept[self.eof[s]]
00140 
00141                 # not in range and not EOF/EOT, must be invalid symbol
00142                 self.noViableAlt(s, input)
00143                 return 0
00144 
00145             else:
00146                 raise RuntimeError("DFA bang!")
00147             
00148         finally:
00149             input.rewind(mark)
00150 
00151 
00152     def noViableAlt(self, s, input):
00153         if self.recognizer._state.backtracking > 0:
00154             raise BacktrackingFailed
00155 
00156         nvae = NoViableAltException(
00157             self.getDescription(),
00158             self.decisionNumber,
00159             s,
00160             input
00161             )
00162 
00163         self.error(nvae)
00164         raise nvae
00165 
00166 
00167     def error(self, nvae):
00168         """A hook for debugging interface"""
00169         pass
00170 
00171 
00172     def specialStateTransition(self, s, input):
00173         return -1
00174 
00175 
00176     def getDescription(self):
00177         return "n/a"
00178 
00179 
00180 ##     def specialTransition(self, state, symbol):
00181 ##         return 0
00182 
00183 
00184     def unpack(cls, string):
00185         """@brief Unpack the runlength encoded table data.
00186 
00187         Terence implemented packed table initializers, because Java has a
00188         size restriction on .class files and the lookup tables can grow
00189         pretty large. The generated JavaLexer.java of the Java.g example
00190         would be about 15MB with uncompressed array initializers.
00191 
00192         Python does not have any size restrictions, but the compilation of
00193         such large source files seems to be pretty memory hungry. The memory
00194         consumption of the python process grew to >1.5GB when importing a
00195         15MB lexer, eating all my swap space and I was to impacient to see,
00196         if it could finish at all. With packed initializers that are unpacked
00197         at import time of the lexer module, everything works like a charm.
00198         
00199         """
00200         
00201         ret = []
00202         for i in range(len(string) / 2):
00203             (n, v) = ord(string[i*2]), ord(string[i*2+1])
00204 
00205             # Is there a bitwise operation to do this?
00206             if v == 0xFFFF:
00207                 v = -1
00208 
00209             ret += [v] * n
00210 
00211         return ret
00212     
00213     unpack = classmethod(unpack)


rve_interface_gen
Author(s): Josh Faust
autogenerated on Wed Dec 11 2013 14:31:00