rve_interface_gen: tokens.py Source File

Go to the documentation of this file.
00001 """ANTLR3 runtime package"""
00002 
00003 # begin[licence]
00004 #
00005 # [The "BSD licence"]
00006 # Copyright (c) 2005-2008 Terence Parr
00007 # All rights reserved.
00008 #
00009 # Redistribution and use in source and binary forms, with or without
00010 # modification, are permitted provided that the following conditions
00011 # are met:
00012 # 1. Redistributions of source code must retain the above copyright
00013 #    notice, this list of conditions and the following disclaimer.
00014 # 2. Redistributions in binary form must reproduce the above copyright
00015 #    notice, this list of conditions and the following disclaimer in the
00016 #    documentation and/or other materials provided with the distribution.
00017 # 3. The name of the author may not be used to endorse or promote products
00018 #    derived from this software without specific prior written permission.
00019 #
00020 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00021 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00022 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00023 # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00024 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00025 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00026 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00027 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00028 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00029 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030 #
00031 # end[licence]
00032 
00033 from constants import EOF, DEFAULT_CHANNEL, INVALID_TOKEN_TYPE
00034 
00035 ############################################################################
00036 #
00037 # basic token interface
00038 #
00039 ############################################################################
00040 
class Token(object):
    """@brief Abstract token baseclass.

    Declares the accessor interface shared by all token implementations.
    Every method here raises NotImplementedError; concrete subclasses are
    expected to override them.
    """

    def getText(self):
        """@brief Get the text of the token.

        Using setter/getter methods is deprecated. Use o.text instead.
        """
        raise NotImplementedError

    def setText(self, text):
        """@brief Set the text of the token.

        Using setter/getter methods is deprecated. Use o.text instead.
        """
        raise NotImplementedError


    def getType(self):
        """@brief Get the type of the token.

        Using setter/getter methods is deprecated. Use o.type instead."""

        raise NotImplementedError

    def setType(self, ttype):
        """@brief Set the type of the token.

        Using setter/getter methods is deprecated. Use o.type instead."""

        raise NotImplementedError


    def getLine(self):
        """@brief Get the line number on which this token was matched.

        Lines are numbered 1..n

        Using setter/getter methods is deprecated. Use o.line instead."""

        raise NotImplementedError

    def setLine(self, line):
        """@brief Set the line number on which this token was matched.

        Using setter/getter methods is deprecated. Use o.line instead."""

        raise NotImplementedError


    def getCharPositionInLine(self):
        """@brief Get the column of the token's first character.

        Columns are numbered 0..n-1

        Using setter/getter methods is deprecated. Use o.charPositionInLine instead."""

        raise NotImplementedError

    def setCharPositionInLine(self, pos):
        """@brief Set the column of the token's first character.

        Using setter/getter methods is deprecated. Use o.charPositionInLine instead."""

        raise NotImplementedError


    def getChannel(self):
        """@brief Get the channel of the token.

        Using setter/getter methods is deprecated. Use o.channel instead."""

        raise NotImplementedError

    def setChannel(self, channel):
        """@brief Set the channel of the token.

        Using setter/getter methods is deprecated. Use o.channel instead."""

        raise NotImplementedError


    def getTokenIndex(self):
        """@brief Get the index in the input stream.

        An index from 0..n-1 of the token object in the input stream.
        This must be valid in order to use the ANTLRWorks debugger.

        Using setter/getter methods is deprecated. Use o.index instead."""

        raise NotImplementedError

    def setTokenIndex(self, index):
        """@brief Set the index in the input stream.

        Using setter/getter methods is deprecated. Use o.index instead."""

        raise NotImplementedError


    def getInputStream(self):
        """@brief Get the character stream this token was created from.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input."""

        raise NotImplementedError

    def setInputStream(self, input):
        """@brief Set the character stream this token was created from.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input."""

        raise NotImplementedError
00156 
00157 
00158 ############################################################################
00159 #
00160 # token implementations
00161 #
00162 # Token
00163 # +- CommonToken
00164 # \- ClassicToken
00165 #
00166 ############################################################################
00167 
class CommonToken(Token):
    """@brief Basic token implementation.

    This implementation does not copy the text from the input stream upon
    creation, but keeps start/stop pointers into the stream to avoid
    unnecessary copy operations.

    """

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 input=None, start=None, stop=None, oldToken=None):
        """Create a token, either from explicit fields or as a copy.

        If oldToken is given, all fields are taken from it and the other
        arguments are ignored. Otherwise the token is built from type,
        channel, text, input, start and stop; line starts at 0,
        charPositionInLine at -1 and index at -1.
        """
        Token.__init__(self)

        if oldToken is not None:
            self.type = oldToken.type
            self.line = oldToken.line
            self.charPositionInLine = oldToken.charPositionInLine
            self.channel = oldToken.channel
            self.index = oldToken.index
            if isinstance(oldToken, CommonToken):
                self._text = oldToken._text
                self.input = oldToken.input
                self.start = oldToken.start
                self.stop = oldToken.stop
            else:
                # Other token classes (e.g. ClassicToken) expose their
                # text through the public `text` attribute and have no
                # `_text`, and they carry no stream offsets.  Still set
                # every attribute so later reads never hit an
                # AttributeError.
                self._text = oldToken.text
                self.input = getattr(oldToken, "input", None)
                self.start = None
                self.stop = None

        else:
            self.type = type
            self.input = input
            self.charPositionInLine = -1 # set to invalid position
            self.line = 0
            self.channel = channel

            # What token number is this from 0..n-1 tokens; < 0 implies
            # invalid index
            self.index = -1

            # We need to be able to change the text once in a while.  If
            # this is non-null, then getText should return this.  Note that
            # start/stop are not affected by changing this.
            self._text = text

            # The char position into the input buffer where this token starts
            self.start = start

            # The char position into the input buffer where this token stops
            # This is the index of the last char, *not* the index after it!
            self.stop = stop


    def getText(self):
        """Return the override text if set, else the input substring.

        Returns None when no override text is set and there is no input
        stream to read from.
        """
        if self._text is not None:
            return self._text

        if self.input is None:
            return None

        return self.input.substring(self.start, self.stop)


    def setText(self, text):
        """
        Override the text for this token.  getText() will return this text
        rather than pulling from the buffer.  Note that this does not mean
        that start/stop indexes are not valid.  It means that that input
        was converted to a new string in the token object.
        """
        self._text = text

    text = property(getText, setText)


    def getType(self):
        """Return the token type."""
        return self.type

    def setType(self, ttype):
        """Set the token type."""
        self.type = ttype


    def getLine(self):
        """Return the line number stored on this token."""
        return self.line

    def setLine(self, line):
        """Set the line number stored on this token."""
        self.line = line


    def getCharPositionInLine(self):
        """Return the column stored on this token."""
        return self.charPositionInLine

    def setCharPositionInLine(self, pos):
        """Set the column stored on this token."""
        self.charPositionInLine = pos


    def getChannel(self):
        """Return the channel of this token."""
        return self.channel

    def setChannel(self, channel):
        """Set the channel of this token."""
        self.channel = channel


    def getTokenIndex(self):
        """Return the index of this token in the token stream."""
        return self.index

    def setTokenIndex(self, index):
        """Set the index of this token in the token stream."""
        self.index = index


    def getInputStream(self):
        """Return the character stream this token points into (may be None)."""
        return self.input

    def setInputStream(self, input):
        """Set the character stream this token points into."""
        self.input = input


    def __str__(self):
        """Return a debug representation of the token.

        Format: [@index,start:stop='text',<type>,channel=N,line:column];
        the channel part is only present for channels > 0.  A token whose
        type equals EOF is rendered as "<EOF>".
        """
        if self.type == EOF:
            return "<EOF>"

        channelStr = ""
        # Guard against None so the comparison cannot raise on Python 3.
        if self.channel is not None and self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if txt is not None:
            # NOTE: each control character becomes two backslashes plus a
            # letter, and %r below escapes those again.  Kept exactly as
            # is, since consumers of this output may depend on it.
            txt = txt.replace("\n","\\\\n")
            txt = txt.replace("\r","\\\\r")
            txt = txt.replace("\t","\\\\t")
        else:
            txt = "<no text>"

        def num(value):
            # "%d" raises TypeError on None, which is the default for
            # start/stop (and type); render those as "None" instead while
            # keeping the exact %d output for real numbers.
            if value is None:
                return "None"
            return "%d" % value

        return "[@%s,%s:%s=%r,<%s>%s,%s:%s]" % (
            num(self.index),
            num(self.start), num(self.stop),
            txt,
            num(self.type), channelStr,
            num(self.line), num(self.charPositionInLine)
            )
00303     
00304 
class ClassicToken(Token):
    """@brief Alternative token implementation.

    A Token object like we'd use in ANTLR 2.x; has an actual string created
    and associated with this object.  These objects are needed for imaginary
    tree nodes that have payload objects.  We need to create a Token object
    that has a string; the tree node will point at this token.  CommonToken
    has indexes into a char stream and hence cannot be used to introduce
    new strings.
    """

    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
                 oldToken=None
                 ):
        """Create a token from explicit fields or as a copy of oldToken.

        If oldToken is given, text, type, line, charPositionInLine and
        channel are copied from it and the other arguments are ignored.
        Otherwise the token is built from type, text and channel, with
        line and charPositionInLine set to None.  The index is None in
        both cases.
        """
        Token.__init__(self)

        if oldToken is not None:
            self.text = oldToken.text
            self.type = oldToken.type
            self.line = oldToken.line
            self.charPositionInLine = oldToken.charPositionInLine
            self.channel = oldToken.channel

        else:
            # Without this `else` the assignments below ran
            # unconditionally and wiped out everything just copied from
            # oldToken.
            self.text = text
            self.type = type
            self.line = None
            self.charPositionInLine = None
            self.channel = channel

        # The index is never copied from oldToken.
        self.index = None


    def getText(self):
        """Return the text of this token."""
        return self.text

    def setText(self, text):
        """Set the text of this token."""
        self.text = text


    def getType(self):
        """Return the token type."""
        return self.type

    def setType(self, ttype):
        """Set the token type."""
        self.type = ttype


    def getLine(self):
        """Return the line number stored on this token."""
        return self.line

    def setLine(self, line):
        """Set the line number stored on this token."""
        self.line = line


    def getCharPositionInLine(self):
        """Return the column stored on this token."""
        return self.charPositionInLine

    def setCharPositionInLine(self, pos):
        """Set the column stored on this token."""
        self.charPositionInLine = pos


    def getChannel(self):
        """Return the channel of this token."""
        return self.channel

    def setChannel(self, channel):
        """Set the channel of this token."""
        self.channel = channel


    def getTokenIndex(self):
        """Return the index of this token."""
        return self.index

    def setTokenIndex(self, index):
        """Set the index of this token."""
        self.index = index


    def getInputStream(self):
        """Always return None; this token keeps no input stream."""
        return None

    def setInputStream(self, input):
        """Ignore the given stream; this token keeps no input stream."""
        pass


    def toString(self):
        """Return a debug representation of the token.

        Format: [@index,'text',<type>,channel=N,line:column]; the channel
        part is only present for channels > 0.
        """
        channelStr = ""
        # Guard against None so the comparison cannot raise on Python 3.
        if self.channel is not None and self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if txt is None:
            txt = "<no text>"

        return "[@%r,%r,<%r>%s,%r:%r]" % (self.index,
                                          txt,
                                          self.type,
                                          channelStr,
                                          self.line,
                                          self.charPositionInLine
                                          )


    __str__ = toString
    __repr__ = toString
00405 
00406 
00407 
# Module-level token whose type is EOF.
EOF_TOKEN = CommonToken(type=EOF)

# Module-level token whose type is INVALID_TOKEN_TYPE.
INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)

# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
# will avoid creating a token for this symbol and try to fetch another.
# It has the same type as INVALID_TOKEN but is a separate object.
SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)
00415 
00416