rocon_ebnf: rule_parser.py Source File

Go to the documentation of this file.
00001 #lparis45@gmail.com
00002 #
00003 #copyright 2008 under the GNU General Public License
00004 #
00005 
00006 """ simple parser using rules defined in EBNF format
00007 
00008 This module lets you parse a string according to rules
00009 that you define in a (light) EBNF format.
00010 
00011 You use it as the re module:
00012 
00013     rp.match(rule,'string to be parsed')
00014 
00015     Result: RP Object  when parsing is ok,
00016                        else None
00017 
00018     where:
00019         rule is a list of rules definitions
00020 
00021         ex:  rule= ['main      ::= SELECT field FROM table ' ,
00022                     'field     ::= alphanum*      ',
00023                     'alphanum  ::= r"[A-Za-z0-9]" ',
00024                     'table     ::= alphanum*      ' ] 
00025 
00026 For more details read the rp.doc
00027 """
00028 
00029 import re,string
00030 
00031 __version__='0.91'
00032 #0.91    2009/01  appending re.DOTALL to parsing $vars in interpret,
00033 #                    as sometimes, \n exists !!!
00034 #                 remove the strip of string
00035 #                 ignore trailing blanks at the end of parsing
00036 #                 take into account python reserved words
00037 
def compile(rule):
    """ build an RP object from a list of rule definitions and return it """
    parser = RP()
    parser.compile(rule)
    return parser
00042 
def match(rule, thestr):
    """ compile the rule, then match the string against it

    The result is whatever the compiled object's match() returns.
    """
    compiled = compile(rule)
    return compiled.match(thestr)
00046 
00047 #==================================== internal code ==========================        
# Python reserved words that are refused as rule names (added in 0.91).
# Only the keys are meaningful; every value is the empty string.
RESERVED = dict.fromkeys(
    ('and', 'as', 'assert', 'break', 'class', 'continue', 'def', 'del',
     'elif', 'else', 'except', 'exec', 'finally', 'for', 'from', 'global',
     'if', 'import', 'in', 'is', 'lambda', 'not', 'or', 'pass', 'print',
     'raise', 'return', 'try', 'while', 'with', 'yield'),
    '')
00080 
00081 
class _Tokenizer:
    """ cursor over the string being parsed """

    def __init__(self, string, rp):
        """ start at the beginning of ``string``; ``rp`` supplies ignore_blanks """
        self.rp = rp
        self.string = string
        self.index = 0      # current read position
        self.maxscan = 0    # furthest position ever reached by read()
        self.depth = 0      # nesting level used by getIndent()

    def peek(self):
        """ return the current character without consuming it ('' if none) """
        try:
            return self.string[self.index]
        except Exception:
            return ''

    def read(self):
        """ return the current character and advance the cursor by one """
        current = self.peek()
        self.index += 1
        if self.index > self.maxscan:
            self.maxscan = self.index
        return current

    def getString(self, ptr):
        """ return the text between ``ptr`` and the cursor """
        return self.string[ptr:self.index]

    def getRemaining(self):
        """ return the text from the cursor to the end of the string """
        return self.string[self.index:]

    def removeBlanks(self):
        """ skip space characters at the cursor and return the new position

        Nothing is skipped when rp.ignore_blanks is false.
        """
        if not self.rp.ignore_blanks:
            return self.index
        while self.peek() == ' ':
            self.read()
        return self.index

    def reset(self, ptr=0):
        """ move the cursor back to ``ptr`` (used when a parse attempt fails) """
        self.index = ptr

    def getIndent(self, _down, _str='.'):
        """ return ``_str`` repeated once per nesting level, for debug output

        Entering (``_down`` true) uses the level before incrementing it;
        leaving uses the level after decrementing it.
        """
        if not _down:
            self.depth -= 1
            return _str * self.depth
        indent = _str * self.depth
        self.depth += 1
        return indent
00131         
class _RpRule:
    """ A named rule: an ordered list of alternative definitions """

    def __init__(self, name, _rp):
        """ ``name`` is the key used in rp.vals; ``_rp`` is the owning parser """
        self.name = name
        self.definitions = []
        self.rp = _rp

    def addDefinition(self, aDef):
        """ add a definition (one alternative) to the rule """
        self.definitions.append(aDef)

    def match(self, _tk):
        """ match the string ``_tk`` against the rule

        Usually this method is only used for the main rule (the first rule
        defined).  The init code is run first, then the string is parsed.
        Returns True when the parse consumed the whole string (trailing
        blanks are ignored when rp.ignore_blanks is set).  On success the
        post code is run and rp.code receives the executed statements; on
        failure rp.stringError receives a marker built from the furthest
        position scanned.
        """
        rp = self.rp
        for init in rp.code_init:
            if rp.execute_code:
                # NOTE(review): runs code taken from the grammar; only
                # compile grammars coming from a trusted source.
                exec(init, rp.vals)
            rp.code_array.append(init)
        _tk = _Tokenizer(_tk, rp)    #0.91
        self.scanMax = 0             # kept as in the original; not read in this class
        self._parse(_tk)
        len1 = _tk.index
        if rp.ignore_blanks:                  #0.91
            len2 = len(_tk.string.rstrip())   #0.91
        else:                                 #0.91
            len2 = len(_tk.string)
        if len1 < len2:
            rp.stringError = '-' * _tk.maxscan + '^'
            if rp.verbose:
                print(_tk.string)
                print(rp.stringError)
        else:
            for post in rp.code_post:
                if rp.execute_code:
                    exec(post, rp.vals)
                rp.code_array.append(post)
            rp.code = ''.join(x + '\n' for x in rp.code_array)
        return len1 >= len2

    def _try_definitions(self, _tk, accept_empty):
        """ try each definition in order, stopping at the first success

        A result > 0 always counts as success; a result of 0 counts only
        when ``accept_empty`` is true.  Returns the result of the last
        definition tried (-1 when there is none).
        """
        ret = -1
        for d in self.definitions:
            ret = d._parse(_tk)
            if ret > 0 or (accept_empty and ret == 0):
                break
        return ret

    def _parse(self, _tk, mult='', n_val=None):
        """ parse the string against the rule

        ``mult`` is the multiplicity suffix: '' (once), '?' (optional),
        anything else is handled as a repetition, with '*' additionally
        accepting zero occurrences.  ``n_val`` is accepted for signature
        compatibility with the other item classes and is not used here.
        Returns -1 on failure (the cursor is restored), otherwise >= 0, and
        stores the matched text in rp.vals[self.name].
        """
        if self.rp.verbose:
            print(_tk.getIndent(True) + 'Parsing rule: "' + self.name + '" for string:"' + _tk.getRemaining() + '"')
        _top = _tk.index
        _top2 = _tk.removeBlanks()
        if mult == '' or mult == '?':
            ret = self._try_definitions(_tk, True)
            if ret < 0 and mult == '?':
                ret = 0            # rule ok or not, no problem
        else:
            # The first pass decides the result; further passes only consume
            # as many repetitions as possible.
            first = self._try_definitions(_tk, False)
            ret = first
            while ret > 0:
                ret = self._try_definitions(_tk, False)
            ret = first if first > 0 else -1
            if ret < 0 and mult == '*':
                ret = 0
        _retstr = ''
        _retindent = '.'
        if ret >= 0:
            _retstr = _tk.getString(_top2)
            self.rp.vals[self.name] = _retstr
            if ret > 0:
                _retindent = '<'
        else:
            _tk.reset(_top)
        if self.rp.verbose:
            print(_tk.getIndent(False, _retindent) + 'Parsed rule :"' + self.name + '" - value="' + _retstr + '"')
        return ret
00219 
class _RpDefinition:
    """ One alternative of a rule: a sequence of items (rule, terminal,
    regular expression) plus the code to run when the sequence matches """

    # Finds the first "$name" placeholder of a code statement:
    #   group(1) = text before '$', group(2) = name, group(3) = the rest.
    # The same pattern is used for every placeholder, so names containing
    # underscores are handled wherever they appear in the statement.
    _VAR_RE = re.compile(r'^([^\$]*)\$([A-Za-z0-9_]*)(.*)$', re.DOTALL)   #0.91

    def __init__(self, aDef, _rp, code=''):
        """ scan the definition text ``aDef`` into item objects and keep
        ``code``, the statements to run once the definition has matched """
        self.rp = _rp
        self.definition = self._scan(aDef)
        self.code = code

    def _addcode(self, code):
        """ add a code statement to the current definition """
        self.code += '\n' + code

    def _parse(self, _tk, mult=''):
        """ parse the definition

        Each item is parsed in turn with its own multiplicity.  An item
        flagged as a "not" rule (written ^name) ends the sequence; its last
        value in rp.vals, when present, is handed to the item before it.
        Returns the largest positive item result, or -1 (cursor restored)
        when an item fails or when no item returned a positive result.
        The attached code is interpreted on success.
        """
        ret = -1
        _top = _tk.index
        fullret = -1
        last = len(self.definition) - 1
        for _i, (d, mult, notrule) in enumerate(self.definition):
            if notrule:
                break  # a "not" item is never parsed itself
            n_val = None
            if _i < last:
                n_d, n_mult, n_notrule = self.definition[_i + 1]
                if n_notrule and isinstance(n_d, _RpRule) and n_d.name in self.rp.vals:
                    n_val = self.rp.vals[n_d.name]
            try:
                ret = d._parse(_tk, mult, n_val)
                if ret > 0:
                    fullret = max(fullret, ret)
            except IndexError as err:
                print('+++ ' + str(err))
                ret = -1
                # The item failed: the whole definition must fail too, even
                # if an earlier item had already matched.
                fullret = -1
                _tk.getIndent(False)  # Just to adjust indentation
                break
            if ret < 0:
                fullret = -1
                break  # one item is not OK
        ret = fullret
        if ret < 0:
            _tk.reset(_top)
        elif ret > 0:
            # this definition is successfully parsed,
            # so the related code can be interpreted.
            self._interpretCode()
        return ret

    def _scan(self, adef):
        """ scan the definition text and create the item objects

        Returns a list of [item, multiplicity, notrule] entries, one per
        whitespace-separated word of ``adef``.
        """
        definition = []
        for a in adef.split():
            mult = ''
            m = re.match(r'([^\+\?\*]*)(.?)$', a)
            if m is None:
                # The word holds + ? * before its last character: only the
                # quoted operator terminals below are accepted.
                if len(a) > 2 and a[0:3] in ('"+"', '"?"', '"*"'):
                    key = a[0:3]
                    mult = a[3:]
                elif len(a) > 2 and a[0:4] in ('"**"', '"+="', '"*="'):
                    key = a[0:4]
                    mult = a[4:]
                elif len(a) > 2 and a[0:5] == '"**="':
                    key = a[0:5]
                    mult = a[5:]
                else:
                    raise Exception('Invalid definition: %s in %s' % (a, adef))
            else:
                key = m.group(1)
                mult = m.group(2)
            if key == '':
                # e.g. a lone '+': there is nothing to apply the suffix to
                raise Exception('Invalid definition: %s in %s' % (a, adef))
            notrule = False
            if key.find('"..."') >= 2:
                # "x"..."y"  ==> Regular expression [x-y]
                m2 = re.match(r'"(.)"\.{3}"(.)"', key)
                if m2 is None:
                    raise Exception('Invalid expression:' + key)
                a1 = _RpRegular('[' + m2.group(1) + '-' + m2.group(2) + ']', self.rp)
            elif key[0] == '"' and key[-1] == '"':
                # "xxx"  ==> Terminal
                a1 = _RpTerminal(key[1:-1], self.rp)
            elif key[0] == '"' and key[-1] != '"':
                # "xxxx  ==> Exception (" is missing)
                raise Exception('End " is missing: %s in %s' % (a, adef))
            elif key.startswith('r"') and key.endswith('"'):
                # r"xxxx"  ==> Regular expression
                a1 = _RpRegular(key[2:-1], self.rp)
            elif key.islower():
                # lower_case  ==> Rule (a leading ^ marks a "not" rule)
                if key[0] == '^':
                    notrule = True
                    key = key[1:]
                if key in self.rp.rules:
                    a1 = self.rp.rules[key]
                else:
                    a1 = _RpRule(key, self.rp)
                    self.rp.rules[key] = a1
            else:
                # anything else  ==> Terminal
                a1 = _RpTerminal(key, self.rp)
            definition.append([a1, mult, notrule])
        return definition

    def _interpretCode(self):
        """ interpret the code attached to the definition

        The first character of self.code is the statement separator.  In
        each statement every $name is replaced by rp.vals[name]; the result
        is executed when rp.execute_code is set and is always appended to
        rp.code_array.  Raises Exception when a $name is not in rp.vals.
        """
        if len(self.code) == 0:
            return
        sepcode = self.code[0]
        for cc in self.code.split(sepcode)[1:]:
            # ex for code:    the_string="$char"+"---"
            #  => group(1)='the_string="'
            #  => group(2)='char'        ==> looked up in rp.vals
            #  => group(3)='"+"---"'
            m = self._VAR_RE.match(cc)
            while m is not None:
                _deb, _var, _fin = m.groups()
                if _var not in self.rp.vals:
                    raise Exception(_var + ' not set')
                cc = _deb + self.rp.vals.get(_var) + _fin
                m = self._VAR_RE.match(cc)
            if self.rp.execute_code:
                try:
                    # NOTE(review): runs grammar code with parsed text pasted
                    # in; only use with trusted grammars and input.
                    exec(cc, self.rp.vals)
                except Exception as error:
                    print('+++ ' + str(error))
                    print('+++Code= ' + cc)
            self.rp.code_array.append(cc)
00371 
class _RpTerminal:
    """ class to handle terminals """

    def __init__(self, term, rp):
        """ init terminal

        The terminal is stripped and stored upper-cased.  self.min is the
        length of its leading run of letters, digits and underscores: the
        number of characters that must be consumed for a match.
        NOTE(review): the original docstring described an abbreviation
        scheme ("SEParator", minimum 3); the code below does not implement
        it, as the whole leading run is counted -- confirm the intent.
        """
        self.rp = rp
        term = term.strip()
        # The pattern can match the empty string, so the match never fails.
        prefix = re.match(r'([A-Za-z0-9_]*)', term).group(1)
        self.terminal = term.upper()
        self.min = len(prefix)

    def _parse(self, _tk, mult='', n_val=None):
        """ parse the terminal (case-insensitive)

        Returns the number of characters matched, -1 on failure, or at
        least 0 when ``mult`` is '?' or '*'.  ``n_val`` is unused here.
        """
        if self.rp.verbose:
            print(_tk.getIndent(True) + 'Parsing terminal:"' + self.terminal + '" for string:"' + _tk.getRemaining() + '"')
        ret = -1
        _top = _tk.index
        _top2 = _tk.removeBlanks()
        count = 0   # characters of the terminal matched so far
        while (count < self.min or count < len(self.terminal)) and _tk.peek().upper() == self.terminal[count]:
            _tk.read()
            count += 1
            ret = count
        if _tk.index - _top2 < self.min:
            ret = -1
            _tk.reset(_top)
        if 0 < count < len(self.terminal):
            # Partial match: refuse it when the next input character is a
            # letter that differs from the next character of the terminal.
            ntok = _tk.peek().strip()
            nterm = self.terminal[count].strip()
            if ntok != '' and nterm != '' and ntok in string.ascii_letters and ntok != nterm:
                ret = -1
        if mult == '?' or mult == '*':
            ret = max(0, ret)
        _retstr = ''
        _retindent = '.'
        if ret > 0:
            _retstr = _tk.getString(_top2)
            _retindent = '<'
        if self.rp.verbose:
            print(_tk.getIndent(False, _retindent) + 'Parsed terminal :"' + self.terminal + '" - value="' + _retstr + '"')
        return ret
00423 
class _RpRegular:
    """ class to handle regular expressions (matched one character at a time) """

    def __init__(self, term, rp):
        """ init class, compile the regular expression ``term`` """
        self.source = term
        self.rp = rp
        self.regular = re.compile(term)

    def _matches(self, char):
        """ tell whether the expression matches ``char``; a value the regex
        engine cannot handle (TypeError) counts as "no match" """
        try:
            return bool(self.regular.match(char))
        except TypeError:
            return False

    def _parse(self, _tk, mult='', n_val=None):
        """ parse the string against the regular expression

        ``mult``: '' one character, '+' one or more, '?' zero or one,
        '*' zero or more; anything else raises Exception.
        ``n_val``: when given and equal to the next character, the parse
        fails at once (used to stop before the value of a "not" rule).
        Returns the number of characters consumed, 0 for an empty '?'/'*'
        match, or -1 on failure (cursor restored).
        """
        verbose = self.rp.verbose
        if verbose:
            print(_tk.getIndent(True) + 'Parsing regular:"' + self.source + '" for string:"' + _tk.getRemaining() + '"')
        if n_val is not None and n_val == _tk.peek():
            if verbose:
                print(_tk.getIndent(False) + 'Parsed regular :"' + self.source + '" - value=""')
            return -1
        if mult == '' or mult == '+':
            ret = -1
        elif mult == '?' or mult == '*':
            ret = 0  # default for these mult
        else:
            raise Exception('Invalid multiplicator:', mult)
        _top = _tk.index
        if self._matches(_tk.peek()):
            _tk.read()
            ret = 1
            if mult == '+' or mult == '*':
                # Stop at the end of the input: peek() then returns '' and a
                # pattern able to match the empty string would never stop.
                while _tk.peek() != '' and self._matches(_tk.peek()):
                    _tk.read()
                    ret += 1
        if ret == -1:
            _tk.reset(_top)
        _retstr = ''
        _retindent = '.'
        if ret > 0:
            _retstr = _tk.getString(_top)
            _retindent = '<'
        if verbose:
            print(_tk.getIndent(False, _retindent) + 'Parsed regular :"' + self.source + '" - value="' + _retstr + '"')
        return ret
00490     
class RP:
    """ A compiled set of rules.

    compile() reads the rule lines, match() parses a string with the first
    rule defined and, on success, publishes the collected values as
    attributes of the object.
    """

    # Directive lines (keyword is case-insensitive, the rest is the argument)
    _INIT_RE = re.compile(r'^\s*[Ii][Nn][Ii][Tt]\s+(.*)$')
    _IMPORT_RE = re.compile(r'^\s*[Ii][Mm][Pp][Oo][Rr][Tt]\s+(.*)$')
    _OPTION_RE = re.compile(r'^\s*[Oo][Pp][Tt][Ii][Oo][Nn]\s+(.*)$')
    _POST_RE = re.compile(r'^\s*[Pp][Oo][Ss][Tt]\s+(.*)$')

    def __init__(self):
        self.rules={}           # Contain list of rules 
        self.vals={}            # Values of rules while parsing the string
        self.verbose=False      # Display rules in process...
        self.debug=False        # Debugging mode
        self.trace=False        # Set by "option TRACE"
        self.ignore_blanks=True # Default is: ignore blanks 
        self.execute_code=True  # Default is: execute code
        self.sepcode='@'        # Character introducing code on a rule line
        self.code_array=[]      # Code statements, in execution order
        self.code_init=[]       # Statements of the "init" lines
        self.code_post=[]       # Statements of the "post" lines
        self.code=''            #the code, user can run !
        self.maindef=None       # First rule defined, used by match()
        self.stringError=''     # Marker line describing the last failure
        self.rp_locals=locals

    def _build_patterns(self):
        """ return the three line patterns for the current code separator

        The separator is escaped so that characters special to regular
        expressions (such as $ ^ or a backslash) can be used.
        """
        sep = re.escape(self.sepcode)
        return (
            re.compile(r'^\s*([A-Za-z0-9_-]*)\s*::=\s*([^' + sep + r']*)(.*)$'),  #  xxxx ::=yyyyyy @zzzzzz
            re.compile(r'^\s*\|\s*([^' + sep + r']*)(.*)$'),                      #       |  yyyyyy @zzzzzz
            re.compile(r'^\s*' + sep + r'(.*)$'),                                 #                 @zzzzzz
        )

    def _add_definitions(self, target, text, code, pending):
        """ add to rule ``target`` one definition per |-separated alternative

        Bracketed groups are first moved into generated rules, whose lines
        are queued on ``pending``.  Returns the last definition created.
        """
        text, newrules = splitBrackets(text)
        last = None
        for alt in re.split(r'\|', text):
            last = _RpDefinition(_xlate(alt, True), self, code)
            target.addDefinition(last)
        pending.extend(newrules)
        return last

    def _apply_option(self, val):
        """ apply one "option" directive; raise Exception when it is unknown """
        uval = val.upper()
        if uval == 'VERBOSE':
            self.verbose = True
        elif uval == 'TRACE':
            self.trace = True
        elif uval == 'BLANKS':
            self.ignore_blanks = False
        elif uval == 'NORUN':
            self.execute_code = False
        else:
            parts = val.split()
            if len(parts) >= 2 and parts[0].upper() == 'SEPCODE':
                self.sepcode = parts[1]
            else:
                raise Exception('Invalid option:' + val)

    def compile(self, rule):
        """ compile the rule lines

        ``rule`` is an iterable of lines; it is not modified.  Accepted
        lines: "name ::= definition @code", "| definition @code", "@code",
        "init code", "import file", "option value", "post code", blank
        lines and lines starting with #.
        Raises Exception for a line that fits none of these, a reserved
        rule name, an unknown option, an unreadable import file, or a rule
        that is used but never defined.
        """
        cmp1, cmp2, cmp3 = self._build_patterns()
        currentRule = None
        ruleDef = None
        # Work on a private copy: lines are appended while iterating
        # (generated rules, imported files) and the loop picks them up.
        pending = list(rule)
        for line in pending:
            line = line.strip()
            if line == '' or line[0] == '#':
                continue

            m = cmp1.match(line)
            if m is not None:
                _rule = m.group(1)
                if _rule in RESERVED:                                                               #0.91
                    raise Exception("Invalid rule name: '%s' is a python reserved word" % (_rule) ) #0.91
                if _rule in self.rules:
                    currentRule = self.rules[_rule]
                else:
                    currentRule = _RpRule(_rule, self)
                    self.rules[_rule] = currentRule
                ruleDef = self._add_definitions(currentRule, m.group(2), m.group(3), pending)
                if self.maindef is None:
                    self.maindef = currentRule
                continue

            m = cmp2.match(line)
            if m is not None:
                if currentRule is None:
                    # an alternative needs a rule to belong to
                    raise Exception('Invalid rule: ' + line)
                ruleDef = self._add_definitions(currentRule, m.group(1), m.group(2), pending)
                continue

            m = cmp3.match(line)
            if m is not None:
                if ruleDef is None:
                    # code needs a definition to belong to
                    raise Exception('Invalid rule: ' + line)
                ruleDef._addcode(m.group(1))
                continue

            m = self._INIT_RE.match(line)
            if m is not None:
                self.code_init.append(m.group(1))
                continue

            m = self._IMPORT_RE.match(line)
            if m is not None:
                _infile = m.group(1)
                try:
                    with open(_infile, 'r') as _file:
                        imported = _file.readlines()
                except (IOError, OSError):
                    raise Exception('File not found:' + _infile)
                pending.extend(imported)
                continue

            m = self._OPTION_RE.match(line)
            if m is not None:
                self._apply_option(m.group(1).strip())
                # the separator may have changed
                cmp1, cmp2, cmp3 = self._build_patterns()
                continue

            m = self._POST_RE.match(line)
            if m is not None:
                self.code_post.append(m.group(1))
                continue

            raise Exception('Invalid rule: ' + line)
        self.check_rules()

    def check_rules(self):
        """ routine to check if all rules are defined """
        for k, v in self.rules.items():
            if len(v.definitions) == 0:
                raise Exception('No definition for rule:' + k)

    def match(self, data):
        """ match ``data`` against the main rule

        Returns self on success, else None.  On success every entry of
        self.vals holding a non-empty str, an int or a list is copied to an
        attribute of the same name.  A str starting with a quote is
        unquoted when it is a valid Python literal and kept as is
        otherwise; lists are deep-copied.
        The attributes are set with setattr rather than by executing
        generated source, so quotes, backslashes or code fragments in the
        parsed text cannot break or hijack the assignment.
        """
        import ast
        import copy
        if not self.maindef.match(data):
            return None
        for name, value in list(self.vals.items()):
            if isinstance(value, str):
                if len(value) == 0:
                    continue
                if value[0] in ('"', "'"):
                    try:
                        value = ast.literal_eval(value)
                    except (ValueError, SyntaxError):
                        pass  # not a complete literal: keep the raw text
                setattr(self, name, value)
            elif isinstance(value, int):
                setattr(self, name, value)
            elif isinstance(value, list):
                setattr(self, name, copy.deepcopy(value))
        return self

    def get(self, var):
        """ return the value collected for ``var``, or None when there is none """
        return self.vals.get(var)
00620     
def _xlate(str,reverse=False):
    """ replace quoted grouping characters by placeholder names, or back

    Forward (default): '"("', '")"', '"|"', '"[' and ']"' are replaced by
    quoted names so that a later split on ( ) [ ] | does not cut them.
    With ``reverse`` true the placeholder names are turned back into the
    original text.  Returns the translated string.
    """
    # A tuple of pairs keeps the replacement order fixed on every
    # Python version.
    xlation = (('"("', '"l_parent"'),
               ('")"', '"r_parent"'),
               ('"|"', '"or_term"'),
               ('"[', '"l_bracket"'),
               (']"', '"r_bracket"'))
    text = str
    for plain, alias in xlation:
        if reverse:
            text = text.replace(alias, plain)
        else:
            text = text.replace(plain, alias)
    return text
00633     
def splitBrackets(inputStr):
    """ Split rules definitions along () and []

    Each ( ... ) group, with its optional + ? * suffix, and each [ ... ]
    group is replaced in the definition by a reference to a generated rule
    named 'rule<number>'; a [ ... ] group always becomes an optional ('?')
    reference.  Returns (definition, newrules) where ``newrules`` holds the
    'name ::= content' lines of the generated rules.
    NOTE(review): the names are random, so they could still clash with a
    name generated by another call or with a user rule called 'rule<n>'.
    """
    import random
    newrules = []
    used = set()   # names generated by this call, to keep them distinct

    def _extract(text, pattern, fixed_suffix=None):
        """ replace every match of ``pattern`` in ``text`` by a generated rule """
        groups = re.findall(pattern, text)   # ex: ['(r2)','(r4)*']
        pieces = re.split(pattern, text)     # ex: ['r1 ',' r3 ',' r5']
        out = pieces.pop(0)
        for grp in groups:
            if fixed_suffix is not None:
                suffix = fixed_suffix
                inner = grp[1:-1]
            elif grp[-1] == ')':
                suffix = ''
                inner = grp[1:-1]
            else:
                suffix = grp[-1]
                inner = grp[1:-2]
            a_rule = 'rule' + str(random.randint(1, 1000000))
            while a_rule in used:
                a_rule = 'rule' + str(random.randint(1, 1000000))
            used.add(a_rule)
            out += ' ' + a_rule + suffix + ' '
            newrules.append(a_rule + ' ::= ' + _xlate(inner, True))
            out += pieces.pop(0)
        return out

    text = _xlate(inputStr)
    # ex:  r1 (r2) r3 (r4)* r5
    text = _extract(text, r'\([^\)]*\)[\+\?\*]?')
    # ex:  r1 [r2] r3 [r4] r5
    text = _extract(text, r'\[[^\]]*\]', '?')
    return text, newrules
00671     
00672 
if __name__=='__main__':
    # Demo: parse "locate"-style commands made of an optional range list
    # followed by a delimited string, and print what was collected.
    rule=['init loc_range=[] ',
          'locate    ::=   range?  string  ',
          'range     ::=   "(" group+ ")"                   ',
          '            |    group                            ',
          'group     ::=   grp          ',
          'grp       ::=   nn "-" mm    @loc_range.append([$nn,$mm])',
          '            |   nn "." mm    @loc_range.append([$nn,($nn+$mm)])',
          '            |   nn           @loc_range.append([$nn,9999])',
          'nn        ::=  r"[0-9]"+                              ',
          'mm        ::=  r"[0-9]"+                              ',
          'string    ::=  sep char* sep  @loc_string="$char" ',
          'sep       ::=  r"\\S"  ',
          'char      ::=  r"." ^sep' ]
    stringsToTry=(' 1-20 /hello world/ ',
                  ' 4-10 ,12345678',
                  ' 4-10 ,aaaaaa,',
                  ' /location/',
                  ' (4.10 25-28 39.3) /location/ ',
                  ' (4.10 25-28  /location/   ',
                  ' 46-  /location/   ')    
    r=compile(rule)
    for st in stringsToTry:
        ok_ko=(r.match(st) is not None)
        # Each message is one pre-formatted string, so the output is the
        # same whether print is a statement or a function.
        print('\n--------- %s ------------ %s' % (st, ok_ko))
        if not ok_ko:
            print('"'+st+'"')
            print(" "+r.stringError)
        else:
            print("range= %s" % (r.loc_range,))
            print("string= %s" % (r.loc_string,))
00704 
00705 
00706 
00707