15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# ply: lex.py
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Copyright (C) 2001-2011,
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# David M. Beazley (Dabeaz LLC)
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# All rights reserved.
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Redistribution and use in source and binary forms, with or without
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# modification, are permitted provided that the following conditions are
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# met:
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# * Redistributions of source code must retain the above copyright notice,
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#   this list of conditions and the following disclaimer.
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# * Redistributions in binary form must reproduce the above copyright notice,
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#   this list of conditions and the following disclaimer in the documentation
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#   and/or other materials provided with the distribution.
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# * Neither the name of the David Beazley or Dabeaz LLC may be used to
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#   endorse or promote products derived from this software without
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#  specific prior written permission.
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
# Version of PLY; written into the header and the "_tabversion" field of
# generated lexer table files.
__version__    = "3.4"
# NOTE(review): Lexer.writetab() and Lexer.readtab() stamp and check tables
# with __version__, not with __tabversion__ -- confirm which is intended.
__tabversion__ = "3.2"       # Version of table file used
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)import re, sys, types, copy, os
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# This tuple contains known string types
try:
    # Python 2.x: the types module exposes StringType and UnicodeType
    StringTypes = (types.StringType, types.UnicodeType)
except AttributeError:
    # Python 3.x: the attribute lookup above fails, so fall back to the
    # built-in str and bytes types
    StringTypes = (str, bytes)
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Extract the code attribute of a function. Different implementations
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# are for Python 2/3 compatibility.
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
if sys.version_info[0] >= 3:
    def func_code(f):
        """Return the code object of function f (Python 3 attribute name)."""
        return f.__code__
else:
    def func_code(f):
        """Return the code object of function f (Python 2 attribute name)."""
        return f.func_code
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# This regular expression is used to match valid token names
# Accepts one or more ASCII letters, digits or underscores.
# NOTE: a leading digit is allowed, and because `$` also matches just before
# a trailing newline, match() accepts a name such as "ID\n".
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Exception thrown when invalid token encountered and no default error
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# handler is defined.
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
class LexError(Exception):
    """Error raised for an invalid token when no error handler is defined.

    Attributes:
        args: one-element tuple holding only the message.
        text: the second constructor argument, stored unchanged
              (presumably the offending input -- confirm against callers).
    """
    def __init__(self,message,s):
        self.text = s
        # Only the message goes into args, so str(exc) shows just the message.
        self.args = (message,)
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Token class.  This class is used to represent the tokens produced.
class LexToken(object):
    """Plain attribute container for one token.

    The string form reads the attributes type, value, lineno and lexpos,
    which must have been assigned by whoever created the token.
    """
    def __repr__(self):
        # repr() and str() are deliberately the same text.
        return str(self)

    def __str__(self):
        fields = (self.type, self.value, self.lineno, self.lexpos)
        return "LexToken(%s,%r,%d,%d)" % fields
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# This object is a stand-in for a logging object created by the
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# logging module.
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
class PlyLogger(object):
    """Minimal logger that writes formatted messages to a file-like object.

    Each method formats its message with the % operator (msg % args) and
    writes one newline-terminated line to f.  Keyword arguments are accepted
    and ignored.
    """
    def __init__(self,f):
        # f: any object providing a write() method
        self.f = f

    def critical(self,msg,*args,**kwargs):
        """Write the formatted message with no prefix."""
        text = msg % args
        self.f.write(text + "\n")

    def warning(self,msg,*args,**kwargs):
        """Write the formatted message prefixed with "WARNING: "."""
        text = msg % args
        self.f.write("WARNING: " + text + "\n")

    def error(self,msg,*args,**kwargs):
        """Write the formatted message prefixed with "ERROR: "."""
        text = msg % args
        self.f.write("ERROR: " + text + "\n")

    # info and debug produce the same unprefixed output as critical
    info = critical
    debug = critical
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    """Logger stand-in that does nothing.

    Any attribute looked up on an instance yields the instance itself, and
    calling the instance also yields the instance, so an expression such as
    ``log.warning("...")`` evaluates without producing output.
    """
    def __getattribute__(self,name):
        # Intercepts every explicit attribute access, whatever the name.
        return self
    def __call__(self,*args,**kwargs):
        # Accept and ignore any arguments.
        return self
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#                        === Lexing Engine ===
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# The following Lexer class implements the lexer runtime.   There are only
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# a few public methods and attributes:
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#    input()          -  Store a new string in the lexer
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#    token()          -  Get the next token
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#    clone()          -  Clone the lexer
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#    lineno           -  Current line number
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#    lexpos           -  Current position in the input string
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Lexer:
    def __init__(self):
        """Create an empty lexer: no rules, no input, state "INITIAL"."""
        self.lexre = None             # Master regular expression. This is a list of
                                      # tuples (re,findex) where re is a compiled
                                      # regular expression and findex is a list
                                      # mapping regex group numbers to rules
        self.lexretext = None         # Current regular expression strings
        self.lexstatere = {}          # Dictionary mapping lexer states to master regexs
        self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings
        self.lexstaterenames = {}     # Dictionary mapping lexer states to symbol names
        self.lexstate = "INITIAL"     # Current lexer state
        self.lexstatestack = []       # Stack of lexer states (see push_state/pop_state)
        self.lexstateinfo = None      # State information
        self.lexstateignore = {}      # Dictionary of ignored characters for each state
        self.lexstateerrorf = {}      # Dictionary of error functions for each state
        self.lexreflags = 0           # Optional re compile flags
        self.lexdata = None           # Actual input data (as a string)
        self.lexpos = 0               # Current position in input text
        self.lexlen = 0               # Length of the input text
        self.lexerrorf = None         # Error rule (if any) for the current state
        self.lextokens = None         # List of valid tokens
        self.lexignore = ""           # Ignored characters for the current state
        self.lexliterals = ""         # Literal characters that can be passed through
        self.lexmodule = None         # Module (or object) the rules are bound to
        self.lineno = 1               # Current line number
        self.lexoptimize = 0          # Optimized mode
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def clone(self,object=None):
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        c = copy.copy(self)
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        # If the object parameter has been supplied, it means we are attaching the
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        # lexer to a new object.  In this case, we have to rebind all methods in
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        # the lexstatere and lexstateerrorf tables.
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if object:
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            newtab = { }
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            for key, ritem in self.lexstatere.items():
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                newre = []
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                for cre, findex in ritem:
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     newfindex = []
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     for f in findex:
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         if not f or not f[0]:
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             newfindex.append(f)
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             continue
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         newfindex.append((getattr(object,f[0].__name__),f[1]))
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                newre.append((cre,newfindex))
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                newtab[key] = newre
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            c.lexstatere = newtab
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            c.lexstateerrorf = { }
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            for key, ef in self.lexstateerrorf.items():
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                c.lexstateerrorf[key] = getattr(object,ef.__name__)
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            c.lexmodule = object
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return c
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # writetab() - Write lexer information to a table file
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def writetab(self,tabfile,outputdir=""):
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if isinstance(tabfile,types.ModuleType):
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        basetabfilename = tabfile.split(".")[-1]
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        filename = os.path.join(outputdir,basetabfilename)+".py"
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tf = open(filename,"w")
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tf.write("_tabversion   = %s\n" % repr(__version__))
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tf.write("_lextokens    = %s\n" % repr(self.lextokens))
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tf.write("_lexreflags   = %s\n" % repr(self.lexreflags))
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tf.write("_lexliterals  = %s\n" % repr(self.lexliterals))
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tabre = { }
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        # Collect all functions in the initial state
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        initial = self.lexstatere["INITIAL"]
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        initialfuncs = []
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for part in initial:
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            for f in part[1]:
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if f and f[0]:
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    initialfuncs.append(f)
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for key, lre in self.lexstatere.items():
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             titem = []
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             for i in range(len(lre)):
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i])))
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             tabre[key] = titem
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tf.write("_lexstatere   = %s\n" % repr(tabre))
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        taberr = { }
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for key, ef in self.lexstateerrorf.items():
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             if ef:
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  taberr[key] = ef.__name__
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             else:
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  taberr[key] = None
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tf.write("_lexstateerrorf = %s\n" % repr(taberr))
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tf.close()
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # readtab() - Read lexer information from a tab file
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def readtab(self,tabfile,fdict):
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if isinstance(tabfile,types.ModuleType):
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            lextab = tabfile
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        else:
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if sys.version_info[0] < 3:
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                exec("import %s as lextab" % tabfile)
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            else:
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                env = { }
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                exec("import %s as lextab" % tabfile, env,env)
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                lextab = env['lextab']
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if getattr(lextab,"_tabversion","0.0") != __version__:
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            raise ImportError("Inconsistent PLY version")
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lextokens      = lextab._lextokens
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexreflags     = lextab._lexreflags
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexliterals    = lextab._lexliterals
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexstateinfo   = lextab._lexstateinfo
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexstateignore = lextab._lexstateignore
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexstatere     = { }
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexstateretext = { }
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for key,lre in lextab._lexstatere.items():
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             titem = []
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             txtitem = []
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             for i in range(len(lre)):
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict)))
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  txtitem.append(lre[i][0])
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             self.lexstatere[key] = titem
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             self.lexstateretext[key] = txtitem
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexstateerrorf = { }
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for key,ef in lextab._lexstateerrorf.items():
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             self.lexstateerrorf[key] = fdict[ef]
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.begin('INITIAL')
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # input() - Push a new string into the lexer
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def input(self,s):
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        # Pull off the first character to see if s looks like a string
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        c = s[:1]
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if not isinstance(c,StringTypes):
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            raise ValueError("Expected a string")
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexdata = s
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexpos = 0
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexlen = len(s)
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # begin() - Changes the lexing state
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def begin(self,state):
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if not state in self.lexstatere:
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            raise ValueError("Undefined state")
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexre = self.lexstatere[state]
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexretext = self.lexstateretext[state]
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexignore = self.lexstateignore.get(state,"")
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexerrorf = self.lexstateerrorf.get(state,None)
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexstate = state
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # push_state() - Changes the lexing state and saves old on stack
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def push_state(self,state):
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.lexstatestack.append(self.lexstate)
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.begin(state)
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # pop_state() - Restores the previous state
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def pop_state(self):
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.begin(self.lexstatestack.pop())
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # current_state() - Returns the current lexing state
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
    def current_state(self):
        """Return the name of the lexing state currently in effect."""
        return self.lexstate
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # skip() - Skip ahead n characters
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
    def skip(self,n):
        """Advance the current input position (lexpos) by n.

        No check is made against the length of the input.
        """
        self.lexpos += n
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
    # token() - Return the next token from the Lexer
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    #
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Note: This function has been carefully implemented to be as fast
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # as possible.  Don't make changes unless you really know what
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # you are doing
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # ------------------------------------------------------------
    def token(self):
        """Return the next token from the input, or None once the end is reached.

        Characters found in ``self.lexignore`` are skipped.  At the first
        other character, each ``(regex, index table)`` pair in ``self.lexre``
        is tried in order; the first regex that matches decides the outcome
        through the table entry selected by ``m.lastindex``:

        * no rule function but a token type: the token is returned directly;
        * no rule function and no token type: the matched text is discarded;
        * a rule function: it is called with the token and its result is
          returned, or, when that result is falsy, scanning resumes from
          ``self.lexpos``.

        If no regex matches, a character found in ``self.lexliterals`` is
        returned as a token whose type is the character itself; otherwise
        ``self.lexerrorf`` is called when it is set.

        Raises:
            LexError: a rule function returned a token whose type is not in
                ``self.lextokens`` (checked only when ``self.lexoptimize`` is
                false); nothing matched and no error function is set; or the
                error function left ``self.lexpos`` unchanged.
            RuntimeError: the scan loop finished while ``self.lexdata`` is
                None.
        """
        # Make local copies of frequently referenced attributes
        lexpos    = self.lexpos
        lexlen    = self.lexlen
        lexignore = self.lexignore
        lexdata   = self.lexdata

        while lexpos < lexlen:
            # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
            if lexdata[lexpos] in lexignore:
                lexpos += 1
                continue

            # Look for a regular expression match.  self.lexre is re-read on
            # every pass of the while loop, so a change made by a rule
            # function is picked up for the next token.
            for lexre,lexindexfunc in self.lexre:
                m = lexre.match(lexdata,lexpos)
                if not m: continue

                # Create a token for return
                tok = LexToken()
                tok.value = m.group()
                tok.lineno = self.lineno
                tok.lexpos = lexpos

                # lastindex selects the (rule function, token type) entry
                # for the group that matched.
                i = m.lastindex
                func,tok.type = lexindexfunc[i]

                if not func:
                   # If no token type was set, it's an ignored token
                   if tok.type:
                      self.lexpos = m.end()
                      return tok
                   else:
                      # Discard the match; break skips the for-else below and
                      # restarts the while loop at the new position.
                      lexpos = m.end()
                      break

                lexpos = m.end()

                # If token is processed by a function, call it

                tok.lexer = self      # Set additional attributes useful in token rules
                self.lexmatch = m
                self.lexpos = lexpos  # Published before the call so the rule can read or change it

                newtok = func(tok)

                # Every function must return a token, if nothing, we just move to next token
                if not newtok:
                    lexpos    = self.lexpos         # This is here in case user has updated lexpos.
                    lexignore = self.lexignore      # This is here in case there was a state change
                    break

                # Verify type of the token.  If not in the token map, raise an error
                if not self.lexoptimize:
                    if not newtok.type in self.lextokens:
                        raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
                            func_code(func).co_filename, func_code(func).co_firstlineno,
                            func.__name__, newtok.type),lexdata[lexpos:])

                return newtok
            else:
                # Reached only when the for loop ran to completion, i.e. no
                # regular expression matched at lexpos.
                # No match, see if in literals
                if lexdata[lexpos] in self.lexliterals:
                    tok = LexToken()
                    tok.value = lexdata[lexpos]
                    tok.lineno = self.lineno
                    tok.type = tok.value
                    tok.lexpos = lexpos
                    self.lexpos = lexpos + 1
                    return tok

                # No match. Call t_error() if defined.
                if self.lexerrorf:
                    # The error token carries all of the remaining input as its value.
                    tok = LexToken()
                    tok.value = self.lexdata[lexpos:]
                    tok.lineno = self.lineno
                    tok.type = "error"
                    tok.lexer = self
                    tok.lexpos = lexpos
                    self.lexpos = lexpos
                    newtok = self.lexerrorf(tok)
                    if lexpos == self.lexpos:
                        # Error method didn't change text position at all. This is an error.
                        raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
                    lexpos = self.lexpos
                    # A falsy result means "keep scanning" from the updated position.
                    if not newtok: continue
                    return newtok

                self.lexpos = lexpos
                raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])

        # End of input.
        # NOTE(review): this stores a position one past the last one scanned,
        # and does so again on every later call; presumably intentional - confirm.
        self.lexpos = lexpos + 1
        if self.lexdata is None:
             raise RuntimeError("No input string given with input()")
        return None
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Iterator interface
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def __iter__(self):
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return self
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def next(self):
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t = self.token()
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if t is None:
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            raise StopIteration
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return t
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    __next__ = next
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#                           ==== Lex Builder ===
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# The functions and classes below are used to collect lexing information
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# and build a Lexer object from it.
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# get_caller_module_dict()
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# This function returns a dictionary containing all of the symbols defined within
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# a caller further down the call stack.  This is used to get the environment
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# associated with the yacc() call if none was provided.
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def get_caller_module_dict(levels):
    """Return a merged copy of the globals and locals of a calling frame.

    levels is the number of frames to walk back from this function's own
    frame, so 1 selects the immediate caller.  The result is a new dict
    copied from that frame's globals and, when the frame's locals differ
    from its globals, updated with the locals.
    """
    try:
        # Raising and catching an exception is used to obtain a traceback
        # whose frame is this function's own frame.
        raise RuntimeError
    except RuntimeError:
        frame = sys.exc_info()[2].tb_frame
        for _ in range(levels):
            frame = frame.f_back

        namespace = frame.f_globals.copy()
        if frame.f_globals != frame.f_locals:
            namespace.update(frame.f_locals)

        return namespace
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# _funcs_to_names()
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Given a list of regular expression functions, this converts it to a list
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# suitable for output to a table file
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def _funcs_to_names(funclist, namelist):
    """Replace rule functions by their names so the table can be written out.

    funclist and namelist are walked in parallel.  An entry whose first
    element is truthy, i.e. ``(func, tokname)``, becomes ``(name, tokname)``
    with the name taken from namelist; every other entry (None, or a tuple
    with a falsy first element) is copied through unchanged.
    """
    return [
        (name, entry[1]) if entry and entry[0] else entry
        for entry, name in zip(funclist, namelist)
    ]
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# _names_to_funcs()
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Given a list of regular expression function names, this converts it back to
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# functions.
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def _names_to_funcs(namelist, fdict):
    """Turn a list of rule-function names back into the functions.

    An entry whose first element is truthy, i.e. ``(name, tokname)``, becomes
    ``(fdict[name], tokname)``; every other entry (None, or a tuple with a
    falsy first element) is copied through unchanged.  A name missing from
    fdict raises KeyError.
    """
    return [
        (fdict[entry[0]], entry[1]) if entry and entry[0] else entry
        for entry in namelist
    ]
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# _form_master_re()
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# This function takes a list of all of the regex components and attempts to
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# form the master regular expression.  Given limitations in the Python re
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# module, it may be necessary to break the master regex into separate expressions.
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def _form_master_re(relist, reflags, ldict, toknames):
    """Combine rule patterns into one or more compiled master regexes.

    relist   -- list of pattern strings; they are joined with "|".
    reflags  -- flags OR-ed with re.VERBOSE when compiling.
    ldict    -- maps a named group to the rule's definition (a function or
                method, or any other non-None object for a plain rule).
    toknames -- maps a named group to its token type name.

    Returns a 3-tuple of parallel lists, one element per compiled regex:
    ``[(compiled_regex, index_table)]``, ``[pattern_text]`` and
    ``[index_names]``.  ``index_table[i]`` is ``(func, tokname)`` for group
    number i (``(None, None)`` for ignore rules) and ``index_names[i]`` is
    that group's name.  For an empty relist a bare ``[]`` is returned
    instead; that long-standing behaviour is kept as is.

    If compiling or indexing the joined pattern fails, relist is split in
    half and each half is processed on its own.  A single pattern that still
    fails cannot be split further, so its exception is re-raised rather than
    recursing on the same one-element list forever.
    """
    if not relist: return []
    regex = "|".join(relist)
    try:
        lexre = re.compile(regex, re.VERBOSE | reflags)

        # Build the index to function map for the matching engine
        lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1)
        lexindexnames = lexindexfunc[:]

        for f, i in lexre.groupindex.items():
            handle = ldict.get(f, None)
            if isinstance(handle, (types.FunctionType, types.MethodType)):
                lexindexfunc[i] = (handle, toknames[f])
                lexindexnames[i] = f
            elif handle is not None:
                lexindexnames[i] = f
                # "> 0" (not ">= 0"): matches only when "ignore_" appears
                # after the first character of the group name.
                if f.find("ignore_") > 0:
                    lexindexfunc[i] = (None, None)
                else:
                    lexindexfunc[i] = (None, toknames[f])

        return [(lexre, lexindexfunc)], [regex], [lexindexnames]
    except Exception:
        if len(relist) == 1:
            # Nothing left to split: report the real failure.
            raise
        m = len(relist) // 2
        llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames)
        rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames)
        return llist + rlist, lre + rre, lnames + rnames
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# def _statetoken(s,names)
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Given a declaration name s of the form "t_" and a dictionary whose keys are
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# state names, this function returns a tuple (states,tokenname) where states
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# is a tuple of state names and tokenname is the name of the token.  For example,
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def _statetoken(s, names):
    """Split a rule name into its state prefix and its token name.

    s     -- declaration name such as "t_foo_bar_SPAM".
    names -- container of known state names (tested with ``in`` and, for
             the ANY case, converted with ``tuple()``).

    Returns ``(states, tokenname)``.  The components after the leading one
    are consumed as states while they are known state names or 'ANY'; the
    last component is always left for the token name.  With no state prefix
    the states default to ``('INITIAL',)``; if 'ANY' is among the states,
    every name in names is returned instead.  A name without any underscore
    yields ``(('INITIAL',), '')``.
    """
    parts = s.split("_")

    # Index of the first component that belongs to the token name.  It is
    # preset so that a name with no "_" (empty range below) is still handled.
    split_at = 1
    for split_at in range(1, len(parts)):
        if parts[split_at] not in names and parts[split_at] != 'ANY':
            break

    if split_at > 1:
        states = tuple(parts[1:split_at])
    else:
        states = ('INITIAL',)

    if 'ANY' in states:
        states = tuple(names)

    tokenname = "_".join(parts[split_at:])
    return (states, tokenname)
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# LexerReflect()
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# This class represents information needed to build a lexer as extracted from a
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# user's input file.
5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class LexerReflect(object):
5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def __init__(self,ldict,log=None,reflags=0):
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.ldict      = ldict
5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.error_func = None
5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.tokens     = []
5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.reflags    = reflags
5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.stateinfo  = { 'INITIAL' : 'inclusive'}
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.files      = {}
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.error      = 0
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if log is None:
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.log = PlyLogger(sys.stderr)
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        else:
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.log = log
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Get all of the basic information
5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def get_all(self):
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.get_tokens()
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.get_literals()
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.get_states()
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.get_rules()
5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Validate all of the information
5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def validate_all(self):
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.validate_tokens()
5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.validate_literals()
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.validate_rules()
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return self.error
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Get the tokens map
5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def get_tokens(self):
5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tokens = self.ldict.get("tokens",None)
5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if not tokens:
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.log.error("No token list is defined")
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.error = 1
5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if not isinstance(tokens,(list, tuple)):
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.log.error("tokens must be a list or tuple")
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.error = 1
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if not tokens:
5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.log.error("tokens is empty")
5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.error = 1
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.tokens = tokens
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Validate the tokens
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def validate_tokens(self):
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        terminals = {}
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for n in self.tokens:
5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if not _is_identifier.match(n):
5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                self.log.error("Bad token name '%s'",n)
5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                self.error = 1
6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if n in terminals:
6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                self.log.warning("Token '%s' multiply defined", n)
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            terminals[n] = 1
6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Get the literals specifier
6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def get_literals(self):
6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.literals = self.ldict.get("literals","")
6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Validate literals
6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def validate_literals(self):
6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        try:
6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            for c in self.literals:
6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if not isinstance(c,StringTypes) or len(c) > 1:
6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("Invalid literal %s. Must be a single character", repr(c))
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
6155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    continue
6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        except TypeError:
6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.log.error("Invalid literals specification. literals must be a sequence of characters")
6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.error = 1
6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def get_states(self):
6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.states = self.ldict.get("states",None)
6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        # Build statemap
6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if self.states:
6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             if not isinstance(self.states,(tuple,list)):
6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  self.log.error("states must be defined as a tuple or list")
6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  self.error = 1
6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             else:
6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  for s in self.states:
6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        if not isinstance(s,tuple) or len(s) != 2:
6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s))
6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               self.error = 1
6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               continue
6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        name, statetype = s
6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        if not isinstance(name,StringTypes):
6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               self.log.error("State name %s must be a string", repr(name))
6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               self.error = 1
6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               continue
6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        if not (statetype == 'inclusive' or statetype == 'exclusive'):
6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name)
6415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               self.error = 1
6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               continue
6435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        if name in self.stateinfo:
6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               self.log.error("State '%s' already defined",name)
6455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               self.error = 1
6465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               continue
6475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        self.stateinfo[name] = statetype
6485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Get all of the symbols with a t_ prefix and sort them into various
6505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # categories (functions, strings, error functions, and ignore characters)
6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def get_rules(self):
6535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        tsymbols = [f for f in self.ldict if f[:2] == 't_' ]
6545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        # Now build up a list of functions and a list of strings
6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.toknames = { }        # Mapping of symbols to token names
6585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.funcsym =  { }        # Symbols defined as functions
6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.strsym =   { }        # Symbols defined as strings
6605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.ignore   = { }        # Ignore strings by state
6615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        self.errorf   = { }        # Error functions by state
6625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for s in self.stateinfo:
6645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             self.funcsym[s] = []
6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             self.strsym[s] = []
6665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if len(tsymbols) == 0:
6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.log.error("No rules of the form t_rulename are defined")
6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.error = 1
6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return
6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for f in tsymbols:
6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            t = self.ldict[f]
6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            states, tokname = _statetoken(f,self.stateinfo)
6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.toknames[f] = tokname
6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if hasattr(t,"__call__"):
6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if tokname == 'error':
6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    for s in states:
6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        self.errorf[s] = t
6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                elif tokname == 'ignore':
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    line = func_code(t).co_firstlineno
6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    file = func_code(t).co_filename
6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__)
6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                else:
6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    for s in states:
6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        self.funcsym[s].append((f,t))
6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            elif isinstance(t, StringTypes):
6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if tokname == 'ignore':
6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    for s in states:
6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        self.ignore[s] = t
6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    if "\\" in t:
6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        self.log.warning("%s contains a literal backslash '\\'",f)
6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                elif tokname == 'error':
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("Rule '%s' must be defined as a function", f)
6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                else:
7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    for s in states:
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        self.strsym[s].append((f,t))
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            else:
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                self.log.error("%s not defined as a function or string", f)
7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                self.error = 1
7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        # Sort the functions by line number
7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for f in self.funcsym.values():
7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if sys.version_info[0] < 3:
7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno))
7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            else:
7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                # Python 3.0
7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                f.sort(key=lambda x: func_code(x[1]).co_firstlineno)
7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        # Sort the strings by regular expression length
7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for s in self.strsym.values():
7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if sys.version_info[0] < 3:
7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))
7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            else:
7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                # Python 3.0
7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                s.sort(key=lambda x: len(x[1]),reverse=True)
7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Validate all of the t_rules collected
7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def validate_rules(self):
7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for state in self.stateinfo:
7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            # Validate all rules defined by functions
7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            for fname, f in self.funcsym[state]:
7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                line = func_code(f).co_firstlineno
7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                file = func_code(f).co_filename
7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                self.files[file] = 1
7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                tokname = self.toknames[fname]
7355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if isinstance(f, types.MethodType):
7365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    reqargs = 2
7375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                else:
7385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    reqargs = 1
7395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                nargs = func_code(f).co_argcount
7405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if nargs > reqargs:
7415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
7425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
7435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    continue
7445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if nargs < reqargs:
7465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
7475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
7485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    continue
7495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if not f.__doc__:
7515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__)
7525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
7535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    continue
7545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                try:
7565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags)
7575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    if c.match(""):
7585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__)
7595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        self.error = 1
7605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                except re.error:
7615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    _etype, e, _etrace = sys.exc_info()
7625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e)
7635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    if '#' in f.__doc__:
7645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__)
7655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
7665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            # Validate all rules defined by strings
7685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            for name,r in self.strsym[state]:
7695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                tokname = self.toknames[name]
7705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if tokname == 'error':
7715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("Rule '%s' must be defined as a function", name)
7725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
7735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    continue
7745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if not tokname in self.tokens and tokname.find("ignore_") < 0:
7765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname)
7775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
7785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    continue
7795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                try:
7815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags)
7825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    if (c.match("")):
7835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         self.log.error("Regular expression for rule '%s' matches empty string",name)
7845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         self.error = 1
7855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                except re.error:
7865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    _etype, e, _etrace = sys.exc_info()
7875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("Invalid regular expression for rule '%s'. %s",name,e)
7885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    if '#' in r:
7895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name)
7905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
7915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if not self.funcsym[state] and not self.strsym[state]:
7935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                self.log.error("No rules defined for state '%s'",state)
7945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                self.error = 1
7955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            # Validate the error function
7975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            efunc = self.errorf.get(state,None)
7985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if efunc:
7995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                f = efunc
8005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                line = func_code(f).co_firstlineno
8015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                file = func_code(f).co_filename
8025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                self.files[file] = 1
8035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if isinstance(f, types.MethodType):
8055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    reqargs = 2
8065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                else:
8075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    reqargs = 1
8085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                nargs = func_code(f).co_argcount
8095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if nargs > reqargs:
8105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
8115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
8125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if nargs < reqargs:
8145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
8155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
8165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for f in self.files:
8185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            self.validate_file(f)
8195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # -----------------------------------------------------------------------------
8225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # validate_file()
8235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    #
8245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # This checks to see if there are duplicated t_rulename() functions or strings
8255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # in the parser input file.  This is done using a simple regular expression
8265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # match on each line in the given file.
8275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # -----------------------------------------------------------------------------
8285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    def validate_file(self,filename):
8305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        import os.path
8315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        base,ext = os.path.splitext(filename)
8325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ext != '.py': return         # No idea what the file is. Return OK
8335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        try:
8355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            f = open(filename)
8365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            lines = f.readlines()
8375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            f.close()
8385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        except IOError:
8395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return                      # Couldn't find the file.  Don't worry about it
8405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
8425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
8435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        counthash = { }
8455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        linen = 1
8465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for l in lines:
8475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            m = fre.match(l)
8485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if not m:
8495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                m = sre.match(l)
8505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if m:
8515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                name = m.group(1)
8525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                prev = counthash.get(name)
8535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if not prev:
8545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    counthash[name] = linen
8555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                else:
8565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev)
8575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    self.error = 1
8585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            linen += 1
8595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
8615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# lex(module)
8625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
8635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Build all of the regular expression rules from definitions in the supplied module
8645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
8655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None):
8665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    global lexer
8675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ldict = None
8685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    stateinfo  = { 'INITIAL' : 'inclusive'}
8695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lexobj = Lexer()
8705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lexobj.lexoptimize = optimize
8715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    global token,input
8725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if errorlog is None:
8745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        errorlog = PlyLogger(sys.stderr)
8755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if debug:
8775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if debuglog is None:
8785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            debuglog = PlyLogger(sys.stderr)
8795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Get the module dictionary used for the lexer
8815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if object: module = object
8825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if module:
8845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        _items = [(k,getattr(module,k)) for k in dir(module)]
8855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ldict = dict(_items)
8865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else:
8875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ldict = get_caller_module_dict(2)
8885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Collect parser information from the dictionary
8905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    linfo = LexerReflect(ldict,log=errorlog,reflags=reflags)
8915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    linfo.get_all()
8925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if not optimize:
8935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if linfo.validate_all():
8945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            raise SyntaxError("Can't build lexer")
8955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if optimize and lextab:
8975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        try:
8985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            lexobj.readtab(lextab,ldict)
8995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            token = lexobj.token
9005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            input = lexobj.input
9015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            lexer = lexobj
9025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return lexobj
9035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        except ImportError:
9055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            pass
9065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Dump some basic debugging information
9085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if debug:
9095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        debuglog.info("lex: tokens   = %r", linfo.tokens)
9105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        debuglog.info("lex: literals = %r", linfo.literals)
9115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        debuglog.info("lex: states   = %r", linfo.stateinfo)
9125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Build a dictionary of valid token names
9145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lexobj.lextokens = { }
9155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for n in linfo.tokens:
9165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        lexobj.lextokens[n] = 1
9175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Get literals specification
9195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if isinstance(linfo.literals,(list,tuple)):
9205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
9215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else:
9225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        lexobj.lexliterals = linfo.literals
9235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Get the stateinfo dictionary
9255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    stateinfo = linfo.stateinfo
9265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    regexs = { }
9285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Build the master regular expressions
9295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for state in stateinfo:
9305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        regex_list = []
9315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        # Add rules defined by functions first
9335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for fname, f in linfo.funcsym[state]:
9345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            line = func_code(f).co_firstlineno
9355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            file = func_code(f).co_filename
9365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            regex_list.append("(?P<%s>%s)" % (fname,f.__doc__))
9375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if debug:
9385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state)
9395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        # Now add all of the simple rules
9415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for name,r in linfo.strsym[state]:
9425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            regex_list.append("(?P<%s>%s)" % (name,r))
9435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if debug:
9445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state)
9455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        regexs[state] = regex_list
9475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Build the master regular expressions
9495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if debug:
9515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")
9525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for state in regexs:
9545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames)
9555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        lexobj.lexstatere[state] = lexre
9565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        lexobj.lexstateretext[state] = re_text
9575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        lexobj.lexstaterenames[state] = re_names
9585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if debug:
9595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            for i in range(len(re_text)):
9605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i])
9615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # For inclusive states, we need to add the regular expressions from the INITIAL state
9635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for state,stype in stateinfo.items():
9645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if state != "INITIAL" and stype == 'inclusive':
9655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
9665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
9675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])
9685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lexobj.lexstateinfo = stateinfo
9705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lexobj.lexre = lexobj.lexstatere["INITIAL"]
9715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lexobj.lexretext = lexobj.lexstateretext["INITIAL"]
9725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lexobj.lexreflags = reflags
9735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Set up ignore variables
9755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lexobj.lexstateignore = linfo.ignore
9765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")
9775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Set up error functions
9795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lexobj.lexstateerrorf = linfo.errorf
9805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lexobj.lexerrorf = linfo.errorf.get("INITIAL",None)
9815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if not lexobj.lexerrorf:
9825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        errorlog.warning("No t_error rule is defined")
9835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Check state information for ignore and error rules
9855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for s,stype in stateinfo.items():
9865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if stype == 'exclusive':
9875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              if not s in linfo.errorf:
9885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   errorlog.warning("No error rule is defined for exclusive state '%s'", s)
9895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              if not s in linfo.ignore and lexobj.lexignore:
9905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
9915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        elif stype == 'inclusive':
9925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              if not s in linfo.errorf:
9935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   linfo.errorf[s] = linfo.errorf.get("INITIAL",None)
9945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              if not s in linfo.ignore:
9955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   linfo.ignore[s] = linfo.ignore.get("INITIAL","")
9965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # Create global versions of the token() and input() functions
9985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    token = lexobj.token
9995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input = lexobj.input
10005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lexer = lexobj
10015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    # If in optimize mode, we write the lextab
10035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if lextab and optimize:
10045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        lexobj.writetab(lextab,outputdir)
10055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return lexobj
10075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
10095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# runmain()
10105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
10115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# This runs the lexer as a main program
10125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
10135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def runmain(lexer=None,data=None):
    """Run a lexer as a main program, printing every token it produces.

    lexer -- object providing input() and token() methods.  When it is
             omitted (or falsy), the module-level names input and token
             are used instead.
    data  -- text to tokenize.  When it is omitted (or empty), the text is
             read from the file named by sys.argv[1]; if no command-line
             argument was given, it is read from standard input.

    Each token is written to standard output on its own line in the form
    (type,value,lineno,lexpos).  Errors raised while opening or reading the
    input file propagate to the caller.
    """
    if not data:
        try:
            # Only the argv lookup can signal "no filename given"
            filename = sys.argv[1]
        except IndexError:
            sys.stdout.write("Reading from standard input (type EOF to end):\n")
            data = sys.stdin.read()
        else:
            f = open(filename)
            try:
                data = f.read()
            finally:
                # Release the handle even when read() fails
                f.close()

    if lexer:
        _input = lexer.input
    else:
        _input = input
    _input(data)
    if lexer:
        _token = lexer.token
    else:
        _token = token

    # Drain the lexer; a false token value marks the end of the input
    while True:
        tok = _token()
        if not tok:
            break
        sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos))
10395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
10415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# @TOKEN(regex)
10425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
10435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# This decorator function can be used to set the regex expression on a function
10445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# when its docstring might need to be set in an alternative way
10455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# -----------------------------------------------------------------------------
10465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def TOKEN(r):
    """Return a decorator that installs r as the decorated function's docstring.

    r -- either the value to use as the docstring (normally a regular
         expression string), or an object exposing __call__ whose own
         __doc__ is copied onto the decorated function.
    """
    def set_doc(f):
        # Start from r itself; an object with __call__ donates its docstring
        doc = r
        if hasattr(r,"__call__"):
            doc = r.__doc__
        f.__doc__ = doc
        return f
    return set_doc

# Token is a second name bound to the same decorator factory
Token = TOKEN
10585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1059