#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

""" Lexer for PPAPI IDL """

#
# IDL Lexer
#
# The lexer uses the PLY lex library to build a tokenizer which understands
# WebIDL tokens.
#
# WebIDL, and WebIDL regular expressions can be found at:
#   http://dev.w3.org/2006/webapi/WebIDL/
# PLY can be found at:
#   http://www.dabeaz.com/ply/

import os.path
import re
import sys

#
# Try to load the ply module, if not, then assume it is in the third_party
# directory, relative to ppapi
#
try:
  from ply import lex
except ImportError:
  # Only a failed import should trigger the fallback; any other exception
  # is a real error and must not be hidden.
  module_path, module_name = os.path.split(__file__)
  third_party = os.path.join(module_path, '..', '..', 'third_party')
  sys.path.append(third_party)
  from ply import lex

from idl_option import GetOption, Option, ParseOptions


Option('output', 'Generate output.')

#
# IDL Lexer
#
class IDLLexer(object):
  """PLY-based tokenizer for IDL source text.

  Call SetData() to load a buffer, then pull tokens from self.lexobj.token()
  until it returns None.  While lexing, the class records the buffer offset
  at which each line starts (self.index) so that error messages can show the
  offending source line with a caret under the failing column.
  """

  # 'tokens' is a value required by lex which specifies the complete list
  # of valid token types.
  tokens = [
    # Symbol and keywords types
      'COMMENT',
      'DESCRIBE',
      'ENUM',
      'LABEL',
      'SYMBOL',
      'INLINE',
      'INTERFACE',
      # 'readonly' is mapped to READONLY in 'keywords' below, so the type has
      # to be declared here; in non-optimized mode lex raises a LexError when
      # a rule returns a token type that is not listed.
      'READONLY',
      'STRUCT',
      'TYPEDEF',
      'OR',

    # Extra WebIDL keywords
      'CALLBACK',
      'DICTIONARY',
      'OPTIONAL',
      'STATIC',

    # Invented for apps use
      'NAMESPACE',

    # Data types
      'FLOAT',
      'OCT',
      'INT',
      'HEX',
      'STRING',

    # Operators
      'LSHIFT',
      'RSHIFT'
  ]

  # 'keywords' is a map of string to token type.  All SYMBOL tokens are
  # matched against keywords, to determine if the token is actually a keyword.
  keywords = {
    'describe' : 'DESCRIBE',
    'enum' : 'ENUM',
    'label' : 'LABEL',
    'interface' : 'INTERFACE',
    'readonly' : 'READONLY',
    'struct' : 'STRUCT',
    'typedef' : 'TYPEDEF',

    'callback' : 'CALLBACK',
    'dictionary' : 'DICTIONARY',
    'optional' : 'OPTIONAL',
    'static' : 'STATIC',
    'namespace' : 'NAMESPACE',

    'or' : 'OR',
  }

  # 'literals' is a value expected by lex which specifies a list of valid
  # literal tokens, meaning the token type and token value are identical.
  literals = '"*.(){}[],;:=+-/~|&^?'

  # Token definitions
  #
  # Lex assumes any value or function in the form of 't_<TYPE>' represents a
  # regular expression where a match will emit a token of type <TYPE>.  In the
  # case of a function, the function is called when a match is made. These
  # definitions come from WebIDL.
  #
  # NOTE: for the function rules below, the first string literal in the body
  # is the regular expression itself, so they are documented with comments
  # rather than docstrings.

  # 't_ignore' is a special match of items to ignore
  t_ignore = ' \t'

  # Constant values
  t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|-?\d+[Ee][+-]?\d+'
  t_INT = r'-?[0-9]+[uU]?'
  t_OCT = r'-?0[0-7]+'
  t_HEX = r'-?0[Xx][0-9A-Fa-f]+'
  t_LSHIFT = r'<<'
  t_RSHIFT = r'>>'

  # A line ending '\n', we use this to increment the line number.  Nothing is
  # returned, so no token is emitted for the match.
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # We do not process escapes in the IDL strings.  Strings are exclusively
  # used for attributes, and not used as typical 'C' constants.  The token
  # value is the text between the quotes.
  def t_STRING(self, t):
    r'"[^"]*"'
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t

  # A C or C++ style comment:  /* xxx */ or //
  # Consecutive '//' lines are matched as one COMMENT token.
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    self.AddLines(t.value.count('\n'))
    return t

  # Return a "preprocessor" inline block
  def t_INLINE(self, t):
    r'\#inline (.|\n)*?\#endinl.*'
    self.AddLines(t.value.count('\n'))
    return t

  # A symbol or keyword.
  def t_KEYWORD_SYMBOL(self, t):
    r'_?[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols
    t.type = self.keywords.get(t.value, 'SYMBOL')

    # We strip leading underscores so that you can specify symbols with the same
    # value as a keywords (E.g. a dictionary named 'interface').
    if t.value[0] == '_':
      t.value = t.value[1:]
    return t

  # Called by lex when no rule matches.  Writes a message with the offending
  # source line to stderr and bumps self.lex_errors.
  def t_ANY_error(self, t):
    msg = "Unrecognized input"
    line = self.lexobj.lineno

    # If that line has not been accounted for, then we must have hit
    # EoF, so compute the beginning of the line that caused the problem.
    # NOTE(review): SetData seeds 'index' with one entry and 'lineno' with 1,
    # and AddLines grows both in step, so this condition looks like it holds
    # on every call -- confirm that is intended.
    if line >= len(self.index):
      # Find the offset in the line of the first word causing the issue.  The
      # remaining input may be whitespace only, in which case there is no
      # word to look for and the offset falls back to the start of the line.
      words = t.value.split()
      word = words[0] if words else ''
      offs = self.lines[line - 1].find(word)
      # Add the computed line's starting position
      self.index.append(self.lexobj.lexpos - offs)
      msg = "Unexpected EoF reached after"

    pos = self.lexobj.lexpos - self.index[line]
    filename = self.lexobj.filename
    out = self.ErrorMessage(filename, line, pos, msg)
    sys.stderr.write(out + '\n')
    self.lex_errors += 1


  def AddLines(self, count):
    """Advance the line counter by |count| and record line start offsets."""
    # Set the lexer position for the beginning of the next line.  In the case
    # of multiple lines, tokens can not exist on any of the lines except the
    # last one, so the recorded value for previous lines are unused.  We still
    # fill the array however, to make sure the line count is correct.
    self.lexobj.lineno += count
    self.index.extend([self.lexobj.lexpos] * count)

  def FileLineMsg(self, file, line, msg):
    """Return |msg| prefixed with 'file(line + 1)', or '<BuiltIn>' if no file."""
    if file: return "%s(%d) : %s" % (file, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  def SourceLine(self, file, line, pos):
    """Return source line |line| followed by a caret line marking |pos|."""
    # Expanding a tab to |pos| columns pads the caret out to the error column.
    caret = '\t^'.expandtabs(pos)
    # We decrement the line number since the array is 0 based while the
    # line numbers are 1 based.
    return "%s\n%s" % (self.lines[line - 1], caret)

  def ErrorMessage(self, file, line, pos, msg):
    """Return the file/line message and the annotated source line as one string."""
    return "\n%s\n%s" % (
        self.FileLineMsg(file, line, msg),
        self.SourceLine(file, line, pos))

  def SetData(self, filename, data):
    """Load |data| into the lexer and reset all per-buffer bookkeeping.

    Args:
      filename: name stored on the lex object and used in error messages.
      data: the complete source text to tokenize.
    """
    # Start with line 1, not zero
    self.lexobj.lineno = 1
    self.lexobj.filename = filename
    self.lines = data.split('\n')
    self.index = [0]
    self.lexobj.input(data)
    self.lex_errors = 0

  def __init__(self):
    """Build the PLY lexer from this object's 't_' rules."""
    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
    # Give the bookkeeping attributes defined values even before the first
    # SetData() call, which resets all of them.
    self.lines = []
    self.index = [0]
    self.lex_errors = 0



#
# FilesToTokens
#
# From a set of source file names, generate a list of tokens.
#
def FilesToTokens(filenames, verbose=False):
  """Tokenize every file in |filenames| and return all tokens in one list.

  Args:
    filenames: iterable of paths to read.
    verbose: when true, a 'Loaded <file>' line is written to stdout per file.

  Returns:
    A list of the token objects produced by the lexer, in file order.
  """
  lexer = IDLLexer()
  outlist = []
  for filename in filenames:
    # Use a context manager so the file handle is closed promptly instead of
    # being left to the garbage collector.
    with open(filename) as srcfile:
      data = srcfile.read()
    lexer.SetData(filename, data)
    if verbose: sys.stdout.write(' Loaded %s...\n' % filename)
    outlist.extend(_DrainTokens(lexer))
  return outlist


def _DrainTokens(lexer):
  """Return every remaining token from |lexer| as a list."""
  # lexobj.token() returns None once the input is exhausted, which is the
  # sentinel that ends the iteration.
  return list(iter(lexer.lexobj.token, None))


def TokensFromText(text):
  """Tokenize |text| (reported as file 'unknown') and return token values."""
  lexer = IDLLexer()
  lexer.SetData('unknown', text)
  return [tok.value for tok in _DrainTokens(lexer)]

#
# TextToTokens
#
# From a block of text, generate a list of tokens
#
def TextToTokens(source):
  """Tokenize |source| (reported as file 'AUTO') and return token values."""
  lexer = IDLLexer()
  lexer.SetData('AUTO', source)
  return [tok.value for tok in _DrainTokens(lexer)]


#
# TestSame
#
# From a set of token values, generate a new source text by joining with a
# newline.  The new source is then tokenized and compared against the
# old set.
#
def TestSame(values1):
  """Check that re-lexing the joined token values yields the same values.

  Args:
    values1: list of token value strings from a previous lexer run.

  Returns:
    0 if the re-tokenized values equal |values1|, -1 otherwise.  Differences
    are written to stdout.  When the 'output' option is set, both texts are
    also written to original.txt and tokenized.txt.
  """
  # Recreate the source from the tokens.  We use newline instead of whitespace
  # since the '//' and #inline regex are line sensitive.
  src1 = '\n'.join(values1)
  values2 = TextToTokens(src1)
  # Text form of the second pass, used for the dump files and failure report.
  src2 = '\n'.join(values2)

  count1 = len(values1)
  count2 = len(values2)
  if count1 != count2:
    sys.stdout.write("Size mismatch original %d vs %d\n\n" % (count1, count2))

  # zip() stops at the shorter list, so only positions present in both runs
  # are compared.
  for i, (old, new) in enumerate(zip(values1, values2)):
    if old != new:
      sys.stdout.write("%d >>%s<< >>%s<<\n" % (i, old, new))

  if GetOption('output'):
    sys.stdout.write('Generating original.txt and tokenized.txt\n')
    with open('original.txt', 'w') as original:
      original.write(src1)
    with open('tokenized.txt', 'w') as tokenized:
      tokenized.write(src2)

  if values1 == values2:
    sys.stdout.write('Same: Pass\n')
    return 0

  sys.stdout.write("****************\n%s\n%s***************\n\n" % (src1, src2))
  sys.stdout.write('Same: Failed\n')
  return -1


#
# TestExpect
#
# From a set of tokens pairs, verify the type field of the second matches
# the value of the first, so that:
# INT 123 FLOAT 1.1
# will generate a passing test, where the first token is the SYMBOL INT,
# and the second token is the INT 123, third token is the SYMBOL FLOAT and
# the fourth is the FLOAT 1.1, etc...
def TestExpect(tokens):
  """Verify that each token's type matches the value of the token before it.

  Tokens are consumed in pairs: the value of the first names the type the
  second is expected to have.

  Args:
    tokens: list of objects with 'type' and 'value' attributes.

  Returns:
    0 if every pair matches, -1 otherwise.  Each mismatch is written to
    stderr.  A trailing token with no partner counts as a mismatch.
  """
  count = len(tokens)
  errors = 0

  # Walk the complete pairs only; an unpaired trailing token is handled below.
  for index in range(0, count - 1, 2):
    expected = tokens[index].value
    token = tokens[index + 1]

    if expected != token.type:
      sys.stderr.write('Mismatch: Expected %s, but got %s = %s.\n' %
                       (expected, token.type, token.value))
      errors += 1

  # An odd number of tokens leaves an expectation with nothing to check it
  # against; report it rather than indexing past the end of the list.
  if count % 2:
    sys.stderr.write('Mismatch: Expected %s, but got no token.\n' %
                     tokens[-1].value)
    errors += 1

  if not errors:
    sys.stdout.write('Expect: Pass\n')
    return 0

  sys.stdout.write('Expect: Failed\n')
  return -1


def Main(args):
  """Tokenize the files named in |args|, optionally dumping and self-testing.

  Args:
    args: command line arguments, handed to ParseOptions; its result is used
        as the list of files to tokenize.

  Returns:
    0 on success, -1 if a self-test fails or the lexer raises a LexError.
  """
  filenames = ParseOptions(args)

  try:
    tokens = FilesToTokens(filenames, GetOption('verbose'))
    values = [tok.value for tok in tokens]
    if GetOption('output'): sys.stdout.write(' <> '.join(values) + '\n')
    if GetOption('test'):
      if TestSame(values):
        return -1
      if TestExpect(tokens):
        return -1
    return 0

  except lex.LexError as le:
    sys.stderr.write('%s\n' % str(le))
    return -1


if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))