# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import imp
import os.path
import sys

# Disable lint check for finding modules:
# pylint: disable=F0401

def _GetDirAbove(dirname):
  """Returns the directory "above" this file containing |dirname|.

  Walks up from this file's absolute path one component at a time and returns
  the parent of the first component (searching from the file name upwards)
  whose name equals |dirname|.

  Args:
    dirname: Name of a single path component that must appear in this file's
        absolute path.

  Returns:
    The path of the directory that contains |dirname|.

  Raises:
    AssertionError: If no component of this file's path is named |dirname|.
  """
  path = os.path.abspath(__file__)
  while True:
    path, tail = os.path.split(path)
    # os.path.split() yields an empty tail once the root has been reached.
    # Raise explicitly rather than via `assert`: assert statements are removed
    # under `python -O`, which would turn a missing |dirname| into an infinite
    # loop. AssertionError is kept so the failure type is unchanged.
    if not tail:
      raise AssertionError(
          'No path component named "%s" above %s' %
          (dirname, os.path.abspath(__file__)))
    if tail == dirname:
      return path

# Use "ply" if it can already be found on the module search path; otherwise
# make the "third_party" directory that sits next to "mojo" importable.
try:
  imp.find_module("ply")
except ImportError:
  sys.path.append(os.path.join(_GetDirAbove("mojo"), "third_party"))
from ply.lex import TOKEN

from ..error import Error


# Disable lint check for exceptions deriving from Exception:
# pylint: disable=W0710
class LexError(Error):
  """Class for errors from the lexer."""

  def __init__(self, filename, message, lineno):
    """Forwards |filename| and |message| to Error, with |lineno| by keyword."""
    Error.__init__(self, filename, message, lineno=lineno)


# We have methods which look like they could be functions:
# pylint: disable=R0201
class Lexer(object):
  """Token names, regexes and t_* rules laid out for ply.lex.

  NOTE(review): how this class is handed to PLY is not visible in this file;
  presumably an instance is passed to ply.lex.lex() -- confirm at the caller.
  The order of the t_* function definitions below is significant (see the
  comments next to them), so do not reorder them casually.
  """

  def __init__(self, filename):
    """Stores |filename|; it is only used when reporting a LexError."""
    self.filename = filename

  ######################--   PRIVATE   --######################

  ##
  ## Internal auxiliary methods
  ##
  def _error(self, msg, token):
    """Raises LexError for |msg| at |token|'s line in self.filename."""
    raise LexError(self.filename, msg, token.lineno)

  ##
  ## Reserved keywords
  ##
  keywords = (
    'HANDLE',

    'IMPORT',
    'MODULE',
    'STRUCT',
    'INTERFACE',
    'ENUM',
    'CONST',
    'TRUE',
    'FALSE',
    'DEFAULT',
  )

  # Maps the lowercase spelling of each keyword to its token type, e.g.
  # 'struct' -> 'STRUCT'. Consulted by t_NAME below.
  keyword_map = {}
  for keyword in keywords:
    keyword_map[keyword.lower()] = keyword

  ##
  ## All the tokens recognized by the lexer
  ##
  tokens = keywords + (
    # Identifiers
    'NAME',

    # Constants
    'ORDINAL',
    'INT_CONST_DEC', 'INT_CONST_HEX',
    'FLOAT_CONST',
    'CHAR_CONST',

    # String literals
    'STRING_LITERAL',

    # Operators
    'MINUS',
    'PLUS',
    'AMP',

    # Assignment
    'EQUALS',

    # Request / response
    'RESPONSE',

    # Delimiters
    'LPAREN', 'RPAREN',         # ( )
    'LBRACKET', 'RBRACKET',     # [ ]
    'LBRACE', 'RBRACE',         # { }
    'LANGLE', 'RANGLE',         # < >
    'SEMI',                     # ;
    'COMMA', 'DOT'              # , .
  )

  ##
  ## Regexes for use in tokens
  ##

  # valid C identifiers (K&R2: A.2.3)
  identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'

  hex_prefix = '0[xX]'
  hex_digits = '[0-9a-fA-F]+'

  # integer constants (K&R2: A.2.5.1)
  decimal_constant = '0|([1-9][0-9]*)'
  hex_constant = hex_prefix+hex_digits
  # Don't allow octal constants (even invalid octal): a leading 0 followed by
  # any further digits is matched here so that it can be rejected.
  octal_constant_disallowed = '0[0-9]+'

  # character constants (K&R2: A.2.5.2)
  # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
  # directives with Windows paths as filenames (..\..\dir\file)
  # For the same reason, decimal_escape allows all digit sequences. We want to
  # parse all correct code, even if it means to sometimes parse incorrect
  # code.
  #
  simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
  decimal_escape = r"""(\d+)"""
  hex_escape = r"""(x[0-9a-fA-F]+)"""
  # A backslash followed by a character that starts none of the escapes above.
  # Note that only 0-7 are excluded here, so "\8" and "\9" count as bad even
  # though decimal_escape would accept them.
  bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""

  escape_sequence = \
      r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
  cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
  char_const = "'"+cconst_char+"'"
  # An opening quote that runs into a newline or the end of the input.
  unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
  # More than one character between the quotes, an empty '', or a constant
  # that starts with a bad escape.
  bad_char_const = \
      r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+ \
      bad_escape+r"""[^'\n]*')"""

  # string literals (K&R2: A.2.6)
  string_char = r"""([^"\\\n]|"""+escape_sequence+')'
  string_literal = '"'+string_char+'*"'
  # A quoted string with at least one bad_escape somewhere inside it.
  bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

  # floating constants (K&R2: A.2.5.3)
  exponent_part = r"""([eE][-+]?[0-9]+)"""
  fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
  # Either a fractional constant with an optional exponent, or plain digits
  # with a mandatory exponent.
  floating_constant = \
      '(((('+fractional_constant+')'+ \
      exponent_part+'?)|([0-9]+'+exponent_part+')))'

  # Ordinals
  ordinal = r'@[0-9]+'
  missing_ordinal_value = r'@'
  # Don't allow ordinal values in octal (even invalid octal, like 09) or
  # hexadecimal.
  octal_or_hex_ordinal_disallowed = r'@((0[0-9]+)|('+hex_prefix+hex_digits+'))'

  ##
  ## Rules for the normal state
  ##
  t_ignore = ' \t\r'

  # Newlines
  def t_NEWLINE(self, t):
    r'\n+'
    # The string above is the rule's regex, not documentation. Each matched
    # newline advances the line counter; nothing is returned for this rule.
    t.lexer.lineno += len(t.value)

  # Operators
  t_MINUS = r'-'
  t_PLUS = r'\+'
  t_AMP = r'&'

  # =
  t_EQUALS = r'='

  # =>
  # NOTE(review): '=>' has to be tried before '='; this presumably relies on
  # PLY ordering string-defined rules by decreasing regex length -- confirm
  # against the PLY documentation before changing either rule.
  t_RESPONSE = r'=>'

  # Delimiters
  t_LPAREN = r'\('
  t_RPAREN = r'\)'
  t_LBRACKET = r'\['
  t_RBRACKET = r'\]'
  t_LBRACE = r'\{'
  t_RBRACE = r'\}'
  t_LANGLE = r'<'
  t_RANGLE = r'>'
  t_COMMA = r','
  t_DOT = r'\.'
  t_SEMI = r';'

  t_STRING_LITERAL = string_literal

  # The following floating and integer constants are defined as
  # functions to impose a strict order (otherwise, decimal
  # is placed before the others because its regex is longer,
  # and this is bad)
  #
  @TOKEN(floating_constant)
  def t_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_constant)
  def t_INT_CONST_HEX(self, t):
    return t

  # Defined before t_INT_CONST_DEC so that input such as "012" is rejected
  # here rather than being lexed as the decimal constant "0".
  @TOKEN(octal_constant_disallowed)
  def t_OCTAL_CONSTANT_DISALLOWED(self, t):
    msg = "Octal values not allowed"
    self._error(msg, t)

  @TOKEN(decimal_constant)
  def t_INT_CONST_DEC(self, t):
    return t

  # Must come before bad_char_const, to prevent it from
  # catching valid char constants as invalid
  #
  @TOKEN(char_const)
  def t_CHAR_CONST(self, t):
    return t

  @TOKEN(unmatched_quote)
  def t_UNMATCHED_QUOTE(self, t):
    msg = "Unmatched '"
    self._error(msg, t)

  @TOKEN(bad_char_const)
  def t_BAD_CHAR_CONST(self, t):
    msg = "Invalid char constant %s" % t.value
    self._error(msg, t)

  # unmatched string literals are caught by the preprocessor
  # NOTE(review): no preprocessor is visible in this file -- confirm that the
  # statement above still holds. No rule here matches an unterminated '"'.

  @TOKEN(bad_string_literal)
  def t_BAD_STRING_LITERAL(self, t):
    msg = "String contains invalid escape code"
    self._error(msg, t)

  # Handle ordinal-related tokens in the right order: the disallowed
  # octal/hex forms first, then a valid ordinal, then a bare '@'.
  @TOKEN(octal_or_hex_ordinal_disallowed)
  def t_OCTAL_OR_HEX_ORDINAL_DISALLOWED(self, t):
    msg = "Octal and hexadecimal ordinal values not allowed"
    self._error(msg, t)

  @TOKEN(ordinal)
  def t_ORDINAL(self, t):
    return t

  @TOKEN(missing_ordinal_value)
  def t_BAD_ORDINAL(self, t):
    msg = "Missing ordinal value"
    self._error(msg, t)

  # Keywords are matched by the identifier regex and then re-typed: a value
  # found in keyword_map gets that keyword's token type, anything else stays
  # a NAME. The lookup uses t.value as-is, so only lowercase spellings match.
  @TOKEN(identifier)
  def t_NAME(self, t):
    t.type = self.keyword_map.get(t.value, "NAME")
    return t

  # Ignore C and C++ style comments
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    # The string above is the rule's regex, not documentation. Newlines inside
    # the comment still advance the line counter; nothing is returned.
    t.lexer.lineno += t.value.count("\n")

  # Reports the first character of the remaining input (t.value[0]) as
  # illegal.
  def t_error(self, t):
    msg = "Illegal character %s" % repr(t.value[0])
    self._error(msg, t)