# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
45d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
55c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liuimport imp
60529e5d033099cbfc42635f6f6183833b09dff6eBen Murdochimport os.path
75d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)import sys
85d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
# Disable lint check for finding modules:
# pylint: disable=F0401
115c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
def _GetDirAbove(dirname):
  """Returns the directory "above" this file containing |dirname| (which must
  also be "above" this file).

  Starting from this file's absolute path, path components are stripped one
  at a time from the right; the parent of the first component equal to
  |dirname| is returned.

  Raises:
    AssertionError: if no component of this file's path equals |dirname|.
  """
  start = os.path.abspath(__file__)
  path = start
  while True:
    path, tail = os.path.split(path)
    # An empty tail means the top of the path was reached without a match.
    # Raise explicitly instead of using an assert statement: asserts are
    # removed under "python -O", which would leave this loop spinning forever.
    if not tail:
      raise AssertionError(
          "No path component named %r above %s" % (dirname, start))
    if tail == dirname:
      return path
215d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
225c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liutry:
235c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  imp.find_module("ply")
245c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liuexcept ImportError:
255c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  sys.path.append(os.path.join(_GetDirAbove("mojo"), "third_party"))
265c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liufrom ply.lex import TOKEN
275c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
285c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liufrom ..error import Error
295d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
30effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
# Disable lint check for exceptions deriving from Exception:
# pylint: disable=W0710
class LexError(Error):
  """Error type raised when the lexer rejects its input."""

  def __init__(self, filename, message, lineno):
    # Hand everything to the base class; the line number goes by keyword.
    super(LexError, self).__init__(filename, message, lineno=lineno)
38effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
39effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
# We have methods which look like they could be functions:
# pylint: disable=R0201
class Lexer(object):
  """Token definitions in the form consumed by PLY's lexer (ply.lex).

  The class attributes |tokens| and the |t_*| strings/methods describe the
  token set. Rules written as methods are kept in a deliberate order, since
  several of them only work if an earlier rule gets the first chance to match
  (see the comments on the individual rules).

  Any lexing problem is reported by raising LexError with the filename given
  to the constructor and the line number of the offending token.
  """

  def __init__(self, filename):
    """Remembers |filename| so that errors can report it."""
    self.filename = filename

  ######################--   PRIVATE   --######################

  ##
  ## Internal auxiliary methods
  ##
  def _error(self, msg, token):
    """Raises LexError with |msg| and the line number of |token|."""
    raise LexError(self.filename, msg, token.lineno)

  ##
  ## Reserved keywords
  ##
  keywords = (
    'HANDLE',

    'IMPORT',
    'MODULE',
    'STRUCT',
    'INTERFACE',
    'ENUM',
    'CONST',
    'TRUE',
    'FALSE',
    'DEFAULT',
  )

  # Maps the lower-case spelling found in source text to the token type.
  keyword_map = {}
  for keyword in keywords:
    keyword_map[keyword.lower()] = keyword

  ##
  ## All the tokens recognized by the lexer
  ##
  tokens = keywords + (
    # Identifiers
    'NAME',

    # Constants
    'ORDINAL',
    'INT_CONST_DEC', 'INT_CONST_HEX',
    'FLOAT_CONST',
    'CHAR_CONST',

    # String literals
    'STRING_LITERAL',

    # Operators
    'MINUS',
    'PLUS',
    'AMP',

    # Assignment
    'EQUALS',

    # Request / response
    'RESPONSE',

    # Delimiters
    'LPAREN', 'RPAREN',         # ( )
    'LBRACKET', 'RBRACKET',     # [ ]
    'LBRACE', 'RBRACE',         # { }
    'LANGLE', 'RANGLE',         # < >
    'SEMI',                     # ;
    'COMMA', 'DOT'              # , .
  )

  ##
  ## Regexes for use in tokens
  ##

  # valid C identifiers (K&R2: A.2.3)
  identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'

  hex_prefix = '0[xX]'
  hex_digits = '[0-9a-fA-F]+'

  # integer constants (K&R2: A.2.5.1)
  decimal_constant = '0|([1-9][0-9]*)'
  hex_constant = hex_prefix+hex_digits
  # Don't allow octal constants (even invalid octal).
  octal_constant_disallowed = '0[0-9]+'

  # character constants (K&R2: A.2.5.2)
  # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
  # directives with Windows paths as filenames (..\..\dir\file)
  # For the same reason, decimal_escape allows all digit sequences. We want to
  # parse all correct code, even if it means to sometimes parse incorrect
  # code.
  #
  simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
  decimal_escape = r"""(\d+)"""
  hex_escape = r"""(x[0-9a-fA-F]+)"""
  # bad_escape must not treat any digit as a bad escape character: since
  # decimal_escape accepts every digit sequence, excluding only 0-7 here would
  # make the bad-string rule (which is tried before the plain string rule)
  # reject strings containing "\8" or "\9".
  bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-9])"""

  escape_sequence = \
      r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
  cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
  char_const = "'"+cconst_char+"'"
  unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
  bad_char_const = \
      r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+ \
      bad_escape+r"""[^'\n]*')"""

  # string literals (K&R2: A.2.6)
  string_char = r"""([^"\\\n]|"""+escape_sequence+')'
  string_literal = '"'+string_char+'*"'
  bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

  # floating constants (K&R2: A.2.5.3)
  exponent_part = r"""([eE][-+]?[0-9]+)"""
  fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
  floating_constant = \
      '(((('+fractional_constant+')'+ \
      exponent_part+'?)|([0-9]+'+exponent_part+')))'

  # Ordinals
  ordinal = r'@[0-9]+'
  missing_ordinal_value = r'@'
  # Don't allow ordinal values in octal (even invalid octal, like 09) or
  # hexadecimal.
  octal_or_hex_ordinal_disallowed = r'@((0[0-9]+)|('+hex_prefix+hex_digits+'))'

  ##
  ## Rules for the normal state
  ##
  t_ignore = ' \t\r'

  # Newlines
  # NOTE: the string at the top of this method is the rule's regex, not
  # documentation; it must stay exactly as written.
  def t_NEWLINE(self, t):
    r'\n+'
    t.lexer.lineno += len(t.value)

  # Operators
  t_MINUS             = r'-'
  t_PLUS              = r'\+'
  t_AMP               = r'&'

  # =
  t_EQUALS            = r'='

  # =>
  t_RESPONSE          = r'=>'

  # Delimiters
  t_LPAREN            = r'\('
  t_RPAREN            = r'\)'
  t_LBRACKET          = r'\['
  t_RBRACKET          = r'\]'
  t_LBRACE            = r'\{'
  t_RBRACE            = r'\}'
  t_LANGLE            = r'<'
  t_RANGLE            = r'>'
  t_COMMA             = r','
  t_DOT               = r'\.'
  t_SEMI              = r';'

  t_STRING_LITERAL    = string_literal

  # The following floating and integer constants are defined as
  # functions to impose a strict order (otherwise, decimal
  # is placed before the others because its regex is longer,
  # and this is bad)
  #
  @TOKEN(floating_constant)
  def t_FLOAT_CONST(self, t):
    return t

  @TOKEN(hex_constant)
  def t_INT_CONST_HEX(self, t):
    return t

  # Must come before the decimal rule, which would otherwise match the
  # leading "0" on its own.
  @TOKEN(octal_constant_disallowed)
  def t_OCTAL_CONSTANT_DISALLOWED(self, t):
    msg = "Octal values not allowed"
    self._error(msg, t)

  @TOKEN(decimal_constant)
  def t_INT_CONST_DEC(self, t):
    return t

  # Must come before bad_char_const, to prevent it from
  # catching valid char constants as invalid
  #
  @TOKEN(char_const)
  def t_CHAR_CONST(self, t):
    return t

  @TOKEN(unmatched_quote)
  def t_UNMATCHED_QUOTE(self, t):
    msg = "Unmatched '"
    self._error(msg, t)

  @TOKEN(bad_char_const)
  def t_BAD_CHAR_CONST(self, t):
    msg = "Invalid char constant %s" % t.value
    self._error(msg, t)

  # unmatched string literals are caught by the preprocessor

  @TOKEN(bad_string_literal)
  def t_BAD_STRING_LITERAL(self, t):
    msg = "String contains invalid escape code"
    self._error(msg, t)

  # Handle ordinal-related tokens in the right order:
  @TOKEN(octal_or_hex_ordinal_disallowed)
  def t_OCTAL_OR_HEX_ORDINAL_DISALLOWED(self, t):
    msg = "Octal and hexadecimal ordinal values not allowed"
    self._error(msg, t)

  @TOKEN(ordinal)
  def t_ORDINAL(self, t):
    return t

  @TOKEN(missing_ordinal_value)
  def t_BAD_ORDINAL(self, t):
    msg = "Missing ordinal value"
    self._error(msg, t)

  # Identifiers whose text is a key of keyword_map are re-typed as that
  # keyword; everything else stays a NAME.
  @TOKEN(identifier)
  def t_NAME(self, t):
    t.type = self.keyword_map.get(t.value, "NAME")
    return t

  # Ignore C and C++ style comments
  # NOTE: the string at the top of this method is the rule's regex, not
  # documentation. Nothing is returned, so no token is produced; only the
  # line counter is advanced by the newlines inside the comment.
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    t.lexer.lineno += t.value.count("\n")

  # Reports the first character of the text that no rule matched.
  def t_error(self, t):
    msg = "Illegal character %s" % repr(t.value[0])
    self._error(msg, t)
278