# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# NOTE(review): |imp| is deprecated since Python 3.4 and removed in Python
# 3.12; this module needs porting before it can be imported there.
import imp
import os.path
import sys


def _GetDirAbove(dirname):
    """Returns the directory "above" this file containing |dirname| (which
    must also be "above" this file).

    Walks up from the absolute path of this file one path component at a
    time until a component named |dirname| is found, and returns the path of
    the directory holding that component.

    Raises:
      AssertionError: if no component of this file's path is named |dirname|.
    """
    path = os.path.abspath(__file__)
    while True:
        path, tail = os.path.split(path)
        # An empty tail means the walk reached the filesystem root. Raise
        # explicitly instead of using an assert statement: asserts are
        # stripped under -O, which would turn a missing |dirname| into an
        # infinite loop.
        if not tail:
            raise AssertionError(
                'No directory named "%s" above %s' % (dirname, __file__))
        if tail == dirname:
            return path


try:
    imp.find_module("ply")
except ImportError:
    # ply is not on the import path; fall back to the "third_party"
    # directory that sits next to the "mojo" directory above this file.
    sys.path.append(os.path.join(_GetDirAbove("mojo"), "third_party"))
from ply.lex import TOKEN

from ..error import Error


class LexError(Error):
    """Class for errors from the lexer."""

    def __init__(self, filename, message, lineno):
        """Forwards |filename|, |message| and |lineno| to Error.__init__."""
        Error.__init__(self, filename, message, lineno=lineno)


# We have methods which look like they could be functions:
# pylint: disable=R0201
class Lexer(object):
    """Token definitions and rules, written in the naming convention of
    ply.lex (|tokens|, |t_ignore|, |t_error| and one |t_<TOKEN>| per rule).

    Rules given as functions are kept in a deliberate order (see the comments
    next to them); do not reorder them. For |t_NEWLINE| and |t_COMMENT| the
    docstring *is* the regex, so it must not be edited as documentation.
    """

    def __init__(self, filename):
        """Remembers |filename| so that errors can be reported against it."""
        self.filename = filename

    ######################--   PRIVATE   --######################

    ##
    ## Internal auxiliary methods
    ##
    def _error(self, msg, token):
        """Raises a LexError with |msg| at the line number of |token|."""
        raise LexError(self.filename, msg, token.lineno)

    ##
    ## Reserved keywords
    ##
    keywords = (
        'HANDLE',

        'IMPORT',
        'MODULE',
        'STRUCT',
        'INTERFACE',
        'ENUM',
        'CONST',
        'TRUE',
        'FALSE',
        'DEFAULT',
    )

    # Maps the lower-case spelling found in the input to the token type.
    # Built with a comprehension so that no loop variable is left behind as
    # a stray class attribute.
    keyword_map = {keyword.lower(): keyword for keyword in keywords}

    ##
    ## All the tokens recognized by the lexer
    ##
    tokens = keywords + (
        # Identifiers
        'NAME',

        # Constants
        'ORDINAL',
        'INT_CONST_DEC', 'INT_CONST_HEX',
        'FLOAT_CONST',

        # String literals
        'STRING_LITERAL',

        # Operators
        'MINUS',
        'PLUS',
        'AMP',
        'QSTN',

        # Assignment
        'EQUALS',

        # Request / response
        'RESPONSE',

        # Delimiters
        'LPAREN', 'RPAREN',         # ( )
        'LBRACKET', 'RBRACKET',     # [ ]
        'LBRACE', 'RBRACE',         # { }
        'LANGLE', 'RANGLE',         # < >
        'SEMI',                     # ;
        'COMMA', 'DOT'              # , .
    )

    ##
    ## Regexes for use in tokens
    ##

    # valid C identifiers (K&R2: A.2.3)
    identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'

    hex_prefix = '0[xX]'
    hex_digits = '[0-9a-fA-F]+'

    # integer constants (K&R2: A.2.5.1)
    decimal_constant = '0|([1-9][0-9]*)'
    hex_constant = hex_prefix+hex_digits
    # Don't allow octal constants (even invalid octal).
    octal_constant_disallowed = '0[0-9]+'

    # character constants (K&R2: A.2.5.2)
    # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support
    # #line directives with Windows paths as filenames (..\..\dir\file)
    # For the same reason, decimal_escape allows all digit sequences. We want
    # to parse all correct code, even if it means to sometimes parse incorrect
    # code.
    #
    simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
    decimal_escape = r"""(\d+)"""
    hex_escape = r"""(x[0-9a-fA-F]+)"""
    # The excluded digit range is 0-9 (not 0-7) so that it agrees with
    # decimal_escape above: otherwise "\8" and "\9" would be accepted by
    # string_literal but rejected first by bad_string_literal, whose rule is
    # tried earlier.
    bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-9])"""

    escape_sequence = \
        r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'

    # string literals (K&R2: A.2.6)
    string_char = r"""([^"\\\n]|"""+escape_sequence+')'
    string_literal = '"'+string_char+'*"'
    bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

    # floating constants (K&R2: A.2.5.3)
    exponent_part = r"""([eE][-+]?[0-9]+)"""
    fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
    floating_constant = \
        '(((('+fractional_constant+')'+ \
        exponent_part+'?)|([0-9]+'+exponent_part+')))'

    # Ordinals
    ordinal = r'@[0-9]+'
    missing_ordinal_value = r'@'
    # Don't allow ordinal values in octal (even invalid octal, like 09) or
    # hexadecimal.
    octal_or_hex_ordinal_disallowed = (
        r'@((0[0-9]+)|('+hex_prefix+hex_digits+'))')

    ##
    ## Rules for the normal state
    ##
    t_ignore = ' \t\r'

    # Newlines: produce no token, only advance the line counter.
    def t_NEWLINE(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Operators
    t_MINUS = r'-'
    t_PLUS = r'\+'
    t_AMP = r'&'
    t_QSTN = r'\?'

    # =
    t_EQUALS = r'='

    # =>
    t_RESPONSE = r'=>'

    # Delimiters
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_LBRACE = r'\{'
    t_RBRACE = r'\}'
    t_LANGLE = r'<'
    t_RANGLE = r'>'
    t_COMMA = r','
    t_DOT = r'\.'
    t_SEMI = r';'

    t_STRING_LITERAL = string_literal

    # The following floating and integer constants are defined as
    # functions to impose a strict order (otherwise, decimal
    # is placed before the others because its regex is longer,
    # and this is bad)
    #
    @TOKEN(floating_constant)
    def t_FLOAT_CONST(self, t):
        return t

    @TOKEN(hex_constant)
    def t_INT_CONST_HEX(self, t):
        return t

    # Must come before t_INT_CONST_DEC so that e.g. "012" is reported as an
    # error rather than being split into several decimal constants.
    @TOKEN(octal_constant_disallowed)
    def t_OCTAL_CONSTANT_DISALLOWED(self, t):
        msg = "Octal values not allowed"
        self._error(msg, t)

    @TOKEN(decimal_constant)
    def t_INT_CONST_DEC(self, t):
        return t

    # unmatched string literals are caught by the preprocessor

    @TOKEN(bad_string_literal)
    def t_BAD_STRING_LITERAL(self, t):
        msg = "String contains invalid escape code"
        self._error(msg, t)

    # Handle ordinal-related tokens in the right order:
    @TOKEN(octal_or_hex_ordinal_disallowed)
    def t_OCTAL_OR_HEX_ORDINAL_DISALLOWED(self, t):
        msg = "Octal and hexadecimal ordinal values not allowed"
        self._error(msg, t)

    @TOKEN(ordinal)
    def t_ORDINAL(self, t):
        return t

    @TOKEN(missing_ordinal_value)
    def t_BAD_ORDINAL(self, t):
        msg = "Missing ordinal value"
        self._error(msg, t)

    # Identifiers whose text is a key of keyword_map get that keyword's token
    # type; everything else is a plain NAME.
    @TOKEN(identifier)
    def t_NAME(self, t):
        t.type = self.keyword_map.get(t.value, "NAME")
        return t

    # Ignore C and C++ style comments
    def t_COMMENT(self, t):
        r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
        # Comments may span lines; keep the line counter accurate.
        t.lexer.lineno += t.value.count("\n")

    # Called for input that matches no rule; reports the first offending
    # character.
    def t_error(self, t):
        msg = "Illegal character %s" % repr(t.value[0])
        self._error(msg, t)