#!/usr/bin/env python
#
# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Metadata pass for annotating tokens in EcmaScript files."""

__author__ = ('robbyw@google.com (Robert Walker)')

from closure_linter import javascripttokens
from closure_linter import tokenutil


TokenType = javascripttokens.JavaScriptTokenType


class ParseError(Exception):
  """Exception indicating a parse error at the given token.

  Attributes:
    token: The token where the parse error occurred.
  """

  def __init__(self, token, message=None):
    """Initialize a parse error at the given token with an optional message.

    Args:
      token: The token where the parse error occurred.
      message: A message describing the parse error.
    """
    Exception.__init__(self, message)
    self.token = token


class EcmaContext(object):
  """Context object for EcmaScript languages.

  Contexts form a stack through their parent links; the root context has no
  parent.

  Attributes:
    type: The context type.
    start_token: The token where this context starts.
    end_token: The token where this context ends, or None while the context
        is still open.
    parent: The parent context.
  """

  # The root context.
  ROOT = 'root'

  # A block of code.
  BLOCK = 'block'

  # A pseudo-block of code for a given case or default section.
  CASE_BLOCK = 'case_block'

  # Block of statements in a for loop's parentheses.
  FOR_GROUP_BLOCK = 'for_block'

  # An implied block of code for 1 line if, while, and for statements.
  IMPLIED_BLOCK = 'implied_block'

  # An index in to an array or object.
  INDEX = 'index'

  # An array literal in [].
  ARRAY_LITERAL = 'array_literal'

  # An object literal in {}.
  OBJECT_LITERAL = 'object_literal'

  # An individual element in an array or object literal.
  LITERAL_ELEMENT = 'literal_element'

  # The portion of a ternary statement between ? and :
  TERNARY_TRUE = 'ternary_true'

  # The portion of a ternary statement after :
  TERNARY_FALSE = 'ternary_false'

  # The entire switch statement.  This will contain a GROUP with the variable
  # and a BLOCK with the code.
  #
  # Since that BLOCK is not a normal block, it can not contain statements
  # except for case and default.
  SWITCH = 'switch'

  # A normal comment.
  COMMENT = 'comment'

  # A JsDoc comment.
  DOC = 'doc'

  # An individual statement.
  STATEMENT = 'statement'

  # Code within parentheses.
  GROUP = 'group'

  # Parameter names in a function declaration.
  PARAMETERS = 'parameters'

  # A set of variable declarations appearing after the 'var' keyword.
  VAR = 'var'

  # Context types that are blocks.
  BLOCK_TYPES = frozenset([
      ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK])

  def __init__(self, type, start_token, parent):
    """Initializes the context object.

    Args:
      type: The context type.
      start_token: The token where this context starts.
      parent: The parent context.
    """
    self.type = type
    self.start_token = start_token
    self.end_token = None
    self.parent = parent

  def __repr__(self):
    """Returns a string representation of the context object.

    The representation lists the context types from this context outwards to
    the outermost ancestor, e.g. 'Context(statement > root)'.
    """
    stack = []
    context = self
    while context:
      stack.append(context.type)
      context = context.parent
    return 'Context(%s)' % ' > '.join(stack)


class EcmaMetaData(object):
  """Token metadata for EcmaScript languages.

  Attributes:
    last_code: The last code token to appear before this one.
    context: The context this token appears in.
    operator_type: The operator type, will be one of the *_OPERATOR constants
        defined below.
    is_implied_semicolon: True if the metadata pass decided that a statement
        ends after this token without an explicit semicolon.
    is_implied_block: True if this token opens an implied (brace-less) block.
    is_implied_block_close: True if an implied block was closed when the
        statement ending at this token was processed.
  """

  UNARY_OPERATOR = 'unary'

  UNARY_POST_OPERATOR = 'unary_post'

  BINARY_OPERATOR = 'binary'

  TERNARY_OPERATOR = 'ternary'

  def __init__(self):
    """Initializes a token metadata object."""
    self.last_code = None
    self.context = None
    self.operator_type = None
    self.is_implied_semicolon = False
    self.is_implied_block = False
    self.is_implied_block_close = False

  def __repr__(self):
    """Returns a string representation of the metadata object."""
    parts = ['%r' % self.context]
    if self.operator_type:
      parts.append('optype: %r' % self.operator_type)
    if self.is_implied_semicolon:
      parts.append('implied;')
    return 'MetaData(%s)' % ', '.join(parts)

  def IsUnaryOperator(self):
    """Returns whether the operator is unary, either prefix or postfix."""
    return self.operator_type in (EcmaMetaData.UNARY_OPERATOR,
                                  EcmaMetaData.UNARY_POST_OPERATOR)

  def IsUnaryPostOperator(self):
    """Returns whether the operator is a postfix unary operator."""
    return self.operator_type == EcmaMetaData.UNARY_POST_OPERATOR


class EcmaMetaDataPass(object):
  """A pass that iterates over all tokens and builds metadata about them."""

  def __init__(self):
    """Initialize the meta data pass object."""
    self.Reset()

  def Reset(self):
    """Resets the metadata pass to prepare for the next file."""
    self._token = None
    self._context = None
    self._AddContext(EcmaContext.ROOT)
    self._last_code = None

  def _CreateContext(self, type):
    """Overridable by subclasses to create the appropriate context type."""
    return EcmaContext(type, self._token, self._context)

  def _CreateMetaData(self):
    """Overridable by subclasses to create the appropriate metadata type."""
    return EcmaMetaData()

  def _AddContext(self, type):
    """Adds a context of the given type to the context stack.

    Args:
      type: The type of context to create
    """
    self._context = self._CreateContext(type)

  def _PopContext(self):
    """Moves up one level in the context stack.

    The popped context's end_token is set to the current token.

    Returns:
      The former context.

    Raises:
      ParseError: If the root context is popped.
    """
    top_context = self._context
    top_context.end_token = self._token
    self._context = top_context.parent
    if self._context:
      return top_context
    else:
      raise ParseError(self._token)

  def _PopContextType(self, *stop_types):
    """Pops the context stack until a context of the given type is popped.

    Args:
      stop_types: The types of context to pop to - stops at the first match.

    Returns:
      The context object of the given type that was popped.

    Raises:
      ParseError: If the root context is popped before a match is found.
    """
    last = None
    while not last or last.type not in stop_types:
      last = self._PopContext()
    return last

  def _EndStatement(self):
    """Process the end of a statement.

    Pops up to and including the enclosing statement context.  If that leaves
    an implied block on top of the stack, the block is closed as well and the
    current token is flagged as closing it.
    """
    self._PopContextType(EcmaContext.STATEMENT)
    if self._context.type == EcmaContext.IMPLIED_BLOCK:
      self._token.metadata.is_implied_block_close = True
      self._PopContext()

  def _ProcessContext(self):
    """Process the context at the current token.

    Returns:
      The context that should be assigned to the current token, or None if
      the current context after this method should be used.

    Raises:
      ParseError: When the token appears in an invalid context.
    """
    token = self._token
    token_type = token.type

    if self._context.type in EcmaContext.BLOCK_TYPES:
      # Whenever we're in a block, we add a statement context.  We make an
      # exception for switch statements since they can only contain case: and
      # default: and therefore don't directly contain statements.
      # The block we add here may be immediately removed in some cases, but
      # that causes no harm.
      parent = self._context.parent
      if not parent or parent.type != EcmaContext.SWITCH:
        self._AddContext(EcmaContext.STATEMENT)

    elif self._context.type == EcmaContext.ARRAY_LITERAL:
      self._AddContext(EcmaContext.LITERAL_ELEMENT)

    if token_type == TokenType.START_PAREN:
      if self._last_code and self._last_code.IsKeyword('for'):
        # for loops contain multiple statements in the group unlike while,
        # switch, if, etc.
        self._AddContext(EcmaContext.FOR_GROUP_BLOCK)
      else:
        self._AddContext(EcmaContext.GROUP)

    elif token_type == TokenType.END_PAREN:
      result = self._PopContextType(EcmaContext.GROUP,
                                    EcmaContext.FOR_GROUP_BLOCK)
      keyword_token = result.start_token.metadata.last_code
      # keyword_token will not exist if the open paren is the first line of the
      # file, for example if all code is wrapped in an immediately executed
      # anonymous function.
      if keyword_token and keyword_token.string in ('if', 'for', 'while'):
        next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
        # next_code is None when the closing paren is the last code token in
        # the file, e.g. "do { ... } while (x)" with no trailing semicolon.
        if not next_code or next_code.type != TokenType.START_BLOCK:
          # Check for do-while.
          is_do_while = False
          pre_keyword_token = keyword_token.metadata.last_code
          if (pre_keyword_token and
              pre_keyword_token.type == TokenType.END_BLOCK):
            start_block_token = pre_keyword_token.metadata.context.start_token
            # The token before the block start is missing when that block
            # start is the first code token in the file.
            before_block = start_block_token.metadata.last_code
            if before_block:
              is_do_while = before_block.string == 'do'

          # If it's not do-while, it's an implied block.
          if not is_do_while:
            self._AddContext(EcmaContext.IMPLIED_BLOCK)
            token.metadata.is_implied_block = True

      return result

    # else (not else if) with no open brace after it should be considered the
    # start of an implied block, similar to the case with if, for, and while
    # above.
    elif (token_type == TokenType.KEYWORD and
          token.string == 'else'):
      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
      # next_code is None when 'else' is the last code token in the file.
      followed_by_block = (next_code and
                           next_code.type == TokenType.START_BLOCK)
      followed_by_if = (next_code and
                        next_code.type == TokenType.KEYWORD and
                        next_code.string == 'if')
      if not followed_by_block and not followed_by_if:
        self._AddContext(EcmaContext.IMPLIED_BLOCK)
        token.metadata.is_implied_block = True

    elif token_type == TokenType.START_PARAMETERS:
      self._AddContext(EcmaContext.PARAMETERS)

    elif token_type == TokenType.END_PARAMETERS:
      return self._PopContextType(EcmaContext.PARAMETERS)

    elif token_type == TokenType.START_BRACKET:
      if (self._last_code and
          self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
        self._AddContext(EcmaContext.INDEX)
      else:
        self._AddContext(EcmaContext.ARRAY_LITERAL)

    elif token_type == TokenType.END_BRACKET:
      return self._PopContextType(EcmaContext.INDEX, EcmaContext.ARRAY_LITERAL)

    elif token_type == TokenType.START_BLOCK:
      # last_code is None when the brace is the first code token in the file;
      # in that case none of the block-introducing patterns below can match.
      last_code = self._last_code
      if last_code and (
          last_code.type in (TokenType.END_PAREN,
                             TokenType.END_PARAMETERS) or
          last_code.IsKeyword('else') or
          last_code.IsKeyword('do') or
          last_code.IsKeyword('try') or
          last_code.IsKeyword('finally') or
          (last_code.IsOperator(':') and
           last_code.metadata.context.type == EcmaContext.CASE_BLOCK)):
        # else, do, try, and finally all might have no () before {.
        # Also, handle the bizarre syntax case 10: {...}.
        self._AddContext(EcmaContext.BLOCK)
      else:
        self._AddContext(EcmaContext.OBJECT_LITERAL)

    elif token_type == TokenType.END_BLOCK:
      context = self._PopContextType(EcmaContext.BLOCK,
                                     EcmaContext.OBJECT_LITERAL)
      if self._context.type == EcmaContext.SWITCH:
        # The end of the block also means the end of the switch statement it
        # applies to.
        return self._PopContext()
      return context

    elif token.IsKeyword('switch'):
      self._AddContext(EcmaContext.SWITCH)

    elif (token_type == TokenType.KEYWORD and
          token.string in ('case', 'default')):
      # Pop up to but not including the switch block.
      while True:
        parent = self._context.parent
        if not parent:
          # We walked all the way up to the root without finding a switch.
          raise ParseError(
              token, '%s outside of a switch statement' % token.string)
        if parent.type == EcmaContext.SWITCH:
          break
        self._PopContext()

    elif token.IsOperator('?'):
      self._AddContext(EcmaContext.TERNARY_TRUE)

    elif token.IsOperator(':'):
      if self._context.type == EcmaContext.OBJECT_LITERAL:
        self._AddContext(EcmaContext.LITERAL_ELEMENT)

      elif self._context.type == EcmaContext.TERNARY_TRUE:
        self._PopContext()
        self._AddContext(EcmaContext.TERNARY_FALSE)

      # Handle nested ternary statements like:
      # foo = bar ? baz ? 1 : 2 : 3
      # When we encounter the second ":" the context is
      # ternary_false > ternary_true > statement > root
      elif (self._context.type == EcmaContext.TERNARY_FALSE and
            self._context.parent.type == EcmaContext.TERNARY_TRUE):
        self._PopContext()  # Leave current ternary false context.
        self._PopContext()  # Leave current parent ternary true
        self._AddContext(EcmaContext.TERNARY_FALSE)

      elif self._context.parent.type == EcmaContext.SWITCH:
        self._AddContext(EcmaContext.CASE_BLOCK)

    elif token.IsKeyword('var'):
      self._AddContext(EcmaContext.VAR)

    elif token.IsOperator(','):
      while self._context.type not in (EcmaContext.VAR,
                                       EcmaContext.ARRAY_LITERAL,
                                       EcmaContext.OBJECT_LITERAL,
                                       EcmaContext.STATEMENT,
                                       EcmaContext.PARAMETERS,
                                       EcmaContext.GROUP):
        self._PopContext()

    elif token_type == TokenType.SEMICOLON:
      self._EndStatement()

  def Process(self, first_token):
    """Processes the token stream starting with the given token.

    Args:
      first_token: The first token of the stream.  Tokens are followed through
          their next links until a falsy next token is reached.

    Raises:
      ParseError: When a token appears in an invalid context.
    """
    self._token = first_token
    while self._token:
      self._ProcessToken()

      if self._token.IsCode():
        self._last_code = self._token

      self._token = self._token.next

    # Close every context that is still open.  Popping the root context
    # always raises, so the error below is expected.
    try:
      self._PopContextType(EcmaContext.ROOT)
    except ParseError:
      # Ignore the "popped to root" error.
      pass

  def _ProcessToken(self):
    """Process the given token.

    Attaches a fresh metadata object to the current token, fills in its
    context, previous code token and operator type, and ends the current
    statement if a semicolon is implied after the token.
    """
    token = self._token
    token.metadata = self._CreateMetaData()
    context = (self._ProcessContext() or self._context)
    token.metadata.context = context
    token.metadata.last_code = self._last_code

    # Determine the operator type of the token, if applicable.
    if token.type == TokenType.OPERATOR:
      token.metadata.operator_type = self._GetOperatorType(token)

    # Determine if there is an implied semicolon after the token.
    if token.type != TokenType.SEMICOLON:
      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
      # A statement like if (x) does not need a semicolon after it
      is_implied_block = self._context.type == EcmaContext.IMPLIED_BLOCK
      is_last_code_in_line = token.IsCode() and (
          not next_code or next_code.line_number != token.line_number)
      is_continued_identifier = (token.type == TokenType.IDENTIFIER and
                                 token.string.endswith('.'))
      is_continued_operator = (token.type == TokenType.OPERATOR and
                               not token.metadata.IsUnaryPostOperator())
      is_continued_dot = token.string == '.'
      next_code_is_operator = next_code and next_code.type == TokenType.OPERATOR
      next_code_is_dot = next_code and next_code.string == '.'
      is_end_of_block = (
          token.type == TokenType.END_BLOCK and
          token.metadata.context.type != EcmaContext.OBJECT_LITERAL)
      is_multiline_string = token.type == TokenType.STRING_TEXT
      next_code_is_block = next_code and next_code.type == TokenType.START_BLOCK
      if (is_last_code_in_line and
          self._StatementCouldEndInContext() and
          not is_multiline_string and
          not is_end_of_block and
          not is_continued_identifier and
          not is_continued_operator and
          not is_continued_dot and
          not next_code_is_dot and
          not next_code_is_operator and
          not is_implied_block and
          not next_code_is_block):
        token.metadata.is_implied_semicolon = True
        self._EndStatement()

  def _StatementCouldEndInContext(self):
    """Returns whether the current statement (if any) may end in this context."""
    # In the basic statement or variable declaration context, statement can
    # always end in this context.
    if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR):
      return True

    # End of a ternary false branch inside a statement can also be the
    # end of the statement, for example:
    #   var x = foo ? foo.bar() : null
    # In this case the statement ends after the null, when the context stack
    # looks like ternary_false > var > statement > root.
    if (self._context.type == EcmaContext.TERNARY_FALSE and
        self._context.parent.type in (EcmaContext.STATEMENT, EcmaContext.VAR)):
      return True

    # In all other contexts like object and array literals, ternary true, etc.
    # the statement can't yet end.
    return False

  def _GetOperatorType(self, token):
    """Returns the operator type of the given operator token.

    Args:
      token: The token to get arity for.

    Returns:
      The type of the operator.  One of the *_OPERATOR constants defined in
      EcmaMetaData.
    """
    if token.string == '?':
      return EcmaMetaData.TERNARY_OPERATOR

    if token.string in TokenType.UNARY_OPERATORS:
      return EcmaMetaData.UNARY_OPERATOR

    # An operator with nothing before it, or directly after a closing brace,
    # is treated as unary.
    last_code = token.metadata.last_code
    if not last_code or last_code.type == TokenType.END_BLOCK:
      return EcmaMetaData.UNARY_OPERATOR

    if (token.string in TokenType.UNARY_POST_OPERATORS and
        last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
      return EcmaMetaData.UNARY_POST_OPERATOR

    if (token.string in TokenType.UNARY_OK_OPERATORS and
        last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and
        last_code.string not in TokenType.UNARY_POST_OPERATORS):
      return EcmaMetaData.UNARY_OPERATOR

    return EcmaMetaData.BINARY_OPERATOR