#!/usr/bin/env python
#
# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Metadata pass for annotating tokens in EcmaScript files."""

__author__ = ('robbyw@google.com (Robert Walker)')

import functools

from closure_linter import javascripttokens
from closure_linter import tokenutil


TokenType = javascripttokens.JavaScriptTokenType


class ParseError(Exception):
  """Exception indicating a parse error at the given token.

  Attributes:
    token: The token where the parse error occurred.
  """

  def __init__(self, token, message=None):
    """Initialize a parse error at the given token with an optional message.

    Args:
      token: The token where the parse error occurred.
      message: A message describing the parse error.
    """
    Exception.__init__(self, message)
    self.token = token


class EcmaContext(object):
  """Context object for EcmaScript languages.

  Contexts form a tree: each context records its parent and keeps its children
  ordered by the document position of their start tokens.

  Attributes:
    type: The context type.
    start_token: The token where this context starts.
    end_token: The token where this context ends.
    parent: The parent context.
    children: The child contexts of this context, in order.
  """

  # The root context.
  ROOT = 'root'

  # A block of code.
  BLOCK = 'block'

  # A pseudo-block of code for a given case or default section.
  CASE_BLOCK = 'case_block'

  # Block of statements in a for loop's parentheses.
  FOR_GROUP_BLOCK = 'for_block'

  # An implied block of code for 1 line if, while, and for statements
  IMPLIED_BLOCK = 'implied_block'

  # An index in to an array or object.
  INDEX = 'index'

  # An array literal in [].
  ARRAY_LITERAL = 'array_literal'

  # An object literal in {}.
  OBJECT_LITERAL = 'object_literal'

  # An individual element in an array or object literal.
  LITERAL_ELEMENT = 'literal_element'

  # The portion of a ternary statement between ? and :
  TERNARY_TRUE = 'ternary_true'

  # The portion of a ternary statement after :
  TERNARY_FALSE = 'ternary_false'

  # The entire switch statement.  This will contain a GROUP with the variable
  # and a BLOCK with the code.

  # Since that BLOCK is not a normal block, it can not contain statements except
  # for case and default.
  SWITCH = 'switch'

  # A normal comment.
  COMMENT = 'comment'

  # A JsDoc comment.
  DOC = 'doc'

  # An individual statement.
  STATEMENT = 'statement'

  # Code within parentheses.
  GROUP = 'group'

  # Parameter names in a function declaration.
  PARAMETERS = 'parameters'

  # A set of variable declarations appearing after the 'var' keyword.
  VAR = 'var'

  # Context types that are blocks.
  BLOCK_TYPES = frozenset([
      ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK])

  def __init__(self, context_type, start_token, parent=None):
    """Initializes the context object.

    Args:
      context_type: The context type.
      start_token: The token where this context starts.
      parent: The parent context.  When given, this context registers itself
          as a child of it.

    Attributes:
      type: The context type.
      start_token: The token where this context starts.
      end_token: The token where this context ends.
      parent: The parent context.
      children: The child contexts of this context, in order.
    """
    self.type = context_type
    self.start_token = start_token
    self.end_token = None

    self.parent = None
    self.children = []

    if parent:
      # AddChild sets self.parent as a side effect.
      parent.AddChild(self)

  def __repr__(self):
    """Returns a string representation of the context object."""
    stack = []
    context = self
    while context:
      stack.append(context.type)
      context = context.parent
    return 'Context(%s)' % ' > '.join(stack)

  def AddChild(self, child):
    """Adds a child to this context and sets child's parent to this context.

    Args:
      child: A child EcmaContext.  The child's parent will be set to this
          context.
    """

    child.parent = self

    self.children.append(child)
    # list.sort() no longer accepts a comparison function positionally on
    # Python 3; cmp_to_key adapts the comparator and also works on Python 2.7.
    self.children.sort(key=functools.cmp_to_key(EcmaContext._CompareContexts))

  def GetRoot(self):
    """Get the root context that contains this context, if any.

    Returns:
      The nearest enclosing context (including this one) whose type is ROOT,
      or None if there is no such context.
    """
    context = self
    while context:
      # Compare by value: context types are plain strings, and identity
      # comparison of strings is not guaranteed to hold for equal values.
      if context.type == EcmaContext.ROOT:
        return context
      context = context.parent
    return None

  @staticmethod
  def _CompareContexts(context1, context2):
    """Sorts contexts 1 and 2 by start token document position."""
    return tokenutil.Compare(context1.start_token, context2.start_token)


class EcmaMetaData(object):
  """Token metadata for EcmaScript languages.

  Attributes:
    last_code: The last code token to appear before this one.
    context: The context this token appears in.
    operator_type: The operator type, will be one of the *_OPERATOR constants
        defined below.
    is_implied_semicolon: True if the metadata pass decided that a statement
        ends after this token without an explicit semicolon.
    is_implied_block: True if this token opens an implied (brace-less) block.
    is_implied_block_close: True if this token ends a statement that closes an
        implied block.
    aliased_symbol: The full symbol being identified, as a string (e.g. an
        'XhrIo' alias for 'goog.net.XhrIo'). Only applicable to identifier
        tokens. This is set in aliaspass.py and is a best guess.
    is_alias_definition: True if the symbol is part of an alias definition.
        If so, these symbols won't be counted towards goog.requires/provides.
  """

  UNARY_OPERATOR = 'unary'

  UNARY_POST_OPERATOR = 'unary_post'

  BINARY_OPERATOR = 'binary'

  TERNARY_OPERATOR = 'ternary'

  def __init__(self):
    """Initializes a token metadata object."""
    self.last_code = None
    self.context = None
    self.operator_type = None
    self.is_implied_semicolon = False
    self.is_implied_block = False
    self.is_implied_block_close = False
    self.aliased_symbol = None
    self.is_alias_definition = False

  def __repr__(self):
    """Returns a string representation of the metadata object."""
    parts = ['%r' % self.context]
    if self.operator_type:
      parts.append('optype: %r' % self.operator_type)
    if self.is_implied_semicolon:
      parts.append('implied;')
    if self.aliased_symbol:
      parts.append('alias for: %s' % self.aliased_symbol)
    return 'MetaData(%s)' % ', '.join(parts)

  def IsUnaryOperator(self):
    """Returns whether the operator type is unary (prefix or postfix)."""
    return self.operator_type in (EcmaMetaData.UNARY_OPERATOR,
                                  EcmaMetaData.UNARY_POST_OPERATOR)

  def IsUnaryPostOperator(self):
    """Returns whether the operator type is a postfix unary operator."""
    return self.operator_type == EcmaMetaData.UNARY_POST_OPERATOR


class EcmaMetaDataPass(object):
  """A pass that iterates over all tokens and builds metadata about them."""

  def __init__(self):
    """Initialize the meta data pass object."""
    self.Reset()

  def Reset(self):
    """Resets the metadata pass to prepare for the next file."""
    self._token = None
    self._context = None
    self._AddContext(EcmaContext.ROOT)
    self._last_code = None

  def _CreateContext(self, context_type):
    """Overridable by subclasses to create the appropriate context type."""
    return EcmaContext(context_type, self._token, self._context)

  def _CreateMetaData(self):
    """Overridable by subclasses to create the appropriate metadata type."""
    return EcmaMetaData()

  def _AddContext(self, context_type):
    """Adds a context of the given type to the context stack.

    Args:
      context_type: The type of context to create
    """
    self._context = self._CreateContext(context_type)

  def _PopContext(self):
    """Moves up one level in the context stack.

    The popped context's end token is set to the current token.

    Returns:
      The former context.

    Raises:
      ParseError: If the root context is popped.
    """
    top_context = self._context
    top_context.end_token = self._token
    self._context = top_context.parent
    if self._context:
      return top_context
    else:
      raise ParseError(self._token)

  def _PopContextType(self, *stop_types):
    """Pops the context stack until a context of the given type is popped.

    Args:
      *stop_types: The types of context to pop to - stops at the first match.

    Returns:
      The context object of the given type that was popped.

    Raises:
      ParseError: If the root context is popped before a match is found.
    """
    last = None
    while not last or last.type not in stop_types:
      last = self._PopContext()
    return last

  def _EndStatement(self):
    """Process the end of a statement.

    Pops up to and including the enclosing statement context.  If that leaves
    an implied block as the current context, the current token is flagged as
    closing it and the implied block is popped as well.
    """
    self._PopContextType(EcmaContext.STATEMENT)
    if self._context.type == EcmaContext.IMPLIED_BLOCK:
      self._token.metadata.is_implied_block_close = True
      self._PopContext()

  def _ProcessContext(self):
    """Process the context at the current token.

    Returns:
      The context that should be assigned to the current token, or None if
      the current context after this method should be used.

    Raises:
      ParseError: When the token appears in an invalid context.
    """
    token = self._token
    token_type = token.type

    if self._context.type in EcmaContext.BLOCK_TYPES:
      # Whenever we're in a block, we add a statement context.  We make an
      # exception for switch statements since they can only contain case: and
      # default: and therefore don't directly contain statements.
      # The block we add here may be immediately removed in some cases, but
      # that causes no harm.
      parent = self._context.parent
      if not parent or parent.type != EcmaContext.SWITCH:
        self._AddContext(EcmaContext.STATEMENT)

    elif self._context.type == EcmaContext.ARRAY_LITERAL:
      self._AddContext(EcmaContext.LITERAL_ELEMENT)

    if token_type == TokenType.START_PAREN:
      if self._last_code and self._last_code.IsKeyword('for'):
        # for loops contain multiple statements in the group unlike while,
        # switch, if, etc.
        self._AddContext(EcmaContext.FOR_GROUP_BLOCK)
      else:
        self._AddContext(EcmaContext.GROUP)

    elif token_type == TokenType.END_PAREN:
      result = self._PopContextType(EcmaContext.GROUP,
                                    EcmaContext.FOR_GROUP_BLOCK)
      keyword_token = result.start_token.metadata.last_code
      # keyword_token will not exist if the open paren is the first line of the
      # file, for example if all code is wrapped in an immediately executed
      # anonymous function.
      if keyword_token and keyword_token.string in ('if', 'for', 'while'):
        next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
        # next_code is missing when the close paren is the last code token in
        # the file; that is handled like "no open brace follows".
        if next_code is None or next_code.type != TokenType.START_BLOCK:
          # Check for do-while.
          is_do_while = False
          pre_keyword_token = keyword_token.metadata.last_code
          if (pre_keyword_token and
              pre_keyword_token.type == TokenType.END_BLOCK):
            start_block_token = pre_keyword_token.metadata.context.start_token
            # The block may be the very first code in the file, in which case
            # nothing (and in particular no 'do') precedes it.
            do_token = start_block_token.metadata.last_code
            is_do_while = do_token is not None and do_token.string == 'do'

          # If it's not do-while, it's an implied block.
          if not is_do_while:
            self._AddContext(EcmaContext.IMPLIED_BLOCK)
            token.metadata.is_implied_block = True

      return result

    # else (not else if) with no open brace after it should be considered the
    # start of an implied block, similar to the case with if, for, and while
    # above.
    elif (token_type == TokenType.KEYWORD and
          token.string == 'else'):
      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
      # A trailing 'else' with no following code is treated like an 'else'
      # that is not followed by an open brace or an 'if'.
      if (next_code is None or
          (next_code.type != TokenType.START_BLOCK and
           (next_code.type != TokenType.KEYWORD or next_code.string != 'if'))):
        self._AddContext(EcmaContext.IMPLIED_BLOCK)
        token.metadata.is_implied_block = True

    elif token_type == TokenType.START_PARAMETERS:
      self._AddContext(EcmaContext.PARAMETERS)

    elif token_type == TokenType.END_PARAMETERS:
      return self._PopContextType(EcmaContext.PARAMETERS)

    elif token_type == TokenType.START_BRACKET:
      if (self._last_code and
          self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
        self._AddContext(EcmaContext.INDEX)
      else:
        self._AddContext(EcmaContext.ARRAY_LITERAL)

    elif token_type == TokenType.END_BRACKET:
      return self._PopContextType(EcmaContext.INDEX, EcmaContext.ARRAY_LITERAL)

    elif token_type == TokenType.START_BLOCK:
      last_code = self._last_code
      # last_code is None when the open brace is the first code token in the
      # file; that falls through to the object literal branch like any other
      # unrecognized predecessor.
      if last_code and (
          last_code.type in (TokenType.END_PAREN,
                             TokenType.END_PARAMETERS) or
          last_code.IsKeyword('else') or
          last_code.IsKeyword('do') or
          last_code.IsKeyword('try') or
          last_code.IsKeyword('finally') or
          (last_code.IsOperator(':') and
           last_code.metadata.context.type == EcmaContext.CASE_BLOCK)):
        # else, do, try, and finally all might have no () before {.
        # Also, handle the bizarre syntax case 10: {...}.
        self._AddContext(EcmaContext.BLOCK)
      else:
        self._AddContext(EcmaContext.OBJECT_LITERAL)

    elif token_type == TokenType.END_BLOCK:
      context = self._PopContextType(EcmaContext.BLOCK,
                                     EcmaContext.OBJECT_LITERAL)
      if self._context.type == EcmaContext.SWITCH:
        # The end of the block also means the end of the switch statement it
        # applies to.
        return self._PopContext()
      return context

    elif token.IsKeyword('switch'):
      self._AddContext(EcmaContext.SWITCH)

    elif (token_type == TokenType.KEYWORD and
          token.string in ('case', 'default') and
          self._context.type != EcmaContext.OBJECT_LITERAL):
      # Pop up to but not including the switch block.
      while self._context.parent.type != EcmaContext.SWITCH:
        self._PopContext()
        if self._context.parent is None:
          raise ParseError(token, 'Encountered case/default statement '
                           'without switch statement')

    elif token.IsOperator('?'):
      self._AddContext(EcmaContext.TERNARY_TRUE)

    elif token.IsOperator(':'):
      if self._context.type == EcmaContext.OBJECT_LITERAL:
        self._AddContext(EcmaContext.LITERAL_ELEMENT)

      elif self._context.type == EcmaContext.TERNARY_TRUE:
        self._PopContext()
        self._AddContext(EcmaContext.TERNARY_FALSE)

      # Handle nested ternary statements like:
      # foo = bar ? baz ? 1 : 2 : 3
      # When we encounter the second ":" the context is
      # ternary_false > ternary_true > statement > root
      elif (self._context.type == EcmaContext.TERNARY_FALSE and
            self._context.parent.type == EcmaContext.TERNARY_TRUE):
        self._PopContext()  # Leave current ternary false context.
        self._PopContext()  # Leave current parent ternary true
        self._AddContext(EcmaContext.TERNARY_FALSE)

      elif self._context.parent.type == EcmaContext.SWITCH:
        self._AddContext(EcmaContext.CASE_BLOCK)

    elif token.IsKeyword('var'):
      self._AddContext(EcmaContext.VAR)

    elif token.IsOperator(','):
      while self._context.type not in (EcmaContext.VAR,
                                       EcmaContext.ARRAY_LITERAL,
                                       EcmaContext.OBJECT_LITERAL,
                                       EcmaContext.STATEMENT,
                                       EcmaContext.PARAMETERS,
                                       EcmaContext.GROUP):
        self._PopContext()

    elif token_type == TokenType.SEMICOLON:
      self._EndStatement()

  def Process(self, first_token):
    """Processes the token stream starting with the given token.

    Args:
      first_token: The first token of the stream.  Tokens are followed through
          their 'next' attribute until it is exhausted.

    Raises:
      ParseError: When a token appears in an invalid context.
    """
    self._token = first_token
    while self._token:
      self._ProcessToken()

      if self._token.IsCode():
        self._last_code = self._token

      self._token = self._token.next

    try:
      # Close every context that is still open at the end of the stream.
      self._PopContextType(EcmaContext.ROOT)
    except ParseError:
      # Ignore the "popped to root" error.
      pass

  def _ProcessToken(self):
    """Process the given token.

    Attaches a fresh metadata object to the current token, records its context
    and preceding code token, classifies operators, and decides whether an
    implied semicolon follows the token.
    """
    token = self._token
    token.metadata = self._CreateMetaData()
    context = (self._ProcessContext() or self._context)
    token.metadata.context = context
    token.metadata.last_code = self._last_code

    # Determine the operator type of the token, if applicable.
    if token.type == TokenType.OPERATOR:
      token.metadata.operator_type = self._GetOperatorType(token)

    # Determine if there is an implied semicolon after the token.
    if token.type != TokenType.SEMICOLON:
      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
      # A statement like if (x) does not need a semicolon after it
      is_implied_block = self._context.type == EcmaContext.IMPLIED_BLOCK
      is_last_code_in_line = token.IsCode() and (
          not next_code or next_code.line_number != token.line_number)
      is_continued_identifier = (token.type == TokenType.IDENTIFIER and
                                 token.string.endswith('.'))
      is_continued_operator = (token.type == TokenType.OPERATOR and
                               not token.metadata.IsUnaryPostOperator())
      is_continued_dot = token.string == '.'
      next_code_is_operator = next_code and next_code.type == TokenType.OPERATOR
      next_code_is_dot = next_code and next_code.string == '.'
      is_end_of_block = (
          token.type == TokenType.END_BLOCK and
          token.metadata.context.type != EcmaContext.OBJECT_LITERAL)
      is_multiline_string = token.type == TokenType.STRING_TEXT
      is_continued_var_decl = (token.IsKeyword('var') and
                               next_code and
                               (next_code.type in [TokenType.IDENTIFIER,
                                                   TokenType.SIMPLE_LVALUE]) and
                               token.line_number < next_code.line_number)
      next_code_is_block = next_code and next_code.type == TokenType.START_BLOCK
      if (is_last_code_in_line and
          self._StatementCouldEndInContext() and
          not is_multiline_string and
          not is_end_of_block and
          not is_continued_var_decl and
          not is_continued_identifier and
          not is_continued_operator and
          not is_continued_dot and
          not next_code_is_dot and
          not next_code_is_operator and
          not is_implied_block and
          not next_code_is_block):
        token.metadata.is_implied_semicolon = True
        self._EndStatement()

  def _StatementCouldEndInContext(self):
    """Returns if the current statement (if any) may end in this context."""
    # In the basic statement or variable declaration context, statement can
    # always end in this context.
    if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR):
      return True

    # End of a ternary false branch inside a statement can also be the
    # end of the statement, for example:
    # var x = foo ? foo.bar() : null
    # In this case the statement ends after the null, when the context stack
    # looks like ternary_false > var > statement > root.
    if (self._context.type == EcmaContext.TERNARY_FALSE and
        self._context.parent.type in (EcmaContext.STATEMENT, EcmaContext.VAR)):
      return True

    # In all other contexts like object and array literals, ternary true, etc.
    # the statement can't yet end.
    return False

  def _GetOperatorType(self, token):
    """Returns the operator type of the given operator token.

    Args:
      token: The token to get arity for.

    Returns:
      The type of the operator.  One of the *_OPERATOR constants defined in
      EcmaMetaData.
    """
    if token.string == '?':
      return EcmaMetaData.TERNARY_OPERATOR

    if token.string in TokenType.UNARY_OPERATORS:
      return EcmaMetaData.UNARY_OPERATOR

    # An operator with no preceding code, or directly after a closing brace,
    # is classified as unary.
    last_code = token.metadata.last_code
    if not last_code or last_code.type == TokenType.END_BLOCK:
      return EcmaMetaData.UNARY_OPERATOR

    if (token.string in TokenType.UNARY_POST_OPERATORS and
        last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
      return EcmaMetaData.UNARY_POST_OPERATOR

    if (token.string in TokenType.UNARY_OK_OPERATORS and
        last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and
        last_code.string not in TokenType.UNARY_POST_OPERATORS):
      return EcmaMetaData.UNARY_OPERATOR

    return EcmaMetaData.BINARY_OPERATOR