1#!/usr/bin/env python 2# Copyright 2010 The Closure Linter Authors. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS-IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15 16"""Methods for checking EcmaScript files for indentation issues.""" 17 18__author__ = ('robbyw@google.com (Robert Walker)') 19 20import gflags as flags 21 22from closure_linter import ecmametadatapass 23from closure_linter import errors 24from closure_linter import javascripttokens 25from closure_linter import tokenutil 26from closure_linter.common import error 27from closure_linter.common import position 28 29 30flags.DEFINE_boolean('debug_indentation', False, 31 'Whether to print debugging information for indentation.') 32 33 34# Shorthand 35Context = ecmametadatapass.EcmaContext 36Error = error.Error 37Position = position.Position 38Type = javascripttokens.JavaScriptTokenType 39 40 41# The general approach: 42# 43# 1. Build a stack of tokens that can affect indentation. 44# For each token, we determine if it is a block or continuation token. 45# Some tokens need to be temporarily overwritten in case they are removed 46# before the end of the line. 47# Much of the work here is determining which tokens to keep on the stack 48# at each point. Operators, for example, should be removed once their 49# expression or line is gone, while parentheses must stay until the matching 50# end parentheses is found. 51# 52# 2. Given that stack, determine the allowable indentations. 
#    Due to flexible indentation rules in JavaScript, there may be many
#    allowable indentations for each stack.  We follow the general
#    "no false positives" approach of GJsLint and build the most permissive
#    set possible.


class TokenInfo(object):
  """Stores information about a token.

  Attributes:
    token: The token
    is_block: Whether the token represents a block indentation.
    is_transient: Whether the token should be automatically removed without
        finding a matching end token.
    overridden_by: TokenInfo for a token that overrides the indentation that
        this token would require.
    is_permanent_override: Whether the override on this token should persist
        even after the overriding token is removed from the stack.  For
        example:
          x([
            1],
          2);
        needs this to be set so the last line is not required to be a
        continuation indent.
    line_number: The effective line number of this token.  Will either be the
        actual line number or the one before it in the case of a mis-wrapped
        operator.
  """

  def __init__(self, token, is_block=False):
    """Initializes a TokenInfo object.

    Args:
      token: The token
      is_block: Whether the token represents a block indentation.
    """
    self.token = token
    self.overridden_by = None
    self.is_permanent_override = False
    self.is_block = is_block
    # Parens/parameter-lists are kept on the stack until their matching end
    # token is found; every other non-block token is transient and may be
    # popped at statement boundaries (see _PopTransient).
    self.is_transient = not is_block and token.type not in (
        Type.START_PAREN, Type.START_PARAMETERS)
    self.line_number = token.line_number

  def __repr__(self):
    """Returns a debug representation including override/block state."""
    result = '\n %s' % self.token
    if self.overridden_by:
      result = '%s OVERRIDDEN [by "%s"]' % (
          result, self.overridden_by.token.string)
    result += ' {is_block: %s, is_transient: %s}' % (
        self.is_block, self.is_transient)
    return result


class IndentationRules(object):
  """EcmaScript indentation rules.

  Can be used to find common indentation errors in JavaScript, ActionScript
  and other Ecma like scripting languages.
  """

  def __init__(self):
    """Initializes the IndentationRules checker."""
    # Stack of TokenInfo for open tokens that affect indentation.
    self._stack = []

    # Map from line number to number of characters it is off in indentation.
    self._start_index_offset = {}

  def Finalize(self):
    """Verifies all indentation-affecting tokens were closed.

    Raises:
      Exception: If the indentation stack is non-empty at end of input,
          which indicates an internal bookkeeping error (an unmatched open
          token was never popped).
    """
    if self._stack:
      old_stack = self._stack
      # Clear the stack first so the checker is reusable even after the raise.
      self._stack = []
      raise Exception('INTERNAL ERROR: indentation stack is not empty: %r' %
                      old_stack)

  def CheckToken(self, token, state):
    """Checks a token for indentation errors.

    Args:
      token: The current token under consideration
      state: Additional information about the current tree state

    Returns:
      A list of error arrays [error code, error string, error token,
      position] for any indentation problems found at this token.  The list
      is empty if the token is properly indented.
    """

    token_type = token.type
    indentation_errors = []
    stack = self._stack
    is_first = self._IsFirstNonWhitespaceTokenInLine(token)

    # Add tokens that could decrease indentation before checking.
    if token_type == Type.END_PAREN:
      self._PopTo(Type.START_PAREN)

    elif token_type == Type.END_PARAMETERS:
      self._PopTo(Type.START_PARAMETERS)

    elif token_type == Type.END_BRACKET:
      self._PopTo(Type.START_BRACKET)

    elif token_type == Type.END_BLOCK:
      start_token = self._PopTo(Type.START_BLOCK)
      # Check for required goog.scope comment.
      if start_token:
        goog_scope = tokenutil.GoogScopeOrNoneFromStartBlock(start_token.token)
        if goog_scope is not None:
          if not token.line.endswith('; // goog.scope\n'):
            if (token.line.find('//') > -1 and
                token.line.find('goog.scope') >
                token.line.find('//')):
              # A comment mentioning goog.scope exists but does not match the
              # required exact syntax.
              indentation_errors.append([
                  errors.MALFORMED_END_OF_SCOPE_COMMENT,
                  ('Malformed end of goog.scope comment. Please use the '
                   'exact following syntax to close the scope:\n'
                   '}); // goog.scope'),
                  token,
                  Position(token.start_index, token.length)])
            else:
              indentation_errors.append([
                  errors.MISSING_END_OF_SCOPE_COMMENT,
                  ('Missing comment for end of goog.scope which opened at line '
                   '%d. End the scope with:\n'
                   '}); // goog.scope' %
                   (start_token.line_number)),
                  token,
                  Position(token.start_index, token.length)])

    elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
      # case/default outdent to their enclosing block; re-add the popped
      # START_BLOCK so the block indent itself is preserved.
      self._Add(self._PopTo(Type.START_BLOCK))

    elif token_type == Type.SEMICOLON:
      # End of statement: continuation indents no longer apply.
      self._PopTransient()

    if (is_first and
        token_type not in (Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT)):
      if flags.FLAGS.debug_indentation:
        print 'Line #%d: stack %r' % (token.line_number, stack)

      # Ignore lines that start in JsDoc since we don't check them properly
      # yet.
      # TODO(robbyw): Support checking JsDoc indentation.
      # Ignore lines that start as multi-line strings since indentation is N/A.
      # Ignore lines that start with operators since we report that already.
      # Ignore lines with tabs since we report that already.
      expected = self._GetAllowableIndentations()
      actual = self._GetActualIndentation(token)

      # Special case comments describing else, case, and default.  Allow them
      # to outdent to the parent block.
      if token_type in Type.COMMENT_TYPES:
        next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
        if next_code and next_code.type == Type.END_BLOCK:
          next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES)
        if next_code and next_code.string in ('else', 'case', 'default'):
          # TODO(robbyw): This almost certainly introduces false negatives.
          expected |= self._AddToEach(expected, -2)

      # actual < 0 means the line used tabs and is skipped (see
      # _GetActualIndentation).
      if actual >= 0 and actual not in expected:
        expected = sorted(expected)
        indentation_errors.append([
            errors.WRONG_INDENTATION,
            'Wrong indentation: expected any of {%s} but got %d' % (
                ', '.join('%d' % x for x in expected if x < 80), actual),
            token,
            Position(actual, expected[0])])
        # Remember how far off this line is so hard stops computed from token
        # positions on it can be corrected (see _GetAllowableIndentations).
        self._start_index_offset[token.line_number] = expected[0] - actual

    # Add tokens that could increase indentation.
    if token_type == Type.START_BRACKET:
      # '[' is a block indent only for array literals, not for index access.
      self._Add(TokenInfo(
          token=token,
          is_block=token.metadata.context.type == Context.ARRAY_LITERAL))

    elif token_type == Type.START_BLOCK or token.metadata.is_implied_block:
      self._Add(TokenInfo(token=token, is_block=True))

    elif token_type in (Type.START_PAREN, Type.START_PARAMETERS):
      self._Add(TokenInfo(token=token, is_block=False))

    elif token_type == Type.KEYWORD and token.string == 'return':
      self._Add(TokenInfo(token))

    elif not token.IsLastInLine() and (
        token.IsAssignment() or token.IsOperator('?')):
      self._Add(TokenInfo(token=token))

    # Handle implied block closes.
    if token.metadata.is_implied_block_close:
      self._PopToImpliedBlock()

    # Add some tokens only if they appear at the end of the line.
    is_last = self._IsLastCodeInLine(token)
    if is_last:
      next_code_token = tokenutil.GetNextCodeToken(token)
      # Increase required indentation if this is an overlong wrapped statement
      # ending in an operator.
      if token_type == Type.OPERATOR:
        if token.string == ':':
          if stack and stack[-1].token.string == '?':
            # When a ternary : is on a different line than its '?', it doesn't
            # add indentation.
            if token.line_number == stack[-1].token.line_number:
              self._Add(TokenInfo(token))
          elif token.metadata.context.type == Context.CASE_BLOCK:
            # Pop transient tokens from say, line continuations, e.g.,
            # case x.
            #     y:
            # Want to pop the transient 4 space continuation indent.
            self._PopTransient()
            # Starting the body of the case statement, which is a type of
            # block.
            self._Add(TokenInfo(token=token, is_block=True))
          elif token.metadata.context.type == Context.LITERAL_ELEMENT:
            # When in an object literal, acts as operator indicating line
            # continuations.
            self._Add(TokenInfo(token))
          else:
            # ':' might also be a statement label, no effect on indentation in
            # this case.
            pass

        elif token.string != ',':
          self._Add(TokenInfo(token))
        else:
          # The token is a comma.
          if token.metadata.context.type == Context.VAR:
            self._Add(TokenInfo(token))
          elif token.metadata.context.type != Context.PARAMETERS:
            self._PopTransient()
      # Increase required indentation if this is the end of a statement that's
      # continued with an operator on the next line (e.g. the '.').
      elif (next_code_token and next_code_token.type == Type.OPERATOR and
            not next_code_token.metadata.IsUnaryOperator()):
        self._Add(TokenInfo(token))
      elif token_type == Type.PARAMETERS and token.string.endswith(','):
        # Parameter lists.
        self._Add(TokenInfo(token))
      elif token.IsKeyword('var'):
        self._Add(TokenInfo(token))
      elif token.metadata.is_implied_semicolon:
        self._PopTransient()
    elif token.IsAssignment():
      self._Add(TokenInfo(token))

    return indentation_errors

  def _AddToEach(self, original, amount):
    """Returns a new set with the given amount added to each element.

    Args:
      original: The original set of numbers
      amount: The amount to add to each element

    Returns:
      A new set containing each element of the original set added to the
      amount.
    """
    return set([x + amount for x in original])

  # Token types whose own column position defines a "hard stop" indentation.
  _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS,
                      Type.START_BRACKET)

  # Token strings whose position defines a hard stop indentation.
  _HARD_STOP_STRINGS = ('return', '?')

  def _IsHardStop(self, token):
    """Determines if the given token can have a hard stop after it.

    Args:
      token: token to examine

    Returns:
      Whether the token can have a hard stop after it.

    Hard stops are indentations defined by the position of another token as in
    indentation lined up with return, (, [, and ?.
    """
    return (token.type in self._HARD_STOP_TYPES or
            token.string in self._HARD_STOP_STRINGS or
            token.IsAssignment())

  def _GetAllowableIndentations(self):
    """Computes the set of allowable indentations.

    Returns:
      The set of allowable indentations, given the current stack.
    """
    expected = set([0])
    hard_stops = set([])

    # Whether the tokens are still in the same continuation, meaning additional
    # indentation is optional.  As an example:
    # x = 5 +
    #     6 +
    #     7;
    # The second '+' does not add any required indentation.
    in_same_continuation = False

    for token_info in self._stack:
      token = token_info.token

      # Handle normal additive indentation tokens.
      if not token_info.overridden_by and token.string != 'return':
        if token_info.is_block:
          # Blocks add a mandatory 2 spaces; everything on the stack so far
          # must shift with them, so replace rather than union.
          expected = self._AddToEach(expected, 2)
          hard_stops = self._AddToEach(hard_stops, 2)
          in_same_continuation = False
        elif in_same_continuation:
          # Additional continuations within the same statement are optional:
          # keep the old indentations and add the deeper ones.
          expected |= self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
        else:
          # First continuation in a statement: 4 extra spaces are required.
          expected = self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
          in_same_continuation = True

      # Handle hard stops after (, [, return, =, and ?
      if self._IsHardStop(token):
        override_is_hard_stop = (token_info.overridden_by and
                                 self._IsHardStop(
                                     token_info.overridden_by.token))
        if token.type == Type.START_PAREN and token.previous:
          # For someFunction(...) we allow to indent at the beginning of the
          # identifier +4
          prev = token.previous
          if (prev.type == Type.IDENTIFIER and
              prev.line_number == token.line_number):
            hard_stops.add(prev.start_index + 4)
        if not override_is_hard_stop:
          start_index = token.start_index
          if token.line_number in self._start_index_offset:
            # Correct for a mis-indented line already reported (the offset was
            # recorded by CheckToken).
            start_index += self._start_index_offset[token.line_number]
          if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and
              not token_info.overridden_by):
            hard_stops.add(start_index + 1)

          elif token.string == 'return' and not token_info.overridden_by:
            # Align with the expression after 'return ' (7 characters).
            hard_stops.add(start_index + 7)

          elif token.type == Type.START_BRACKET:
            hard_stops.add(start_index + 1)

          elif token.IsAssignment():
            # Align with the value after '<operator> '.
            hard_stops.add(start_index + len(token.string) + 1)

          elif token.IsOperator('?') and not token_info.overridden_by:
            hard_stops.add(start_index + 2)

    return (expected | hard_stops) or set([0])

  def _GetActualIndentation(self, token):
    """Gets the actual indentation of the line containing the given token.

    Args:
      token: Any token on the line.

    Returns:
      The actual indentation of the line containing the given token.  Returns
      -1 if this line should be ignored due to the presence of tabs.
    """
    # Move to the first token in the line
    token = tokenutil.GetFirstTokenInSameLine(token)

    # If it is whitespace, it is the indentation.
    if token.type == Type.WHITESPACE:
      if token.string.find('\t') >= 0:
        # Tabs make column counts ambiguous; reported elsewhere, skip here.
        return -1
      else:
        return len(token.string)
    elif token.type == Type.PARAMETERS:
      # Parameter tokens carry their own leading whitespace.
      return len(token.string) - len(token.string.lstrip())
    else:
      return 0

  def _IsFirstNonWhitespaceTokenInLine(self, token):
    """Determines if the given token is the first non-space token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the first non-whitespace token on its line.
    """
    if token.type in (Type.WHITESPACE, Type.BLANK_LINE):
      return False
    if token.IsFirstInLine():
      return True
    # Otherwise, it qualifies only if it is preceded solely by the line's
    # leading whitespace token.
    return (token.previous and token.previous.IsFirstInLine() and
            token.previous.type == Type.WHITESPACE)

  def _IsLastCodeInLine(self, token):
    """Determines if the given token is the last code token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the last code token on its line.
    """
    if token.type in Type.NON_CODE_TYPES:
      return False
    start_token = token
    # Scan forward: if we leave the line (or run out of tokens) without
    # seeing more code, this was the last code token.
    while True:
      token = token.next
      if not token or token.line_number != start_token.line_number:
        return True
      if token.type not in Type.NON_CODE_TYPES:
        return False

  def _AllFunctionPropertyAssignTokens(self, start_token, end_token):
    """Checks if tokens are (likely) a valid function property assignment.

    Args:
      start_token: Start of the token range.
      end_token: End of the token range.

    Returns:
      True if all tokens between start_token and end_token are legal tokens
      within a function declaration and assignment into a property.
    """
    for token in tokenutil.GetTokenRange(start_token, end_token):
      fn_decl_tokens = (Type.FUNCTION_DECLARATION,
                        Type.PARAMETERS,
                        Type.START_PARAMETERS,
                        Type.END_PARAMETERS,
                        Type.END_PAREN)
      # Any code token outside this whitelist (identifiers, dots, assignment
      # operators, commas) means this is not a plain property assignment.
      if (token.type not in fn_decl_tokens and
          token.IsCode() and
          not tokenutil.IsIdentifierOrDot(token) and
          not token.IsAssignment() and
          not (token.type == Type.OPERATOR and token.string == ',')):
        return False
    return True

  def _Add(self, token_info):
    """Adds the given token info to the stack.

    Args:
      token_info: The token information to add.
    """
    if self._stack and self._stack[-1].token == token_info.token:
      # Don't add the same token twice.
      return

    if token_info.is_block or token_info.token.type == Type.START_PAREN:
      # goog.scope blocks should not add indentation; mark them overridden.
      scope_token = tokenutil.GoogScopeOrNoneFromStartBlock(token_info.token)
      token_info.overridden_by = TokenInfo(scope_token) if scope_token else None

      if (token_info.token.type == Type.START_BLOCK and
          token_info.token.metadata.context.type == Context.BLOCK):
        # Handle function() {} assignments: their block contents get special
        # treatment and are allowed to just indent by two whitespace.
        # For example
        # long.long.name = function(
        #     a) {
        # In this case the { and the = are on different lines.  But the
        # override should still apply for all previous stack tokens that are
        # part of an assignment of a block.

        has_assignment = any(x for x in self._stack if x.token.IsAssignment())
        if has_assignment:
          # Walk the stack backwards, overriding entries as long as the token
          # range between them still looks like a function property
          # assignment.
          last_token = token_info.token.previous
          for stack_info in reversed(self._stack):
            if (last_token and
                not self._AllFunctionPropertyAssignTokens(stack_info.token,
                                                          last_token)):
              break
            stack_info.overridden_by = token_info
            stack_info.is_permanent_override = True
            last_token = stack_info.token

    index = len(self._stack) - 1
    while index >= 0:
      stack_info = self._stack[index]
      stack_token = stack_info.token

      if stack_info.line_number == token_info.line_number:
        # In general, tokens only override each other when they are on
        # the same line.
        stack_info.overridden_by = token_info
        if (token_info.token.type == Type.START_BLOCK and
            (stack_token.IsAssignment() or
             stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))):
          # Multi-line blocks have lasting overrides, as in:
          # callFn({
          #   a: 10
          # },
          # 30);
          # b/11450054. If a string is not closed properly then close_block
          # could be null.
          close_block = token_info.token.metadata.context.end_token
          stack_info.is_permanent_override = close_block and (
              close_block.line_number != token_info.token.line_number)
      else:
        break
      index -= 1

    self._stack.append(token_info)

  def _Pop(self):
    """Pops the top token from the stack.

    Returns:
      The popped token info.
    """
    token_info = self._stack.pop()
    if token_info.token.type not in (Type.START_BLOCK, Type.START_BRACKET):
      # Remove any temporary overrides.
      self._RemoveOverrides(token_info)
    else:
      # For braces and brackets, which can be object and array literals, remove
      # overrides when the literal is closed on the same line.
      token_check = token_info.token
      same_type = token_check.type
      goal_type = None
      if token_info.token.type == Type.START_BRACKET:
        goal_type = Type.END_BRACKET
      else:
        goal_type = Type.END_BLOCK
      line_number = token_info.token.line_number
      # Count nested same-type openers so only the matching closer (count
      # back at zero) triggers the override removal.
      count = 0
      while token_check and token_check.line_number == line_number:
        if token_check.type == goal_type:
          count -= 1
          if not count:
            self._RemoveOverrides(token_info)
            break
        if token_check.type == same_type:
          count += 1
        token_check = token_check.next
    return token_info

  def _PopToImpliedBlock(self):
    """Pops the stack until an implied block token is found."""
    while not self._Pop().token.metadata.is_implied_block:
      pass

  def _PopTo(self, stop_type):
    """Pops the stack until a token of the given type is popped.

    Args:
      stop_type: The type of token to pop to.

    Returns:
      The token info of the given type that was popped.
    """
    last = None
    while True:
      last = self._Pop()
      if last.token.type == stop_type:
        break
    return last

  def _RemoveOverrides(self, token_info):
    """Marks any token that was overridden by this token as active again.

    Args:
      token_info: The token that is being removed from the stack.
    """
    for stack_token in self._stack:
      if (stack_token.overridden_by == token_info and
          not stack_token.is_permanent_override):
        stack_token.overridden_by = None

  def _PopTransient(self):
    """Pops all transient tokens - i.e. not blocks, literals, or parens."""
    while self._stack and self._stack[-1].is_transient:
      self._Pop()