#!/usr/bin/env python
#
# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Methods for checking EcmaScript files for indentation issues."""

__author__ = ('robbyw@google.com (Robert Walker)')

from closure_linter import ecmametadatapass
from closure_linter import errors
from closure_linter import javascripttokens
from closure_linter import tokenutil
from closure_linter.common import error
from closure_linter.common import position

import gflags as flags

# Command-line switch: when set, CheckToken prints the indentation stack for
# every line it verifies.
flags.DEFINE_boolean('debug_indentation', False,
                     'Whether to print debugging information for indentation.')


# Shorthand
Context = ecmametadatapass.EcmaContext
Error = error.Error
Position = position.Position
Type = javascripttokens.JavaScriptTokenType


# The general approach:
#
# 1. Build a stack of tokens that can affect indentation.
#    For each token, we determine if it is a block or continuation token.
#    Some tokens need to be temporarily overwritten in case they are removed
#    before the end of the line.
#    Much of the work here is determining which tokens to keep on the stack
#    at each point.  Operators, for example, should be removed once their
#    expression or line is gone, while parentheses must stay until the matching
#    end parentheses is found.
#
# 2. Given that stack, determine the allowable indentations.
#    Due to flexible indentation rules in JavaScript, there may be many
#    allowable indentations for each stack.  We follow the general
#    "no false positives" approach of GJsLint and build the most permissive
#    set possible.


class TokenInfo(object):
  """Stores information about a token.

  Attributes:
    token: The token
    is_block: Whether the token represents a block indentation.
    is_transient: Whether the token should be automatically removed without
      finding a matching end token.
    overridden_by: TokenInfo for a token that overrides the indentation that
      this token would require.
    is_permanent_override: Whether the override on this token should persist
      even after the overriding token is removed from the stack.  For example:
      x([
        1],
      2);
      needs this to be set so the last line is not required to be a continuation
      indent.
    line_number: The effective line number of this token.  Will either be the
      actual line number or the one before it in the case of a mis-wrapped
      operator.
  """

  def __init__(self, token, is_block=False):
    """Initializes a TokenInfo object.

    Args:
      token: The token
      is_block: Whether the token represents a block indentation.
    """
    self.token = token
    self.overridden_by = None
    self.is_permanent_override = False
    self.is_block = is_block
    # Everything except blocks and opening parens / parameter lists is
    # transient, i.e. it is dropped by _PopTransient without a closing token.
    self.is_transient = not is_block and token.type not in (
        Type.START_PAREN, Type.START_PARAMETERS)
    self.line_number = token.line_number

  def __repr__(self):
    """Returns a debugging description of the token and its override state."""
    result = '\n %s' % self.token
    if self.overridden_by:
      result = '%s OVERRIDDEN [by "%s"]' % (
          result, self.overridden_by.token.string)
    result += ' {is_block: %s, is_transient: %s}' % (
        self.is_block, self.is_transient)
    return result


class IndentationRules(object):
  """EcmaScript indentation rules.

  Can be used to find common indentation errors in JavaScript, ActionScript and
  other Ecma like scripting languages.
  """

  def __init__(self):
    """Initializes the IndentationRules checker."""
    # Stack of TokenInfo objects that currently influence indentation.
    self._stack = []

    # Map from line number to number of characters it is off in indentation.
    self._start_index_offset = {}

  def Finalize(self):
    """Verifies that the indentation stack was fully unwound.

    The stack is reset before raising so the checker is left empty.

    Raises:
      Exception: If tokens are still on the stack.
    """
    if self._stack:
      old_stack = self._stack
      self._stack = []
      raise Exception("INTERNAL ERROR: indentation stack is not empty: %r" %
                      old_stack)

  def CheckToken(self, token, state):
    """Checks a token for indentation errors.

    Args:
      token: The current token under consideration
      state: Additional information about the current tree state

    Returns:
      A list of error arrays, each of the form
      [error code, error string, error token, position].  The list is empty
      when no indentation problem was found for this token.
    """

    token_type = token.type
    indentation_errors = []
    stack = self._stack
    is_first = self._IsFirstNonWhitespaceTokenInLine(token)

    # Add tokens that could decrease indentation before checking.
    if token_type == Type.END_PAREN:
      self._PopTo(Type.START_PAREN)

    elif token_type == Type.END_PARAMETERS:
      self._PopTo(Type.START_PARAMETERS)

    elif token_type == Type.END_BRACKET:
      self._PopTo(Type.START_BRACKET)

    elif token_type == Type.END_BLOCK:
      start_token = self._PopTo(Type.START_BLOCK)
      # Check for required goog.scope comment.
      if start_token:
        goog_scope = self._GoogScopeOrNone(start_token.token)
        if goog_scope is not None:
          # NOTE(review): the closing comment is matched character for
          # character, including its internal spacing -- confirm the literals
          # below against the project's canonical goog.scope closing comment.
          if not token.line.endswith('; // goog.scope\n'):
            comment_index = token.line.find('//')
            if (comment_index > -1 and
                token.line.find('goog.scope') > comment_index):
              # A comment mentioning goog.scope exists but is not exact.
              indentation_errors.append([
                  errors.MALFORMED_END_OF_SCOPE_COMMENT,
                  ('Malformed end of goog.scope comment. Please use the '
                   'exact following syntax to close the scope:\n'
                   '}); // goog.scope'),
                  token,
                  Position(token.start_index, token.length)])
            else:
              indentation_errors.append([
                  errors.MISSING_END_OF_SCOPE_COMMENT,
                  ('Missing comment for end of goog.scope which opened at line '
                   '%d. End the scope with:\n'
                   '}); // goog.scope' %
                   (start_token.line_number)),
                  token,
                  Position(token.start_index, token.length)])

    elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
      # Unwind to the enclosing block start and put it straight back, which
      # discards everything that was stacked above it.
      self._Add(self._PopTo(Type.START_BLOCK))

    elif is_first and token.string == '.':
      # This token should have been on the previous line, so treat it as if it
      # was there.
      info = TokenInfo(token)
      info.line_number = token.line_number - 1
      self._Add(info)

    elif token_type == Type.SEMICOLON:
      self._PopTransient()

    not_binary_operator = (token_type != Type.OPERATOR or
                           token.metadata.IsUnaryOperator())
    not_dot = token.string != '.'
    if is_first and not_binary_operator and not_dot and token.type not in (
        Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT):
      if flags.FLAGS.debug_indentation:
        # Parenthesized single argument: prints the same text whether print is
        # a statement or a function.
        print('Line #%d: stack %r' % (token.line_number, stack))

      # Ignore lines that start in JsDoc since we don't check them properly yet.
      # TODO(robbyw): Support checking JsDoc indentation.
      # Ignore lines that start as multi-line strings since indentation is N/A.
      # Ignore lines that start with operators since we report that already.
      # Ignore lines with tabs since we report that already.
      expected = self._GetAllowableIndentations()
      actual = self._GetActualIndentation(token)

      # Special case comments describing else, case, and default.  Allow them
      # to outdent to the parent block.
      if token_type in Type.COMMENT_TYPES:
        next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
        if next_code and next_code.type == Type.END_BLOCK:
          # Skip over a closing brace that precedes the keyword.
          next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES)
        if next_code and next_code.string in ('else', 'case', 'default'):
          # TODO(robbyw): This almost certainly introduces false negatives.
          expected |= self._AddToEach(expected, -2)

      # A negative actual indentation means the line must be ignored (tabs).
      if actual >= 0 and actual not in expected:
        expected = sorted(expected)
        indentation_errors.append([
            errors.WRONG_INDENTATION,
            'Wrong indentation: expected any of {%s} but got %d' % (
                ', '.join(['%d' % x for x in expected]), actual),
            token,
            Position(actual, expected[0])])
        # Remember how far off this line is so hard stops computed from
        # tokens on it can be shifted accordingly.
        self._start_index_offset[token.line_number] = expected[0] - actual

    # Add tokens that could increase indentation.
    if token_type == Type.START_BRACKET:
      self._Add(TokenInfo(
          token=token,
          is_block=token.metadata.context.type == Context.ARRAY_LITERAL))

    elif token_type == Type.START_BLOCK or token.metadata.is_implied_block:
      self._Add(TokenInfo(token=token, is_block=True))

    elif token_type in (Type.START_PAREN, Type.START_PARAMETERS):
      self._Add(TokenInfo(token=token, is_block=False))

    elif token_type == Type.KEYWORD and token.string == 'return':
      self._Add(TokenInfo(token))

    elif not token.IsLastInLine() and (
        token.IsAssignment() or token.IsOperator('?')):
      self._Add(TokenInfo(token=token))

    # Handle implied block closes.
    if token.metadata.is_implied_block_close:
      self._PopToImpliedBlock()

    # Add some tokens only if they appear at the end of the line.
    is_last = self._IsLastCodeInLine(token)
    if is_last:
      if token_type == Type.OPERATOR:
        if token.string == ':':
          if stack and stack[-1].token.string == '?':
            # When a ternary : is on a different line than its '?', it doesn't
            # add indentation.
            if token.line_number == stack[-1].token.line_number:
              self._Add(TokenInfo(token))
          elif token.metadata.context.type == Context.CASE_BLOCK:
            # Pop transient tokens from say, line continuations, e.g.,
            # case x.
            #     y:
            # Want to pop the transient 4 space continuation indent.
            self._PopTransient()
            # Starting the body of the case statement, which is a type of
            # block.
            self._Add(TokenInfo(token=token, is_block=True))
          elif token.metadata.context.type == Context.LITERAL_ELEMENT:
            # When in an object literal, acts as operator indicating line
            # continuations.
            self._Add(TokenInfo(token))
          else:
            # ':' might also be a statement label, no effect on indentation in
            # this case.
            pass

        elif token.string != ',':
          self._Add(TokenInfo(token))
        else:
          # The token is a comma.
          if token.metadata.context.type == Context.VAR:
            self._Add(TokenInfo(token))
          elif token.metadata.context.type != Context.PARAMETERS:
            self._PopTransient()

      elif (token.string.endswith('.')
            and token_type in (Type.IDENTIFIER, Type.NORMAL)):
        self._Add(TokenInfo(token))
      elif token_type == Type.PARAMETERS and token.string.endswith(','):
        # Parameter lists.
        self._Add(TokenInfo(token))
      elif token.metadata.is_implied_semicolon:
        self._PopTransient()
      elif token.IsAssignment():
        self._Add(TokenInfo(token))

    return indentation_errors

  def _AddToEach(self, original, amount):
    """Returns a new set with the given amount added to each element.

    Args:
      original: The original set of numbers
      amount: The amount to add to each element

    Returns:
      A new set containing each element of the original set added to the amount.
    """
    return set(x + amount for x in original)

  _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS,
                      Type.START_BRACKET)

  _HARD_STOP_STRINGS = ('return', '?')

  def _IsHardStop(self, token):
    """Determines if the given token can have a hard stop after it.

    Hard stops are indentations defined by the position of another token as in
    indentation lined up with return, (, [, and ?.

    Args:
      token: The token to examine.

    Returns:
      A true value if the token's type or string is one of the hard stop
      markers, or if the token is an assignment.
    """
    return (token.type in self._HARD_STOP_TYPES or
            token.string in self._HARD_STOP_STRINGS or
            token.IsAssignment())

  def _GetAllowableIndentations(self):
    """Computes the set of allowable indentations.

    Returns:
      The set of allowable indentations, given the current stack.
    """
    expected = set([0])
    hard_stops = set([])

    # Whether the tokens are still in the same continuation, meaning additional
    # indentation is optional.  As an example:
    # x = 5 +
    #     6 +
    #     7;
    # The second '+' does not add any required indentation.
    in_same_continuation = False

    for token_info in self._stack:
      token = token_info.token

      # Handle normal additive indentation tokens.
      if not token_info.overridden_by and token.string != 'return':
        if token_info.is_block:
          expected = self._AddToEach(expected, 2)
          hard_stops = self._AddToEach(hard_stops, 2)
          in_same_continuation = False
        elif in_same_continuation:
          # Further continuation tokens make the extra indent optional.
          expected |= self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
        else:
          expected = self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
          in_same_continuation = True

      # Handle hard stops after (, [, return, =, and ?
      if self._IsHardStop(token):
        override_is_hard_stop = (
            token_info.overridden_by and
            self._IsHardStop(token_info.overridden_by.token))
        if not override_is_hard_stop:
          start_index = token.start_index
          # Shift by the recorded mis-indentation of the token's line.
          if token.line_number in self._start_index_offset:
            start_index += self._start_index_offset[token.line_number]
          if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and
              not token_info.overridden_by):
            hard_stops.add(start_index + 1)

          elif token.string == 'return' and not token_info.overridden_by:
            # len('return') plus one space.
            hard_stops.add(start_index + 7)

          elif token.type == Type.START_BRACKET:
            hard_stops.add(start_index + 1)

          elif token.IsAssignment():
            hard_stops.add(start_index + len(token.string) + 1)

          elif token.IsOperator('?') and not token_info.overridden_by:
            hard_stops.add(start_index + 2)

    return (expected | hard_stops) or set([0])

  def _GetActualIndentation(self, token):
    """Gets the actual indentation of the line containing the given token.

    Args:
      token: Any token on the line.

    Returns:
      The actual indentation of the line containing the given token.  Returns
      -1 if this line should be ignored due to the presence of tabs.
    """
    # Move to the first token in the line
    token = tokenutil.GetFirstTokenInSameLine(token)

    # If it is whitespace, it is the indentation.
    if token.type == Type.WHITESPACE:
      if token.string.find('\t') >= 0:
        return -1
      else:
        return len(token.string)
    elif token.type == Type.PARAMETERS:
      # The indentation is the leading whitespace inside the token's string.
      return len(token.string) - len(token.string.lstrip())
    else:
      return 0

  def _IsFirstNonWhitespaceTokenInLine(self, token):
    """Determines if the given token is the first non-space token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the first non-whitespace token on its line.
    """
    if token.type in (Type.WHITESPACE, Type.BLANK_LINE):
      return False
    if token.IsFirstInLine():
      return True
    # Otherwise the only thing allowed before it is leading whitespace.
    return bool(token.previous and token.previous.IsFirstInLine() and
                token.previous.type == Type.WHITESPACE)

  def _IsLastCodeInLine(self, token):
    """Determines if the given token is the last code token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the last code token on its line.
    """
    if token.type in Type.NON_CODE_TYPES:
      return False
    start_token = token
    while True:
      token = token.next
      if not token or token.line_number != start_token.line_number:
        return True
      if token.type not in Type.NON_CODE_TYPES:
        return False

  def _GoogScopeOrNone(self, token):
    """Determines if the given START_BLOCK is part of a goog.scope statement.

    Args:
      token: A token of type START_BLOCK.

    Returns:
      The goog.scope function call token, or None if such call doesn't exist.
    """
    # Search for a goog.scope statement, which will be 5 tokens before the
    # block, as in:
    #   goog.scope(function() {
    # Walk back exactly five `previous` links from the block start.
    maybe_goog_scope = token
    for unused_i in range(5):
      if not maybe_goog_scope:
        return None
      maybe_goog_scope = maybe_goog_scope.previous
    if maybe_goog_scope and maybe_goog_scope.string == 'goog.scope':
      return maybe_goog_scope
    return None

  def _Add(self, token_info):
    """Adds the given token info to the stack.

    Blocks and opening parens may also mark tokens already on the stack as
    overridden, so those tokens stop contributing indentation.

    Args:
      token_info: The token information to add.
    """
    if self._stack and self._stack[-1].token == token_info.token:
      # Don't add the same token twice.
      return

    if token_info.is_block or token_info.token.type == Type.START_PAREN:
      # Readers of overridden_by dereference `.token`, so the goog.scope token
      # must be wrapped in a TokenInfo rather than stored raw.
      scope_token = self._GoogScopeOrNone(token_info.token)
      token_info.overridden_by = TokenInfo(scope_token) if scope_token else None

      new_token = token_info.token
      # Walk the stack from the top down until a token is not overridden.
      for stack_info in reversed(self._stack):
        stack_token = stack_info.token

        if stack_info.line_number == token_info.line_number:
          # In general, tokens only override each other when they are on
          # the same line.
          stack_info.overridden_by = token_info
          if (new_token.type == Type.START_BLOCK and
              (stack_token.IsAssignment() or
               stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))):
            # Multi-line blocks have lasting overrides, as in:
            # callFn({
            #   a: 10
            # },
            # 30);
            close_block = new_token.metadata.context.end_token
            stack_info.is_permanent_override = (
                close_block.line_number != new_token.line_number)
        elif (new_token.type == Type.START_BLOCK and
              new_token.metadata.context.type == Context.BLOCK and
              (stack_token.IsAssignment() or
               stack_token.type == Type.IDENTIFIER)):
          # When starting a function block, the override can transcend lines.
          # For example
          #     long.long.name = function(
          #         a) {
          # In this case the { and the = are on different lines.  But the
          # override should still apply.
          stack_info.overridden_by = token_info
          stack_info.is_permanent_override = True
        else:
          break

    self._stack.append(token_info)

  def _Pop(self):
    """Pops the top token from the stack.

    Returns:
      The popped token info.

    Raises:
      IndexError: If the stack is empty.
    """
    token_info = self._stack.pop()
    start = token_info.token
    if start.type not in (Type.START_BLOCK, Type.START_BRACKET):
      # Remove any temporary overrides.
      self._RemoveOverrides(token_info)
    else:
      # For braces and brackets, which can be object and array literals, remove
      # overrides when the literal is closed on the same line.
      same_type = start.type
      if same_type == Type.START_BRACKET:
        goal_type = Type.END_BRACKET
      else:
        goal_type = Type.END_BLOCK
      line_number = start.line_number
      # Nesting depth; the scan starts on the opener itself, so it is 1 after
      # the first iteration.
      count = 0
      token_check = start
      while token_check and token_check.line_number == line_number:
        if token_check.type == goal_type:
          count -= 1
          if not count:
            # Found the matching closer on the same line.
            self._RemoveOverrides(token_info)
            break
        if token_check.type == same_type:
          count += 1
        token_check = token_check.next
    return token_info

  def _PopToImpliedBlock(self):
    """Pops the stack until an implied block token is found.

    Raises:
      IndexError: If the stack empties before an implied block is popped.
    """
    while not self._Pop().token.metadata.is_implied_block:
      pass

  def _PopTo(self, stop_type):
    """Pops the stack until a token of the given type is popped.

    Args:
      stop_type: The type of token to pop to.

    Returns:
      The token info of the given type that was popped.

    Raises:
      IndexError: If the stack empties before such a token is popped.
    """
    while True:
      last = self._Pop()
      if last.token.type == stop_type:
        return last

  def _RemoveOverrides(self, token_info):
    """Marks any token that was overridden by this token as active again.

    Permanent overrides are left in place.

    Args:
      token_info: The token that is being removed from the stack.
    """
    for stack_token in self._stack:
      if (stack_token.overridden_by == token_info and
          not stack_token.is_permanent_override):
        stack_token.overridden_by = None

  def _PopTransient(self):
    """Pops all transient tokens - i.e. not blocks, literals, or parens."""
    while self._stack and self._stack[-1].is_transient:
      self._Pop()