1#!/usr/bin/env python 2# Copyright 2010 The Closure Linter Authors. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS-IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15 16"""Methods for checking EcmaScript files for indentation issues.""" 17 18__author__ = ('robbyw@google.com (Robert Walker)') 19 20import gflags as flags 21 22from closure_linter import ecmametadatapass 23from closure_linter import errors 24from closure_linter import javascripttokens 25from closure_linter import tokenutil 26from closure_linter.common import error 27from closure_linter.common import position 28 29 30flags.DEFINE_boolean('debug_indentation', False, 31 'Whether to print debugging information for indentation.') 32 33 34# Shorthand 35Context = ecmametadatapass.EcmaContext 36Error = error.Error 37Position = position.Position 38Type = javascripttokens.JavaScriptTokenType 39 40 41# The general approach: 42# 43# 1. Build a stack of tokens that can affect indentation. 44# For each token, we determine if it is a block or continuation token. 45# Some tokens need to be temporarily overwritten in case they are removed 46# before the end of the line. 47# Much of the work here is determining which tokens to keep on the stack 48# at each point. Operators, for example, should be removed once their 49# expression or line is gone, while parentheses must stay until the matching 50# end parentheses is found. 51# 52# 2. Given that stack, determine the allowable indentations. 
#    Due to flexible indentation rules in JavaScript, there may be many
#    allowable indentations for each stack.  We follow the general
#    "no false positives" approach of GJsLint and build the most permissive
#    set possible.


class TokenInfo(object):
  """Stores information about a token.

  Attributes:
    token: The token
    is_block: Whether the token represents a block indentation.
    is_transient: Whether the token should be automatically removed without
        finding a matching end token.
    overridden_by: TokenInfo for a token that overrides the indentation that
        this token would require.
    is_permanent_override: Whether the override on this token should persist
        even after the overriding token is removed from the stack.  For
        example:
        x([
          1],
        2);
        needs this to be set so the last line is not required to be a
        continuation indent.
    line_number: The effective line number of this token.  Will either be the
        actual line number or the one before it in the case of a mis-wrapped
        operator.
  """

  def __init__(self, token, is_block=False):
    """Initializes a TokenInfo object.

    Args:
      token: The token
      is_block: Whether the token represents a block indentation.
    """
    self.token = token
    self.overridden_by = None
    self.is_permanent_override = False
    self.is_block = is_block
    # Everything except blocks, parens, and parameter lists is transient:
    # it is popped when its line or expression ends (_PopTransient) rather
    # than when a matching end token is found.
    self.is_transient = not is_block and token.type not in (
        Type.START_PAREN, Type.START_PARAMETERS)
    self.line_number = token.line_number

  def __repr__(self):
    result = '\n  %s' % self.token
    if self.overridden_by:
      result = '%s OVERRIDDEN [by "%s"]' % (
          result, self.overridden_by.token.string)
    result += ' {is_block: %s, is_transient: %s}' % (
        self.is_block, self.is_transient)
    return result


class IndentationRules(object):
  """EcmaScript indentation rules.

  Can be used to find common indentation errors in JavaScript, ActionScript and
  other Ecma like scripting languages.
  """

  def __init__(self):
    """Initializes the IndentationRules checker."""
    # Stack of TokenInfo objects for tokens that affect indentation.
    self._stack = []

    # Map from line number to number of characters it is off in indentation.
    self._start_index_offset = {}

  def Finalize(self):
    """Verifies every token pushed during checking has been popped.

    Raises:
      Exception: If the indentation stack is not empty, indicating an
          internal bookkeeping error (e.g. unbalanced start/end tokens).
    """
    if self._stack:
      old_stack = self._stack
      self._stack = []
      raise Exception('INTERNAL ERROR: indentation stack is not empty: %r' %
                      old_stack)

  def CheckToken(self, token, state):
    """Checks a token for indentation errors.

    Args:
      token: The current token under consideration
      state: Additional information about the current tree state

    Returns:
      An error array [error code, error string, error token] if the token is
      improperly indented, or None if indentation is correct.
    """

    token_type = token.type
    indentation_errors = []
    stack = self._stack
    is_first = self._IsFirstNonWhitespaceTokenInLine(token)

    # Add tokens that could decrease indentation before checking.
    if token_type == Type.END_PAREN:
      self._PopTo(Type.START_PAREN)

    elif token_type == Type.END_PARAMETERS:
      self._PopTo(Type.START_PARAMETERS)

    elif token_type == Type.END_BRACKET:
      self._PopTo(Type.START_BRACKET)

    elif token_type == Type.END_BLOCK:
      start_token = self._PopTo(Type.START_BLOCK)
      # Check for required goog.scope comment.
      if start_token:
        goog_scope = tokenutil.GoogScopeOrNoneFromStartBlock(start_token.token)
        if goog_scope is not None:
          if not token.line.endswith(';  // goog.scope\n'):
            if (token.line.find('//') > -1 and
                token.line.find('goog.scope') >
                token.line.find('//')):
              indentation_errors.append([
                  errors.MALFORMED_END_OF_SCOPE_COMMENT,
                  ('Malformed end of goog.scope comment. Please use the '
                   'exact following syntax to close the scope:\n'
                   '});  // goog.scope'),
                  token,
                  Position(token.start_index, token.length)])
            else:
              indentation_errors.append([
                  errors.MISSING_END_OF_SCOPE_COMMENT,
                  ('Missing comment for end of goog.scope which opened at line '
                   '%d. End the scope with:\n'
                   '});  // goog.scope' %
                   (start_token.line_number)),
                  token,
                  Position(token.start_index, token.length)])

    elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
      self._Add(self._PopTo(Type.START_BLOCK))

    elif is_first and token.string == '.':
      # This token should have been on the previous line, so treat it as if it
      # was there.
      info = TokenInfo(token)
      info.line_number = token.line_number - 1
      self._Add(info)

    elif token_type == Type.SEMICOLON:
      self._PopTransient()

    not_binary_operator = (token_type != Type.OPERATOR or
                           token.metadata.IsUnaryOperator())
    not_dot = token.string != '.'
    if is_first and not_binary_operator and not_dot and token.type not in (
        Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT):
      if flags.FLAGS.debug_indentation:
        # Parenthesized form works identically under Python 2 and is valid
        # Python 3 syntax (the original used the Py2-only print statement).
        print('Line #%d: stack %r' % (token.line_number, stack))

      # Ignore lines that start in JsDoc since we don't check them properly
      # yet.
      # TODO(robbyw): Support checking JsDoc indentation.
      # Ignore lines that start as multi-line strings since indentation is N/A.
      # Ignore lines that start with operators since we report that already.
      # Ignore lines with tabs since we report that already.
      expected = self._GetAllowableIndentations()
      actual = self._GetActualIndentation(token)

      # Special case comments describing else, case, and default.  Allow them
      # to outdent to the parent block.
      if token_type in Type.COMMENT_TYPES:
        next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
        if next_code and next_code.type == Type.END_BLOCK:
          next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES)
        if next_code and next_code.string in ('else', 'case', 'default'):
          # TODO(robbyw): This almost certainly introduces false negatives.
          expected |= self._AddToEach(expected, -2)

      if actual >= 0 and actual not in expected:
        expected = sorted(expected)
        indentation_errors.append([
            errors.WRONG_INDENTATION,
            'Wrong indentation: expected any of {%s} but got %d' % (
                ', '.join(['%d' % x for x in expected]), actual),
            token,
            Position(actual, expected[0])])
        self._start_index_offset[token.line_number] = expected[0] - actual

    # Add tokens that could increase indentation.
    if token_type == Type.START_BRACKET:
      self._Add(TokenInfo(
          token=token,
          is_block=token.metadata.context.type == Context.ARRAY_LITERAL))

    elif token_type == Type.START_BLOCK or token.metadata.is_implied_block:
      self._Add(TokenInfo(token=token, is_block=True))

    elif token_type in (Type.START_PAREN, Type.START_PARAMETERS):
      self._Add(TokenInfo(token=token, is_block=False))

    elif token_type == Type.KEYWORD and token.string == 'return':
      self._Add(TokenInfo(token))

    elif not token.IsLastInLine() and (
        token.IsAssignment() or token.IsOperator('?')):
      self._Add(TokenInfo(token=token))

    # Handle implied block closes.
    if token.metadata.is_implied_block_close:
      self._PopToImpliedBlock()

    # Add some tokens only if they appear at the end of the line.
    is_last = self._IsLastCodeInLine(token)
    if is_last:
      if token_type == Type.OPERATOR:
        if token.string == ':':
          if stack and stack[-1].token.string == '?':
            # When a ternary : is on a different line than its '?', it doesn't
            # add indentation.
            if token.line_number == stack[-1].token.line_number:
              self._Add(TokenInfo(token))
          elif token.metadata.context.type == Context.CASE_BLOCK:
            # Pop transient tokens from say, line continuations, e.g.,
            # case x.
            #     y:
            # Want to pop the transient 4 space continuation indent.
            self._PopTransient()
            # Starting the body of the case statement, which is a type of
            # block.
            self._Add(TokenInfo(token=token, is_block=True))
          elif token.metadata.context.type == Context.LITERAL_ELEMENT:
            # When in an object literal, acts as operator indicating line
            # continuations.
            self._Add(TokenInfo(token))
          else:
            # ':' might also be a statement label, no effect on indentation in
            # this case.
            pass

        elif token.string != ',':
          self._Add(TokenInfo(token))
        else:
          # The token is a comma.
          if token.metadata.context.type == Context.VAR:
            self._Add(TokenInfo(token))
          elif token.metadata.context.type != Context.PARAMETERS:
            self._PopTransient()

      elif (token.string.endswith('.')
            and token_type in (Type.IDENTIFIER, Type.NORMAL)):
        self._Add(TokenInfo(token))
      elif token_type == Type.PARAMETERS and token.string.endswith(','):
        # Parameter lists.
        self._Add(TokenInfo(token))
      elif token.IsKeyword('var'):
        self._Add(TokenInfo(token))
      elif token.metadata.is_implied_semicolon:
        self._PopTransient()
    elif token.IsAssignment():
      self._Add(TokenInfo(token))

    return indentation_errors

  def _AddToEach(self, original, amount):
    """Returns a new set with the given amount added to each element.

    Args:
      original: The original set of numbers
      amount: The amount to add to each element

    Returns:
      A new set containing each element of the original set added to the
      amount.
    """
    return set([x + amount for x in original])

  _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS,
                      Type.START_BRACKET)

  _HARD_STOP_STRINGS = ('return', '?')

  def _IsHardStop(self, token):
    """Determines if the given token can have a hard stop after it.

    Args:
      token: token to examine

    Returns:
      Whether the token can have a hard stop after it.

    Hard stops are indentations defined by the position of another token as in
    indentation lined up with return, (, [, and ?.
    """
    return (token.type in self._HARD_STOP_TYPES or
            token.string in self._HARD_STOP_STRINGS or
            token.IsAssignment())

  def _GetAllowableIndentations(self):
    """Computes the set of allowable indentations.

    Returns:
      The set of allowable indentations, given the current stack.
    """
    expected = set([0])
    hard_stops = set([])

    # Whether the tokens are still in the same continuation, meaning additional
    # indentation is optional.  As an example:
    # x = 5 +
    #     6 +
    #     7;
    # The second '+' does not add any required indentation.
    in_same_continuation = False

    for token_info in self._stack:
      token = token_info.token

      # Handle normal additive indentation tokens.
      if not token_info.overridden_by and token.string != 'return':
        if token_info.is_block:
          expected = self._AddToEach(expected, 2)
          hard_stops = self._AddToEach(hard_stops, 2)
          in_same_continuation = False
        elif in_same_continuation:
          expected |= self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
        else:
          expected = self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
          in_same_continuation = True

      # Handle hard stops after (, [, return, =, and ?
      if self._IsHardStop(token):
        override_is_hard_stop = (token_info.overridden_by and
                                 self._IsHardStop(
                                     token_info.overridden_by.token))
        if not override_is_hard_stop:
          start_index = token.start_index
          if token.line_number in self._start_index_offset:
            start_index += self._start_index_offset[token.line_number]
          if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and
              not token_info.overridden_by):
            hard_stops.add(start_index + 1)

          elif token.string == 'return' and not token_info.overridden_by:
            hard_stops.add(start_index + 7)

          elif token.type == Type.START_BRACKET:
            hard_stops.add(start_index + 1)

          elif token.IsAssignment():
            hard_stops.add(start_index + len(token.string) + 1)

          elif token.IsOperator('?') and not token_info.overridden_by:
            hard_stops.add(start_index + 2)

    return (expected | hard_stops) or set([0])

  def _GetActualIndentation(self, token):
    """Gets the actual indentation of the line containing the given token.

    Args:
      token: Any token on the line.

    Returns:
      The actual indentation of the line containing the given token.  Returns
      -1 if this line should be ignored due to the presence of tabs.
    """
    # Move to the first token in the line
    token = tokenutil.GetFirstTokenInSameLine(token)

    # If it is whitespace, it is the indentation.
    if token.type == Type.WHITESPACE:
      if token.string.find('\t') >= 0:
        return -1
      else:
        return len(token.string)
    elif token.type == Type.PARAMETERS:
      return len(token.string) - len(token.string.lstrip())
    else:
      return 0

  def _IsFirstNonWhitespaceTokenInLine(self, token):
    """Determines if the given token is the first non-space token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the first non-whitespace token on its line.
    """
    if token.type in (Type.WHITESPACE, Type.BLANK_LINE):
      return False
    if token.IsFirstInLine():
      return True
    return (token.previous and token.previous.IsFirstInLine() and
            token.previous.type == Type.WHITESPACE)

  def _IsLastCodeInLine(self, token):
    """Determines if the given token is the last code token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the last code token on its line.
    """
    if token.type in Type.NON_CODE_TYPES:
      return False
    start_token = token
    while True:
      token = token.next
      if not token or token.line_number != start_token.line_number:
        return True
      if token.type not in Type.NON_CODE_TYPES:
        return False

  def _Add(self, token_info):
    """Adds the given token info to the stack.

    Args:
      token_info: The token information to add.
    """
    if self._stack and self._stack[-1].token == token_info.token:
      # Don't add the same token twice.
      return

    if token_info.is_block or token_info.token.type == Type.START_PAREN:
      # NOTE(review): this assigns the raw goog.scope token (or None) to
      # overridden_by, whereas elsewhere overridden_by holds a TokenInfo
      # whose .token attribute is dereferenced -- confirm the goog.scope
      # token is never treated as a TokenInfo downstream.
      token_info.overridden_by = (
          tokenutil.GoogScopeOrNoneFromStartBlock(token_info.token))
      index = 1
      while index <= len(self._stack):
        stack_info = self._stack[-index]
        stack_token = stack_info.token

        if stack_info.line_number == token_info.line_number:
          # In general, tokens only override each other when they are on
          # the same line.
          stack_info.overridden_by = token_info
          if (token_info.token.type == Type.START_BLOCK and
              (stack_token.IsAssignment() or
               stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))):
            # Multi-line blocks have lasting overrides, as in:
            # callFn({
            #   a: 10
            # },
            # 30);
            # b/11450054. If a string is not closed properly then close_block
            # could be null.
            close_block = token_info.token.metadata.context.end_token
            stack_info.is_permanent_override = close_block and (
                close_block.line_number != token_info.token.line_number)
        elif (token_info.token.type == Type.START_BLOCK and
              token_info.token.metadata.context.type == Context.BLOCK and
              (stack_token.IsAssignment() or
               stack_token.type == Type.IDENTIFIER)):
          # When starting a function block, the override can transcend lines.
          # For example
          # long.long.name = function(
          #     a) {
          # In this case the { and the = are on different lines.  But the
          # override should still apply.
          stack_info.overridden_by = token_info
          stack_info.is_permanent_override = True
        else:
          break
        index += 1

    self._stack.append(token_info)

  def _Pop(self):
    """Pops the top token from the stack.

    Returns:
      The popped token info.
    """
    token_info = self._stack.pop()
    if token_info.token.type not in (Type.START_BLOCK, Type.START_BRACKET):
      # Remove any temporary overrides.
      self._RemoveOverrides(token_info)
    else:
      # For braces and brackets, which can be object and array literals, remove
      # overrides when the literal is closed on the same line.
      token_check = token_info.token
      same_type = token_check.type
      goal_type = None
      if token_info.token.type == Type.START_BRACKET:
        goal_type = Type.END_BRACKET
      else:
        goal_type = Type.END_BLOCK
      line_number = token_info.token.line_number
      count = 0
      # Scan forward along the opening line, counting nested open/close pairs;
      # when the matching close token is found on the same line, the literal
      # is single-line and its overrides can be removed.
      while token_check and token_check.line_number == line_number:
        if token_check.type == goal_type:
          count -= 1
          if not count:
            self._RemoveOverrides(token_info)
            break
        if token_check.type == same_type:
          count += 1
        token_check = token_check.next
    return token_info

  def _PopToImpliedBlock(self):
    """Pops the stack until an implied block token is found."""
    while not self._Pop().token.metadata.is_implied_block:
      pass

  def _PopTo(self, stop_type):
    """Pops the stack until a token of the given type is popped.

    Args:
      stop_type: The type of token to pop to.

    Returns:
      The token info of the given type that was popped.
    """
    last = None
    while True:
      last = self._Pop()
      if last.token.type == stop_type:
        break
    return last

  def _RemoveOverrides(self, token_info):
    """Marks any token that was overridden by this token as active again.

    Args:
      token_info: The token that is being removed from the stack.
    """
    for stack_token in self._stack:
      if (stack_token.overridden_by == token_info and
          not stack_token.is_permanent_override):
        stack_token.overridden_by = None

  def _PopTransient(self):
    """Pops all transient tokens - i.e. not blocks, literals, or parens."""
    while self._stack and self._stack[-1].is_transient:
      self._Pop()