1#!/usr/bin/env python
2# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS-IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Methods for checking EcmaScript files for indentation issues."""
17
18__author__ = ('robbyw@google.com (Robert Walker)')
19
20import gflags as flags
21
22from closure_linter import ecmametadatapass
23from closure_linter import errors
24from closure_linter import javascripttokens
25from closure_linter import tokenutil
26from closure_linter.common import error
27from closure_linter.common import position
28
29
# Command-line flag: when enabled, CheckToken prints the indentation stack
# for the first token of every checked line (see CheckToken below).
flags.DEFINE_boolean('debug_indentation', False,
                     'Whether to print debugging information for indentation.')


# Shorthand aliases for project types used throughout this module.
Context = ecmametadatapass.EcmaContext
Error = error.Error
Position = position.Position
Type = javascripttokens.JavaScriptTokenType
39
40
41# The general approach:
42#
43# 1. Build a stack of tokens that can affect indentation.
44#    For each token, we determine if it is a block or continuation token.
45#    Some tokens need to be temporarily overwritten in case they are removed
46#    before the end of the line.
47#    Much of the work here is determining which tokens to keep on the stack
48#    at each point.  Operators, for example, should be removed once their
49#    expression or line is gone, while parentheses must stay until the matching
#    end parenthesis is found.
51#
52# 2. Given that stack, determine the allowable indentations.
53#    Due to flexible indentation rules in JavaScript, there may be many
#    allowable indentations for each stack.  We follow the general
55#    "no false positives" approach of GJsLint and build the most permissive
56#    set possible.
57
58
class TokenInfo(object):
  """Indentation-related bookkeeping for a single token.

  Attributes:
    token: The underlying token.
    is_block: True if this token opens a block-style indentation.
    is_transient: True if this token is discarded automatically (when its
      expression or line ends) instead of waiting for a matching end token.
    overridden_by: The TokenInfo whose indentation supersedes this one's, or
      None while no override is active.
    is_permanent_override: True if the override survives even after the
      overriding token has left the stack.  For example:
      x([
        1],
      2);
      requires this so the final line need not be continuation-indented.
    line_number: The effective line number: the token's own line, or the one
      before it when an operator was wrapped onto the wrong line.
  """

  def __init__(self, token, is_block=False):
    """Creates the info record for a token.

    Args:
      token: The token to describe.
      is_block: Whether the token opens a block indentation.
    """
    self.token = token
    self.is_block = is_block
    self.line_number = token.line_number
    self.overridden_by = None
    self.is_permanent_override = False
    # Blocks, opening parens, and parameter lists persist until their matching
    # end token pops them; everything else is transient.
    if is_block:
      self.is_transient = False
    else:
      self.is_transient = token.type not in (
          Type.START_PAREN, Type.START_PARAMETERS)

  def __repr__(self):
    """Returns a debug string including override state and flags."""
    pieces = ['\n  %s' % self.token]
    if self.overridden_by:
      pieces = ['%s OVERRIDDEN [by "%s"]' % (
          pieces[0], self.overridden_by.token.string)]
    pieces.append(' {is_block: %s, is_transient: %s}' % (
        self.is_block, self.is_transient))
    return ''.join(pieces)
104
105
class IndentationRules(object):
  """EcmaScript indentation rules.

  Can be used to find common indentation errors in JavaScript, ActionScript and
  other Ecma like scripting languages.
  """

  def __init__(self):
    """Initializes the IndentationRules checker."""
    # Stack of TokenInfo records for every open token that still affects
    # indentation (blocks, parens, brackets, operators, ...).
    self._stack = []

    # Map from line number to number of characters it is off in indentation.
    # Used so hard-stop positions computed later can compensate for a line
    # that was already reported as mis-indented.
    self._start_index_offset = {}

  def Finalize(self):
    """Checks that the indentation stack was fully unwound at end of input.

    Raises:
      Exception: If tokens remain on the stack, which indicates unbalanced
        push/pop bookkeeping in this pass (an internal error rather than a
        user lint error).  The stack is cleared before raising so the checker
        can be reused.
    """
    if self._stack:
      old_stack = self._stack
      self._stack = []
      raise Exception('INTERNAL ERROR: indentation stack is not empty: %r' %
                      old_stack)

  def CheckToken(self, token, state):
    """Checks a token for indentation errors.

    Args:
      token: The current token under consideration
      state: Additional information about the current tree state

    Returns:
      An error array [error code, error string, error token] if the token is
      improperly indented, or None if indentation is correct.
    """

    token_type = token.type
    indentation_errors = []
    stack = self._stack
    is_first = self._IsFirstNonWhitespaceTokenInLine(token)

    # Add tokens that could decrease indentation before checking.
    if token_type == Type.END_PAREN:
      self._PopTo(Type.START_PAREN)

    elif token_type == Type.END_PARAMETERS:
      self._PopTo(Type.START_PARAMETERS)

    elif token_type == Type.END_BRACKET:
      self._PopTo(Type.START_BRACKET)

    elif token_type == Type.END_BLOCK:
      start_token = self._PopTo(Type.START_BLOCK)
      # Check for required goog.scope comment.
      if start_token:
        goog_scope = tokenutil.GoogScopeOrNoneFromStartBlock(start_token.token)
        if goog_scope is not None:
          if not token.line.endswith(';  // goog.scope\n'):
            if (token.line.find('//') > -1 and
                token.line.find('goog.scope') >
                token.line.find('//')):
              # A goog.scope comment is present after the close, but it is not
              # the exact required form.
              indentation_errors.append([
                  errors.MALFORMED_END_OF_SCOPE_COMMENT,
                  ('Malformed end of goog.scope comment. Please use the '
                   'exact following syntax to close the scope:\n'
                   '});  // goog.scope'),
                  token,
                  Position(token.start_index, token.length)])
            else:
              indentation_errors.append([
                  errors.MISSING_END_OF_SCOPE_COMMENT,
                  ('Missing comment for end of goog.scope which opened at line '
                   '%d. End the scope with:\n'
                   '});  // goog.scope' %
                   (start_token.line_number)),
                  token,
                  Position(token.start_index, token.length)])

    elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
      # Pop back to the enclosing switch's START_BLOCK, then restore the block
      # itself, so case labels align with the block rather than inheriting the
      # indentation pushed by the previous case body.
      self._Add(self._PopTo(Type.START_BLOCK))

    elif token_type == Type.SEMICOLON:
      # End of statement: discard any line-continuation indentation.
      self._PopTransient()

    if (is_first and
        token_type not in (Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT)):
      if flags.FLAGS.debug_indentation:
        print 'Line #%d: stack %r' % (token.line_number, stack)

      # Ignore lines that start in JsDoc since we don't check them properly yet.
      # TODO(robbyw): Support checking JsDoc indentation.
      # Ignore lines that start as multi-line strings since indentation is N/A.
      # Ignore lines that start with operators since we report that already.
      # Ignore lines with tabs since we report that already.
      expected = self._GetAllowableIndentations()
      actual = self._GetActualIndentation(token)

      # Special case comments describing else, case, and default.  Allow them
      # to outdent to the parent block.
      if token_type in Type.COMMENT_TYPES:
        next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
        if next_code and next_code.type == Type.END_BLOCK:
          next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES)
        if next_code and next_code.string in ('else', 'case', 'default'):
          # TODO(robbyw): This almost certainly introduces false negatives.
          expected |= self._AddToEach(expected, -2)

      # actual is -1 when the line contains tabs (reported elsewhere).
      if actual >= 0 and actual not in expected:
        expected = sorted(expected)
        indentation_errors.append([
            errors.WRONG_INDENTATION,
            # Indentations of 80+ columns are suppressed from the message to
            # keep it readable.
            'Wrong indentation: expected any of {%s} but got %d' % (
                ', '.join('%d' % x for x in expected if x < 80), actual),
            token,
            Position(actual, expected[0])])
        # Remember how far off this line is so hard stops computed for tokens
        # on it can compensate.
        self._start_index_offset[token.line_number] = expected[0] - actual

    # Add tokens that could increase indentation.
    if token_type == Type.START_BRACKET:
      self._Add(TokenInfo(
          token=token,
          is_block=token.metadata.context.type == Context.ARRAY_LITERAL))

    elif token_type == Type.START_BLOCK or token.metadata.is_implied_block:
      self._Add(TokenInfo(token=token, is_block=True))

    elif token_type in (Type.START_PAREN, Type.START_PARAMETERS):
      self._Add(TokenInfo(token=token, is_block=False))

    elif token_type == Type.KEYWORD and token.string == 'return':
      self._Add(TokenInfo(token))

    elif not token.IsLastInLine() and (
        token.IsAssignment() or token.IsOperator('?')):
      self._Add(TokenInfo(token=token))

    # Handle implied block closes.
    if token.metadata.is_implied_block_close:
      self._PopToImpliedBlock()

    # Add some tokens only if they appear at the end of the line.
    is_last = self._IsLastCodeInLine(token)
    if is_last:
      next_code_token = tokenutil.GetNextCodeToken(token)
      # Increase required indentation if this is an overlong wrapped statement
      # ending in an operator.
      if token_type == Type.OPERATOR:
        if token.string == ':':
          if stack and stack[-1].token.string == '?':
            # When a ternary : is on a different line than its '?', it doesn't
            # add indentation.
            if token.line_number == stack[-1].token.line_number:
              self._Add(TokenInfo(token))
          elif token.metadata.context.type == Context.CASE_BLOCK:
            # Pop transient tokens from say, line continuations, e.g.,
            # case x.
            #     y:
            # Want to pop the transient 4 space continuation indent.
            self._PopTransient()
            # Starting the body of the case statement, which is a type of
            # block.
            self._Add(TokenInfo(token=token, is_block=True))
          elif token.metadata.context.type == Context.LITERAL_ELEMENT:
            # When in an object literal, acts as operator indicating line
            # continuations.
            self._Add(TokenInfo(token))
          else:
            # ':' might also be a statement label, no effect on indentation in
            # this case.
            pass

        elif token.string != ',':
          self._Add(TokenInfo(token))
        else:
          # The token is a comma.
          if token.metadata.context.type == Context.VAR:
            self._Add(TokenInfo(token))
          elif token.metadata.context.type != Context.PARAMETERS:
            self._PopTransient()
      # Increase required indentation if this is the end of a statement that's
      # continued with an operator on the next line (e.g. the '.').
      elif (next_code_token and next_code_token.type == Type.OPERATOR and
            not next_code_token.metadata.IsUnaryOperator()):
        self._Add(TokenInfo(token))
      elif token_type == Type.PARAMETERS and token.string.endswith(','):
        # Parameter lists.
        self._Add(TokenInfo(token))
      elif token.IsKeyword('var'):
        self._Add(TokenInfo(token))
      elif token.metadata.is_implied_semicolon:
        self._PopTransient()
    elif token.IsAssignment():
      self._Add(TokenInfo(token))

    return indentation_errors

  def _AddToEach(self, original, amount):
    """Returns a new set with the given amount added to each element.

    Args:
      original: The original set of numbers
      amount: The amount to add to each element

    Returns:
      A new set containing each element of the original set added to the amount.
    """
    return set([x + amount for x in original])

  # Token types whose position on a line defines an extra allowable "hard
  # stop" indentation for subsequent lines.
  _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS,
                      Type.START_BRACKET)

  # Token strings that also define hard stops (see _IsHardStop).
  _HARD_STOP_STRINGS = ('return', '?')

  def _IsHardStop(self, token):
    """Determines if the given token can have a hard stop after it.

    Args:
      token: token to examine

    Returns:
      Whether the token can have a hard stop after it.

    Hard stops are indentations defined by the position of another token as in
    indentation lined up with return, (, [, and ?.
    """
    return (token.type in self._HARD_STOP_TYPES or
            token.string in self._HARD_STOP_STRINGS or
            token.IsAssignment())

  def _GetAllowableIndentations(self):
    """Computes the set of allowable indentations.

    Returns:
      The set of allowable indentations, given the current stack.
    """
    expected = set([0])
    hard_stops = set([])

    # Whether the tokens are still in the same continuation, meaning additional
    # indentation is optional.  As an example:
    # x = 5 +
    #     6 +
    #     7;
    # The second '+' does not add any required indentation.
    in_same_continuation = False

    for token_info in self._stack:
      token = token_info.token

      # Handle normal additive indentation tokens.
      # Blocks add a required +2; continuations add +4 (required only for the
      # first continuation token, optional for subsequent ones).
      if not token_info.overridden_by and token.string != 'return':
        if token_info.is_block:
          expected = self._AddToEach(expected, 2)
          hard_stops = self._AddToEach(hard_stops, 2)
          in_same_continuation = False
        elif in_same_continuation:
          expected |= self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
        else:
          expected = self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
          in_same_continuation = True

      # Handle hard stops after (, [, return, =, and ?
      if self._IsHardStop(token):
        override_is_hard_stop = (token_info.overridden_by and
                                 self._IsHardStop(
                                     token_info.overridden_by.token))
        if token.type == Type.START_PAREN and token.previous:
          # For someFunction(...) we allow to indent at the beginning of the
          # identifier +4
          prev = token.previous
          if (prev.type == Type.IDENTIFIER and
              prev.line_number == token.line_number):
            hard_stops.add(prev.start_index + 4)
        if not override_is_hard_stop:
          start_index = token.start_index
          # Compensate if the hard-stop token sits on a line that was itself
          # reported as mis-indented.
          if token.line_number in self._start_index_offset:
            start_index += self._start_index_offset[token.line_number]
          if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and
              not token_info.overridden_by):
            hard_stops.add(start_index + 1)

          elif token.string == 'return' and not token_info.overridden_by:
            # +7 == len('return ').
            hard_stops.add(start_index + 7)

          elif token.type == Type.START_BRACKET:
            hard_stops.add(start_index + 1)

          elif token.IsAssignment():
            hard_stops.add(start_index + len(token.string) + 1)

          elif token.IsOperator('?') and not token_info.overridden_by:
            hard_stops.add(start_index + 2)

    return (expected | hard_stops) or set([0])

  def _GetActualIndentation(self, token):
    """Gets the actual indentation of the line containing the given token.

    Args:
      token: Any token on the line.

    Returns:
      The actual indentation of the line containing the given token.  Returns
      -1 if this line should be ignored due to the presence of tabs.
    """
    # Move to the first token in the line
    token = tokenutil.GetFirstTokenInSameLine(token)

    # If it is whitespace, it is the indentation.
    if token.type == Type.WHITESPACE:
      if token.string.find('\t') >= 0:
        return -1
      else:
        return len(token.string)
    elif token.type == Type.PARAMETERS:
      # Parameter tokens can carry their own leading whitespace.
      return len(token.string) - len(token.string.lstrip())
    else:
      return 0

  def _IsFirstNonWhitespaceTokenInLine(self, token):
    """Determines if the given token is the first non-space token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the first non-whitespace token on its line.
    """
    if token.type in (Type.WHITESPACE, Type.BLANK_LINE):
      return False
    if token.IsFirstInLine():
      return True
    # Otherwise, first only if preceded solely by a leading whitespace token.
    return (token.previous and token.previous.IsFirstInLine() and
            token.previous.type == Type.WHITESPACE)

  def _IsLastCodeInLine(self, token):
    """Determines if the given token is the last code token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the last code token on its line.
    """
    if token.type in Type.NON_CODE_TYPES:
      return False
    start_token = token
    while True:
      token = token.next
      if not token or token.line_number != start_token.line_number:
        return True
      if token.type not in Type.NON_CODE_TYPES:
        return False

  def _AllFunctionPropertyAssignTokens(self, start_token, end_token):
    """Checks if tokens are (likely) a valid function property assignment.

    Args:
      start_token: Start of the token range.
      end_token: End of the token range.

    Returns:
      True if all tokens between start_token and end_token are legal tokens
      within a function declaration and assignment into a property.
    """
    for token in tokenutil.GetTokenRange(start_token, end_token):
      fn_decl_tokens = (Type.FUNCTION_DECLARATION,
                        Type.PARAMETERS,
                        Type.START_PARAMETERS,
                        Type.END_PARAMETERS,
                        Type.END_PAREN)
      # Anything other than declaration syntax, identifiers/dots, assignment
      # operators, or commas disqualifies the range.
      if (token.type not in fn_decl_tokens and
          token.IsCode() and
          not tokenutil.IsIdentifierOrDot(token) and
          not token.IsAssignment() and
          not (token.type == Type.OPERATOR and token.string == ',')):
        return False
    return True

  def _Add(self, token_info):
    """Adds the given token info to the stack.

    Args:
      token_info: The token information to add.
    """
    if self._stack and self._stack[-1].token == token_info.token:
      # Don't add the same token twice.
      return

    if token_info.is_block or token_info.token.type == Type.START_PAREN:
      # goog.scope blocks override indentation entirely (bodies stay at the
      # outer level).
      scope_token = tokenutil.GoogScopeOrNoneFromStartBlock(token_info.token)
      token_info.overridden_by = TokenInfo(scope_token) if scope_token else None

      if (token_info.token.type == Type.START_BLOCK and
          token_info.token.metadata.context.type == Context.BLOCK):
        # Handle function() {} assignments: their block contents get special
        # treatment and are allowed to just indent by two whitespace.
        # For example
        # long.long.name = function(
        #     a) {
        # In this case the { and the = are on different lines.  But the
        # override should still apply for all previous stack tokens that are
        # part of an assignment of a block.

        has_assignment = any(x for x in self._stack if x.token.IsAssignment())
        if has_assignment:
          # Walk backwards through the stack, overriding each entry while the
          # token range from it up to this block still looks like a function
          # property assignment.
          last_token = token_info.token.previous
          for stack_info in reversed(self._stack):
            if (last_token and
                not self._AllFunctionPropertyAssignTokens(stack_info.token,
                                                          last_token)):
              break
            stack_info.overridden_by = token_info
            stack_info.is_permanent_override = True
            last_token = stack_info.token

      index = len(self._stack) - 1
      while index >= 0:
        stack_info = self._stack[index]
        stack_token = stack_info.token

        if stack_info.line_number == token_info.line_number:
          # In general, tokens only override each other when they are on
          # the same line.
          stack_info.overridden_by = token_info
          if (token_info.token.type == Type.START_BLOCK and
              (stack_token.IsAssignment() or
               stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))):
            # Multi-line blocks have lasting overrides, as in:
            # callFn({
            #   a: 10
            # },
            # 30);
            # b/11450054. If a string is not closed properly then close_block
            # could be null.
            close_block = token_info.token.metadata.context.end_token
            stack_info.is_permanent_override = close_block and (
                close_block.line_number != token_info.token.line_number)
        else:
          break
        index -= 1

    self._stack.append(token_info)

  def _Pop(self):
    """Pops the top token from the stack.

    Returns:
      The popped token info.
    """
    token_info = self._stack.pop()
    if token_info.token.type not in (Type.START_BLOCK, Type.START_BRACKET):
      # Remove any temporary overrides.
      self._RemoveOverrides(token_info)
    else:
      # For braces and brackets, which can be object and array literals, remove
      # overrides when the literal is closed on the same line.
      token_check = token_info.token
      same_type = token_check.type
      goal_type = None
      if token_info.token.type == Type.START_BRACKET:
        goal_type = Type.END_BRACKET
      else:
        goal_type = Type.END_BLOCK
      line_number = token_info.token.line_number
      count = 0
      # Scan forward over the opening token's line.  count tracks nesting
      # depth of same-typed delimiters: each opener bumps it, each closer
      # drops it; hitting zero means the matching closer is on this same
      # line, so the literal is single-line and its overrides can go.
      while token_check and token_check.line_number == line_number:
        if token_check.type == goal_type:
          count -= 1
          if not count:
            self._RemoveOverrides(token_info)
            break
        if token_check.type == same_type:
          count += 1
        token_check = token_check.next
    return token_info

  def _PopToImpliedBlock(self):
    """Pops the stack until an implied block token is found."""
    while not self._Pop().token.metadata.is_implied_block:
      pass

  def _PopTo(self, stop_type):
    """Pops the stack until a token of the given type is popped.

    Args:
      stop_type: The type of token to pop to.

    Returns:
      The token info of the given type that was popped.

    NOTE(review): assumes a token of stop_type is on the stack; if none is
    present this raises IndexError via _Pop — presumably the metadata pass
    guarantees balanced delimiters, but confirm.
    """
    last = None
    while True:
      last = self._Pop()
      if last.token.type == stop_type:
        break
    return last

  def _RemoveOverrides(self, token_info):
    """Marks any token that was overridden by this token as active again.

    Args:
      token_info: The token that is being removed from the stack.
    """
    for stack_token in self._stack:
      if (stack_token.overridden_by == token_info and
          not stack_token.is_permanent_override):
        stack_token.overridden_by = None

  def _PopTransient(self):
    """Pops all transient tokens - i.e. not blocks, literals, or parens."""
    while self._stack and self._stack[-1].is_transient:
      self._Pop()
618