1#!/usr/bin/env python
2#
3# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS-IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Methods for checking EcmaScript files for indentation issues."""
18
# Module author metadata.
__author__ = ('robbyw@google.com (Robert Walker)')
20
21from closure_linter import ecmametadatapass
22from closure_linter import errors
23from closure_linter import javascripttokens
24from closure_linter import tokenutil
25from closure_linter.common import error
26from closure_linter.common import position
27
28import gflags as flags
29
# When true, IndentationRules.CheckToken prints the indentation token stack
# for every line it verifies.
flags.DEFINE_boolean('debug_indentation', False,
                     'Whether to print debugging information for indentation.')


# Shorthand aliases for commonly used classes from the imported modules.
Context = ecmametadatapass.EcmaContext
Error = error.Error
Position = position.Position
Type = javascripttokens.JavaScriptTokenType
39
40
41# The general approach:
42#
43# 1. Build a stack of tokens that can affect indentation.
44#    For each token, we determine if it is a block or continuation token.
45#    Some tokens need to be temporarily overwritten in case they are removed
46#    before the end of the line.
47#    Much of the work here is determining which tokens to keep on the stack
48#    at each point.  Operators, for example, should be removed once their
49#    expression or line is gone, while parentheses must stay until the matching
50#    end parentheses is found.
51#
52# 2. Given that stack, determine the allowable indentations.
53#    Due to flexible indentation rules in JavaScript, there may be many
#    allowable indentations for each stack.  We follow the general
55#    "no false positives" approach of GJsLint and build the most permissive
56#    set possible.
57
58
class TokenInfo(object):
  """Stores information about a token.

  Attributes:
    token: The token
    is_block: Whether the token represents a block indentation.
    is_transient: Whether the token should be automatically removed without
      finding a matching end token.
    overridden_by: TokenInfo for a token that overrides the indentation that
      this token would require.
    is_permanent_override: Whether the override on this token should persist
      even after the overriding token is removed from the stack.  For example:
      x([
        1],
      2);
      needs this to be set so the last line is not required to be a continuation
      indent.
    line_number: The effective line number of this token.  Will either be the
      actual line number or the one before it in the case of a mis-wrapped
      operator.
  """

  def __init__(self, token, is_block=False):
    """Initializes a TokenInfo object.

    Args:
      token: The token
      is_block: Whether the token represents a block indentation.
    """
    self.token = token
    self.overridden_by = None
    self.is_permanent_override = False
    self.is_block = is_block
    # Only blocks, parens, and parameter lists must be closed by a matching
    # end token; everything else is transient and is popped automatically at
    # the end of its expression or line.
    self.is_transient = not is_block and token.type not in (
        Type.START_PAREN, Type.START_PARAMETERS)
    self.line_number = token.line_number

  def __repr__(self):
    """Returns a debug representation used when dumping the indent stack."""
    result = '\n  %s' % self.token
    if self.overridden_by:
      result = '%s OVERRIDDEN [by "%s"]' % (
          result, self.overridden_by.token.string)
    result += ' {is_block: %s, is_transient: %s}' % (
        self.is_block, self.is_transient)
    return result
104
105
class IndentationRules(object):
  """EcmaScript indentation rules.

  Can be used to find common indentation errors in JavaScript, ActionScript and
  other Ecma-like scripting languages.
  """

  def __init__(self):
    """Initializes the IndentationRules checker."""
    # Stack of TokenInfo objects for open tokens that currently affect
    # indentation (blocks, parens, brackets, continuation operators, ...).
    self._stack = []

    # Map from line number to number of characters it is off in indentation.
    self._start_index_offset = {}

  def Finalize(self):
    """Verifies that the indentation stack was fully consumed.

    Raises:
      Exception: If the stack is non-empty, meaning some open token was never
        matched by its closing token (an internal error in this pass).
    """
    if self._stack:
      old_stack = self._stack
      self._stack = []
      raise Exception("INTERNAL ERROR: indentation stack is not empty: %r" %
                      old_stack)

  def CheckToken(self, token, state):
    """Checks a token for indentation errors.

    Args:
      token: The current token under consideration
      state: Additional information about the current tree state

    Returns:
      An error array [error code, error string, error token] if the token is
      improperly indented, or None if indentation is correct.
    """

    token_type = token.type
    indentation_errors = []
    stack = self._stack
    is_first = self._IsFirstNonWhitespaceTokenInLine(token)

    # Add tokens that could decrease indentation before checking.
    if token_type == Type.END_PAREN:
      self._PopTo(Type.START_PAREN)

    elif token_type == Type.END_PARAMETERS:
      self._PopTo(Type.START_PARAMETERS)

    elif token_type == Type.END_BRACKET:
      self._PopTo(Type.START_BRACKET)

    elif token_type == Type.END_BLOCK:
      start_token = self._PopTo(Type.START_BLOCK)
      # Check for required goog.scope comment.
      if start_token:
        goog_scope = self._GoogScopeOrNone(start_token.token)
        if goog_scope is not None:
          if not token.line.endswith(';  // goog.scope\n'):
            # If there is a '//' comment containing 'goog.scope' but the line
            # does not match the exact required form, it is malformed rather
            # than missing.
            if (token.line.find('//') > -1 and
                token.line.find('goog.scope') >
                token.line.find('//')):
              indentation_errors.append([
                  errors.MALFORMED_END_OF_SCOPE_COMMENT,
                  ('Malformed end of goog.scope comment. Please use the '
                   'exact following syntax to close the scope:\n'
                   '});  // goog.scope'),
                  token,
                  Position(token.start_index, token.length)])
            else:
              indentation_errors.append([
                  errors.MISSING_END_OF_SCOPE_COMMENT,
                  ('Missing comment for end of goog.scope which opened at line '
                   '%d. End the scope with:\n'
                   '});  // goog.scope' %
                   (start_token.line_number)),
                  token,
                  Position(token.start_index, token.length)])

    elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
      # case/default labels outdent to the switch block's level: pop the block
      # and immediately re-push it so subsequent lines indent from it again.
      self._Add(self._PopTo(Type.START_BLOCK))

    elif is_first and token.string == '.':
      # This token should have been on the previous line, so treat it as if it
      # was there.
      info = TokenInfo(token)
      info.line_number = token.line_number - 1
      self._Add(info)

    elif token_type == Type.SEMICOLON:
      self._PopTransient()

    # Lines that begin with a binary operator or '.' are reported by other
    # checks, so indentation is only verified for the remaining first-in-line
    # code tokens.
    not_binary_operator = (token_type != Type.OPERATOR or
                           token.metadata.IsUnaryOperator())
    not_dot = token.string != '.'
    if is_first and not_binary_operator and not_dot and token.type not in (
        Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT):
      if flags.FLAGS.debug_indentation:
        print 'Line #%d: stack %r' % (token.line_number, stack)

      # Ignore lines that start in JsDoc since we don't check them properly yet.
      # TODO(robbyw): Support checking JsDoc indentation.
      # Ignore lines that start as multi-line strings since indentation is N/A.
      # Ignore lines that start with operators since we report that already.
      # Ignore lines with tabs since we report that already.
      expected = self._GetAllowableIndentations()
      actual = self._GetActualIndentation(token)

      # Special case comments describing else, case, and default.  Allow them
      # to outdent to the parent block.
      if token_type in Type.COMMENT_TYPES:
        next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
        if next_code and next_code.type == Type.END_BLOCK:
          next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES)
        if next_code and next_code.string in ('else', 'case', 'default'):
          # TODO(robbyw): This almost certainly introduces false negatives.
          expected |= self._AddToEach(expected, -2)

      # actual is -1 when the line contains tabs and should be skipped.
      if actual >= 0 and actual not in expected:
        expected = sorted(expected)
        indentation_errors.append([
            errors.WRONG_INDENTATION,
            'Wrong indentation: expected any of {%s} but got %d' % (
                ', '.join(
                    ['%d' % x for x in expected]), actual),
            token,
            Position(actual, expected[0])])
        # Remember how far off this line is so that hard stops computed from
        # tokens on it can be shifted by the same amount.
        self._start_index_offset[token.line_number] = expected[0] - actual

    # Add tokens that could increase indentation.
    if token_type == Type.START_BRACKET:
      self._Add(TokenInfo(token=token,
          is_block=token.metadata.context.type == Context.ARRAY_LITERAL))

    elif token_type == Type.START_BLOCK or token.metadata.is_implied_block:
      self._Add(TokenInfo(token=token, is_block=True))

    elif token_type in (Type.START_PAREN, Type.START_PARAMETERS):
      self._Add(TokenInfo(token=token, is_block=False))

    elif token_type == Type.KEYWORD and token.string == 'return':
      self._Add(TokenInfo(token))

    elif not token.IsLastInLine() and (
        token.IsAssignment() or token.IsOperator('?')):
      self._Add(TokenInfo(token=token))

    # Handle implied block closes.
    if token.metadata.is_implied_block_close:
      self._PopToImpliedBlock()

    # Add some tokens only if they appear at the end of the line.
    is_last = self._IsLastCodeInLine(token)
    if is_last:
      if token_type == Type.OPERATOR:
        if token.string == ':':
          if (stack and stack[-1].token.string == '?'):
            # When a ternary : is on a different line than its '?', it doesn't
            # add indentation.
            if (token.line_number == stack[-1].token.line_number):
              self._Add(TokenInfo(token))
          elif token.metadata.context.type == Context.CASE_BLOCK:
            # Pop transient tokens from say, line continuations, e.g.,
            # case x.
            #     y:
            # Want to pop the transient 4 space continuation indent.
            self._PopTransient()
            # Starting the body of the case statement, which is a type of
            # block.
            self._Add(TokenInfo(token=token, is_block=True))
          elif token.metadata.context.type == Context.LITERAL_ELEMENT:
            # When in an object literal, acts as operator indicating line
            # continuations.
            self._Add(TokenInfo(token))
            pass
          else:
            # ':' might also be a statement label, no effect on indentation in
            # this case.
            pass

        elif token.string != ',':
          self._Add(TokenInfo(token))
        else:
          # The token is a comma.
          if token.metadata.context.type == Context.VAR:
            self._Add(TokenInfo(token))
          elif token.metadata.context.type != Context.PARAMETERS:
            self._PopTransient()

      # A trailing '.' on an identifier implies continuation on the next line.
      elif (token.string.endswith('.')
            and token_type in (Type.IDENTIFIER, Type.NORMAL)):
        self._Add(TokenInfo(token))
      elif token_type == Type.PARAMETERS and token.string.endswith(','):
        # Parameter lists.
        self._Add(TokenInfo(token))
      elif token.metadata.is_implied_semicolon:
        self._PopTransient()
    elif token.IsAssignment():
      self._Add(TokenInfo(token))

    return indentation_errors

  def _AddToEach(self, original, amount):
    """Returns a new set with the given amount added to each element.

    Args:
      original: The original set of numbers
      amount: The amount to add to each element

    Returns:
      A new set containing each element of the original set added to the amount.
    """
    return set([x + amount for x in original])

  # Token types that create a "hard stop" alignment position (see _IsHardStop).
  _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS,
                      Type.START_BRACKET)

  # Token strings that create a "hard stop" alignment position.
  _HARD_STOP_STRINGS = ('return', '?')

  def _IsHardStop(self, token):
    """Determines if the given token can have a hard stop after it.

    Hard stops are indentations defined by the position of another token as in
    indentation lined up with return, (, [, and ?.

    Args:
      token: The token to check.

    Returns:
      True if the token creates a hard stop alignment position.
    """
    return (token.type in self._HARD_STOP_TYPES or
            token.string in self._HARD_STOP_STRINGS or
            token.IsAssignment())

  def _GetAllowableIndentations(self):
    """Computes the set of allowable indentations.

    Returns:
      The set of allowable indentations, given the current stack.
    """
    expected = set([0])
    hard_stops = set([])

    # Whether the tokens are still in the same continuation, meaning additional
    # indentation is optional.  As an example:
    # x = 5 +
    #     6 +
    #     7;
    # The second '+' does not add any required indentation.
    in_same_continuation = False

    for token_info in self._stack:
      token = token_info.token

      # Handle normal additive indentation tokens.
      if not token_info.overridden_by and token.string != 'return':
        if token_info.is_block:
          # A block adds a required 2-space indent and ends any continuation.
          expected = self._AddToEach(expected, 2)
          hard_stops = self._AddToEach(hard_stops, 2)
          in_same_continuation = False
        elif in_same_continuation:
          # Subsequent continuation tokens make the extra 4 spaces optional,
          # so keep the old positions as well (set union).
          expected |= self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
        else:
          # The first continuation token requires an extra 4 spaces.
          expected = self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
          in_same_continuation = True

      # Handle hard stops after (, [, return, =, and ?
      if self._IsHardStop(token):
        override_is_hard_stop = (token_info.overridden_by and
            self._IsHardStop(token_info.overridden_by.token))
        if not override_is_hard_stop:
          start_index = token.start_index
          # Adjust for lines that were already reported as mis-indented.
          if token.line_number in self._start_index_offset:
            start_index += self._start_index_offset[token.line_number]
          if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and
              not token_info.overridden_by):
            hard_stops.add(start_index + 1)

          elif token.string == 'return' and not token_info.overridden_by:
            # len('return ') == 7: align with the expression after 'return'.
            hard_stops.add(start_index + 7)

          elif (token.type == Type.START_BRACKET):
            hard_stops.add(start_index + 1)

          elif token.IsAssignment():
            # Align with the first character after the assignment operator.
            hard_stops.add(start_index + len(token.string) + 1)

          elif token.IsOperator('?') and not token_info.overridden_by:
            hard_stops.add(start_index + 2)

    # expected starts at {0} and never becomes empty, so the trailing 'or' is
    # purely defensive.
    return (expected | hard_stops) or set([0])

  def _GetActualIndentation(self, token):
    """Gets the actual indentation of the line containing the given token.

    Args:
      token: Any token on the line.

    Returns:
      The actual indentation of the line containing the given token.  Returns
      -1 if this line should be ignored due to the presence of tabs.
    """
    # Move to the first token in the line
    token = tokenutil.GetFirstTokenInSameLine(token)

    # If it is whitespace, it is the indentation.
    if token.type == Type.WHITESPACE:
      if token.string.find('\t') >= 0:
        return -1
      else:
        return len(token.string)
    elif token.type == Type.PARAMETERS:
      # A parameter token may carry its own leading spaces; measure them.
      return len(token.string) - len(token.string.lstrip())
    else:
      return 0

  def _IsFirstNonWhitespaceTokenInLine(self, token):
    """Determines if the given token is the first non-space token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the first non-whitespace token on its line.
    """
    if token.type in (Type.WHITESPACE, Type.BLANK_LINE):
      return False
    if token.IsFirstInLine():
      return True
    # Otherwise the only thing allowed before it is leading whitespace.
    return (token.previous and token.previous.IsFirstInLine() and
            token.previous.type == Type.WHITESPACE)

  def _IsLastCodeInLine(self, token):
    """Determines if the given token is the last code token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the last code token on its line.
    """
    if token.type in Type.NON_CODE_TYPES:
      return False
    start_token = token
    # Scan the remainder of the line for any other code token.
    while True:
      token = token.next
      if not token or token.line_number != start_token.line_number:
        return True
      if token.type not in Type.NON_CODE_TYPES:
        return False

  def _GoogScopeOrNone(self, token):
    """Determines if the given START_BLOCK is part of a goog.scope statement.

    Args:
      token: A token of type START_BLOCK.

    Returns:
      The goog.scope function call token, or None if such call doesn't exist.
    """
    # Search for a goog.scope statement, which will be 5 tokens before the
    # block. Illustration of the tokens found prior to the start block:
    # goog.scope(function() {
    #      5    4    3   21 ^

    maybe_goog_scope = token
    for unused_i in xrange(5):
      maybe_goog_scope = (maybe_goog_scope.previous if maybe_goog_scope and
                          maybe_goog_scope.previous else None)
    if maybe_goog_scope and maybe_goog_scope.string == 'goog.scope':
      return maybe_goog_scope

  def _Add(self, token_info):
    """Adds the given token info to the stack.

    Args:
      token_info: The token information to add.
    """
    if self._stack and self._stack[-1].token == token_info.token:
      # Don't add the same token twice.
      return

    if token_info.is_block or token_info.token.type == Type.START_PAREN:
      # goog.scope blocks don't add indentation, so mark them overridden.
      token_info.overridden_by = self._GoogScopeOrNone(token_info.token)
      # Walk down the stack from the top, overriding entries until one is
      # found that this token cannot override.
      index = 1
      while index <= len(self._stack):
        stack_info = self._stack[-index]
        stack_token = stack_info.token

        if stack_info.line_number == token_info.line_number:
          # In general, tokens only override each other when they are on
          # the same line.
          stack_info.overridden_by = token_info
          if (token_info.token.type == Type.START_BLOCK and
              (stack_token.IsAssignment() or
               stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))):
            # Multi-line blocks have lasting overrides, as in:
            # callFn({
            #   a: 10
            # },
            # 30);
            close_block = token_info.token.metadata.context.end_token
            stack_info.is_permanent_override = \
                close_block.line_number != token_info.token.line_number
        elif (token_info.token.type == Type.START_BLOCK and
              token_info.token.metadata.context.type == Context.BLOCK and
              (stack_token.IsAssignment() or
               stack_token.type == Type.IDENTIFIER)):
          # When starting a function block, the override can transcend lines.
          # For example
          # long.long.name = function(
          #     a) {
          # In this case the { and the = are on different lines.  But the
          # override should still apply.
          stack_info.overridden_by = token_info
          stack_info.is_permanent_override = True
        else:
          break
        index += 1

    self._stack.append(token_info)

  def _Pop(self):
    """Pops the top token from the stack.

    Returns:
      The popped token info.
    """
    token_info = self._stack.pop()
    if token_info.token.type not in (Type.START_BLOCK, Type.START_BRACKET):
      # Remove any temporary overrides.
      self._RemoveOverrides(token_info)
    else:
      # For braces and brackets, which can be object and array literals, remove
      # overrides when the literal is closed on the same line.
      token_check = token_info.token
      same_type = token_check.type
      goal_type = None
      if token_info.token.type == Type.START_BRACKET:
        goal_type = Type.END_BRACKET
      else:
        goal_type = Type.END_BLOCK
      line_number = token_info.token.line_number
      count = 0
      # Walk tokens on the same line, tracking nesting depth; count drops to
      # zero when the matching close token is found on this line.
      while token_check and token_check.line_number == line_number:
        if token_check.type == goal_type:
          count -= 1
          if not count:
            self._RemoveOverrides(token_info)
            break
        if token_check.type == same_type:
          count += 1
        token_check = token_check.next
    return token_info

  def _PopToImpliedBlock(self):
    """Pops the stack until an implied block token is found."""
    while not self._Pop().token.metadata.is_implied_block:
      pass

  def _PopTo(self, stop_type):
    """Pops the stack until a token of the given type is popped.

    Args:
      stop_type: The type of token to pop to.

    Returns:
      The token info of the given type that was popped.

    Note: raises IndexError (from list.pop) if no token of stop_type is on
    the stack; Finalize-style internal-error handling applies upstream.
    """
    last = None
    while True:
      last = self._Pop()
      if last.token.type == stop_type:
        break
    return last

  def _RemoveOverrides(self, token_info):
    """Marks any token that was overridden by this token as active again.

    Args:
      token_info: The token that is being removed from the stack.
    """
    for stack_token in self._stack:
      if (stack_token.overridden_by == token_info and
          not stack_token.is_permanent_override):
        stack_token.overridden_by = None

  def _PopTransient(self):
    """Pops all transient tokens - i.e. not blocks, literals, or parens."""
    while self._stack and self._stack[-1].is_transient:
      self._Pop()
590