1#!/usr/bin/env python
2#
3# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS-IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Metadata pass for annotating tokens in EcmaScript files."""
18
19__author__ = ('robbyw@google.com (Robert Walker)')
20
21from closure_linter import javascripttokens
22from closure_linter import tokenutil
23
24
# Shorthand alias for javascripttokens.JavaScriptTokenType, used throughout
# this module.
TokenType = javascripttokens.JavaScriptTokenType
26
27
class ParseError(Exception):
  """Raised when the token stream cannot be parsed at a particular token.

  Attributes:
    token: The token at which parsing failed.
  """

  def __init__(self, token, message=None):
    """Creates a parse error tied to the offending token.

    Args:
      token: The token at which parsing failed.
      message: Optional description of the failure; it is handed to the base
          Exception constructor unchanged.
    """
    super(ParseError, self).__init__(message)
    self.token = token
44
45
class EcmaContext(object):
  """One node in the stack of syntactic contexts of an EcmaScript file.

  Contexts form a singly linked chain through `parent`, from the innermost
  context outwards.

  Attributes:
    type: One of the context type constants defined on this class.
    start_token: The token at which the context begins.
    end_token: The token at which the context ends; None until it is set.
    parent: The enclosing context, or None when there is none.
  """

  # Outermost context.
  ROOT = 'root'

  # Code block.
  BLOCK = 'block'

  # Pseudo-block holding the code of a single case or default section.
  CASE_BLOCK = 'case_block'

  # The statements inside the parentheses of a for loop.
  FOR_GROUP_BLOCK = 'for_block'

  # Implied block for one-line if, while, and for statements.
  IMPLIED_BLOCK = 'implied_block'

  # Index into an array or object.
  INDEX = 'index'

  # Array literal written with [].
  ARRAY_LITERAL = 'array_literal'

  # Object literal written with {}.
  OBJECT_LITERAL = 'object_literal'

  # Single element of an array or object literal.
  LITERAL_ELEMENT = 'literal_element'

  # Part of a ternary expression between ? and :
  TERNARY_TRUE = 'ternary_true'

  # Part of a ternary expression after :
  TERNARY_FALSE = 'ternary_false'

  # A whole switch statement.  It contains a GROUP with the variable and a
  # BLOCK with the code.  That BLOCK is not a normal block: it can not contain
  # statements other than case and default.
  SWITCH = 'switch'

  # Ordinary comment.
  COMMENT = 'comment'

  # JsDoc comment.
  DOC = 'doc'

  # Single statement.
  STATEMENT = 'statement'

  # Code inside parentheses.
  GROUP = 'group'

  # Parameter names of a function declaration.
  PARAMETERS = 'parameters'

  # The variable declarations following the 'var' keyword.
  VAR = 'var'

  # The context types that count as blocks.
  BLOCK_TYPES = frozenset(
      (ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK))

  def __init__(self, type, start_token, parent):
    """Creates a context that has not ended yet.

    Args:
      type: One of the context type constants defined on this class.
      start_token: The token at which the context begins.
      parent: The enclosing context.
    """
    self.parent = parent
    self.type = type
    self.start_token = start_token
    self.end_token = None

  def __repr__(self):
    """Returns the context types from this context outwards, ' > ' joined."""

    def _Lineage(node):
      # Walk the parent links, innermost context first.
      while node:
        yield node.type
        node = node.parent

    return 'Context(%s)' % ' > '.join(_Lineage(self))
139
140
class EcmaMetaData(object):
  """Per-token metadata for EcmaScript languages.

  Attributes:
    last_code: The last code token to appear before this one.
    context: The context this token appears in.
    operator_type: The operator type; one of the *_OPERATOR constants defined
        on this class, or None.
    is_implied_semicolon: Flag for an implied semicolon; starts out False.
    is_implied_block: Flag for the start of an implied block; starts out
        False.
    is_implied_block_close: Flag for the close of an implied block; starts out
        False.
  """

  UNARY_OPERATOR = 'unary'

  UNARY_POST_OPERATOR = 'unary_post'

  BINARY_OPERATOR = 'binary'

  TERNARY_OPERATOR = 'ternary'

  def __init__(self):
    """Creates a metadata object with every field unset or False."""
    self.is_implied_block_close = False
    self.is_implied_block = False
    self.is_implied_semicolon = False
    self.operator_type = None
    self.context = None
    self.last_code = None

  def __repr__(self):
    """Returns a string representation of the metadata object."""
    fields = ['%r' % self.context]
    if self.operator_type:
      fields += ['optype: %r' % self.operator_type]
    if self.is_implied_semicolon:
      fields += ['implied;']
    return 'MetaData(%s)' % ', '.join(fields)

  def IsUnaryOperator(self):
    """Returns whether the operator type is unary or unary-post."""
    unary_kinds = (EcmaMetaData.UNARY_OPERATOR,
                   EcmaMetaData.UNARY_POST_OPERATOR)
    return self.operator_type in unary_kinds

  def IsUnaryPostOperator(self):
    """Returns whether the operator type is unary-post."""
    return EcmaMetaData.UNARY_POST_OPERATOR == self.operator_type
183
184
class EcmaMetaDataPass(object):
  """A pass that iterates over all tokens and builds metadata about them.

  The pass follows the token chain through each token's `next` attribute,
  keeps a stack of contexts linked through their `parent` attribute, and
  stores a freshly created metadata object on every token it visits.
  """

  def __init__(self):
    """Initialize the meta data pass object."""
    self.Reset()

  def Reset(self):
    """Resets the metadata pass to prepare for the next file.

    Clears the current token and the last code token, and replaces the context
    stack with a single new root context.
    """
    self._token = None
    self._context = None
    self._AddContext(EcmaContext.ROOT)
    self._last_code = None

  def _CreateContext(self, type):
    """Overridable by subclasses to create the appropriate context type.

    Args:
      type: The type of context to create.

    Returns:
      A new context that starts at the current token and whose parent is the
      current context.
    """
    return EcmaContext(type, self._token, self._context)

  def _CreateMetaData(self):
    """Overridable by subclasses to create the appropriate metadata type.

    Returns:
      A new, empty metadata object.
    """
    return EcmaMetaData()

  def _AddContext(self, type):
    """Adds a context of the given type to the context stack.

    Args:
      type: The type of context to create
    """
    self._context = self._CreateContext(type)

  def _PopContext(self):
    """Moves up one level in the context stack.

    The popped context's end token is set to the current token.

    Returns:
      The former context.

    Raises:
      ParseError: If the root context is popped.  The current context has
          already been set to None when this is raised.
    """
    top_context = self._context
    top_context.end_token = self._token
    self._context = top_context.parent
    if self._context:
      return top_context
    else:
      raise ParseError(self._token)

  def _PopContextType(self, *stop_types):
    """Pops the context stack until a context of the given type is popped.

    Args:
      stop_types: The types of context to pop to - stops at the first match.

    Returns:
      The context object of the given type that was popped.

    Raises:
      ParseError: If the root context is popped before a match is found.
    """
    last = None
    while not last or last.type not in stop_types:
      last = self._PopContext()
    return last

  def _EndStatement(self):
    """Process the end of a statement.

    Pops the enclosing statement context.  If that statement was the body of
    an implied block, the current token is flagged as closing the implied
    block and the implied block context is popped too.
    """
    self._PopContextType(EcmaContext.STATEMENT)
    if self._context.type == EcmaContext.IMPLIED_BLOCK:
      self._token.metadata.is_implied_block_close = True
      self._PopContext()

  def _ProcessContext(self):
    """Process the context at the current token.

    Returns:
      The context that should be assigned to the current token, or None if
      the current context after this method should be used.  Closing tokens
      return the context they close.

    Raises:
      ParseError: When the token appears in an invalid context.
    """
    token = self._token
    token_type = token.type

    if self._context.type in EcmaContext.BLOCK_TYPES:
      # Whenever we're in a block, we add a statement context.  We make an
      # exception for switch statements since they can only contain case: and
      # default: and therefore don't directly contain statements.
      # The block we add here may be immediately removed in some cases, but
      # that causes no harm.
      parent = self._context.parent
      if not parent or parent.type != EcmaContext.SWITCH:
        self._AddContext(EcmaContext.STATEMENT)

    elif self._context.type == EcmaContext.ARRAY_LITERAL:
      self._AddContext(EcmaContext.LITERAL_ELEMENT)

    if token_type == TokenType.START_PAREN:
      if self._last_code and self._last_code.IsKeyword('for'):
        # for loops contain multiple statements in the group unlike while,
        # switch, if, etc.
        self._AddContext(EcmaContext.FOR_GROUP_BLOCK)
      else:
        self._AddContext(EcmaContext.GROUP)

    elif token_type == TokenType.END_PAREN:
      result = self._PopContextType(EcmaContext.GROUP,
                                    EcmaContext.FOR_GROUP_BLOCK)
      keyword_token = result.start_token.metadata.last_code
      # keyword_token will not exist if the open paren is the first line of the
      # file, for example if all code is wrapped in an immediately executed
      # anonymous function.
      if keyword_token and keyword_token.string in ('if', 'for', 'while'):
        next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
        # next_code is missing when the close paren is the last code token,
        # e.g. a trailing "do {} while (x)" without a semicolon.
        if not next_code or next_code.type != TokenType.START_BLOCK:
          # Check for do-while.
          is_do_while = False
          pre_keyword_token = keyword_token.metadata.last_code
          if (pre_keyword_token and
              pre_keyword_token.type == TokenType.END_BLOCK):
            start_block_token = pre_keyword_token.metadata.context.start_token
            # The block opener has no preceding code token when it is the
            # first code token seen.
            block_opener = start_block_token.metadata.last_code
            is_do_while = bool(block_opener and block_opener.string == 'do')

          # If it's not do-while, it's an implied block.
          if not is_do_while:
            self._AddContext(EcmaContext.IMPLIED_BLOCK)
            token.metadata.is_implied_block = True

      return result

    # else (not else if) with no open brace after it should be considered the
    # start of an implied block, similar to the case with if, for, and while
    # above.
    elif (token_type == TokenType.KEYWORD and
          token.string == 'else'):
      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
      if (not next_code or
          (next_code.type != TokenType.START_BLOCK and
           (next_code.type != TokenType.KEYWORD or
            next_code.string != 'if'))):
        self._AddContext(EcmaContext.IMPLIED_BLOCK)
        token.metadata.is_implied_block = True

    elif token_type == TokenType.START_PARAMETERS:
      self._AddContext(EcmaContext.PARAMETERS)

    elif token_type == TokenType.END_PARAMETERS:
      return self._PopContextType(EcmaContext.PARAMETERS)

    elif token_type == TokenType.START_BRACKET:
      if (self._last_code and
          self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
        self._AddContext(EcmaContext.INDEX)
      else:
        self._AddContext(EcmaContext.ARRAY_LITERAL)

    elif token_type == TokenType.END_BRACKET:
      return self._PopContextType(EcmaContext.INDEX, EcmaContext.ARRAY_LITERAL)

    elif token_type == TokenType.START_BLOCK:
      # There is no previous code token when the brace is the first code token
      # processed; such a brace falls through to the object literal case.
      last_code = self._last_code
      if last_code and (
          last_code.type in (TokenType.END_PAREN,
                             TokenType.END_PARAMETERS) or
          last_code.IsKeyword('else') or
          last_code.IsKeyword('do') or
          last_code.IsKeyword('try') or
          last_code.IsKeyword('finally') or
          (last_code.IsOperator(':') and
           last_code.metadata.context.type == EcmaContext.CASE_BLOCK)):
        # else, do, try, and finally all might have no () before {.
        # Also, handle the bizarre syntax case 10: {...}.
        self._AddContext(EcmaContext.BLOCK)
      else:
        self._AddContext(EcmaContext.OBJECT_LITERAL)

    elif token_type == TokenType.END_BLOCK:
      context = self._PopContextType(EcmaContext.BLOCK,
                                     EcmaContext.OBJECT_LITERAL)
      if self._context.type == EcmaContext.SWITCH:
        # The end of the block also means the end of the switch statement it
        # applies to.
        return self._PopContext()
      return context

    elif token.IsKeyword('switch'):
      self._AddContext(EcmaContext.SWITCH)

    elif (token_type == TokenType.KEYWORD and
          token.string in ('case', 'default')):
      # Pop up to but not including the switch block.
      while True:
        parent = self._context.parent
        if not parent:
          # Reached the outermost context without finding a switch.
          raise ParseError(token)
        if parent.type == EcmaContext.SWITCH:
          break
        self._PopContext()

    elif token.IsOperator('?'):
      self._AddContext(EcmaContext.TERNARY_TRUE)

    elif token.IsOperator(':'):
      if self._context.type == EcmaContext.OBJECT_LITERAL:
        self._AddContext(EcmaContext.LITERAL_ELEMENT)

      elif self._context.type == EcmaContext.TERNARY_TRUE:
        self._PopContext()
        self._AddContext(EcmaContext.TERNARY_FALSE)

      # Handle nested ternary statements like:
      # foo = bar ? baz ? 1 : 2 : 3
      # When we encounter the second ":" the context is
      # ternary_false > ternary_true > statement > root
      elif (self._context.type == EcmaContext.TERNARY_FALSE and
            self._context.parent.type == EcmaContext.TERNARY_TRUE):
        self._PopContext()  # Leave current ternary false context.
        self._PopContext()  # Leave current parent ternary true
        self._AddContext(EcmaContext.TERNARY_FALSE)

      elif self._context.parent.type == EcmaContext.SWITCH:
        self._AddContext(EcmaContext.CASE_BLOCK)

    elif token.IsKeyword('var'):
      self._AddContext(EcmaContext.VAR)

    elif token.IsOperator(','):
      # Pop back to the nearest context in which a comma separates items.
      while self._context.type not in (EcmaContext.VAR,
                                       EcmaContext.ARRAY_LITERAL,
                                       EcmaContext.OBJECT_LITERAL,
                                       EcmaContext.STATEMENT,
                                       EcmaContext.PARAMETERS,
                                       EcmaContext.GROUP):
        self._PopContext()

    elif token_type == TokenType.SEMICOLON:
      self._EndStatement()

  def Process(self, first_token):
    """Processes the token stream starting with the given token.

    Args:
      first_token: The first token of the stream; later tokens are reached
          through each token's `next` attribute.

    Raises:
      ParseError: When a token appears in an invalid context.
    """
    self._token = first_token
    while self._token:
      self._ProcessToken()

      if self._token.IsCode():
        self._last_code = self._token

      self._token = self._token.next

    # Close every context that is still open.  Popping the root context
    # always raises ParseError, which is expected here.
    try:
      self._PopContextType(EcmaContext.ROOT)
    except ParseError:
      # Ignore the "popped to root" error.
      pass

  def _ProcessToken(self):
    """Process the given token.

    Attaches a new metadata object to the current token, records its context,
    previous code token and operator type, and ends the current statement
    when a semicolon is implied after the token.
    """
    token = self._token
    # The metadata must exist before the context is processed, because
    # context processing may set flags on it.
    token.metadata = self._CreateMetaData()
    context = (self._ProcessContext() or self._context)
    token.metadata.context = context
    token.metadata.last_code = self._last_code

    # Determine the operator type of the token, if applicable.
    if token.type == TokenType.OPERATOR:
      token.metadata.operator_type = self._GetOperatorType(token)

    # Determine if there is an implied semicolon after the token.
    if token.type != TokenType.SEMICOLON:
      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
      # A statement like if (x) does not need a semicolon after it
      is_implied_block = self._context.type == EcmaContext.IMPLIED_BLOCK
      is_last_code_in_line = token.IsCode() and (
          not next_code or next_code.line_number != token.line_number)
      is_continued_identifier = (token.type == TokenType.IDENTIFIER and
                                 token.string.endswith('.'))
      is_continued_operator = (token.type == TokenType.OPERATOR and
                               not token.metadata.IsUnaryPostOperator())
      is_continued_dot = token.string == '.'
      next_code_is_operator = next_code and next_code.type == TokenType.OPERATOR
      next_code_is_dot = next_code and next_code.string == '.'
      is_end_of_block = (
          token.type == TokenType.END_BLOCK and
          token.metadata.context.type != EcmaContext.OBJECT_LITERAL)
      is_multiline_string = token.type == TokenType.STRING_TEXT
      next_code_is_block = next_code and next_code.type == TokenType.START_BLOCK
      if (is_last_code_in_line and
          self._StatementCouldEndInContext() and
          not is_multiline_string and
          not is_end_of_block and
          not is_continued_identifier and
          not is_continued_operator and
          not is_continued_dot and
          not next_code_is_dot and
          not next_code_is_operator and
          not is_implied_block and
          not next_code_is_block):
        token.metadata.is_implied_semicolon = True
        self._EndStatement()

  def _StatementCouldEndInContext(self):
    """Returns whether the current statement may end in this context.

    Returns:
      True if the current context allows the statement to end, else False.
    """
    # In the basic statement or variable declaration context, statement can
    # always end in this context.
    if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR):
      return True

    # End of a ternary false branch inside a statement can also be the
    # end of the statement, for example:
    # var x = foo ? foo.bar() : null
    # In this case the statement ends after the null, when the context stack
    # looks like ternary_false > var > statement > root.
    if (self._context.type == EcmaContext.TERNARY_FALSE and
        self._context.parent.type in (EcmaContext.STATEMENT, EcmaContext.VAR)):
      return True

    # In all other contexts like object and array literals, ternary true, etc.
    # the statement can't yet end.
    return False

  def _GetOperatorType(self, token):
    """Returns the operator type of the given operator token.

    Args:
      token: The token to get arity for.  Its metadata must already carry
          the previous code token.

    Returns:
      The type of the operator.  One of the *_OPERATOR constants defined in
      EcmaMetaData.
    """
    if token.string == '?':
      return EcmaMetaData.TERNARY_OPERATOR

    if token.string in TokenType.UNARY_OPERATORS:
      return EcmaMetaData.UNARY_OPERATOR

    # With nothing, or a closed block, before it, the operator is treated as
    # unary.
    last_code = token.metadata.last_code
    if not last_code or last_code.type == TokenType.END_BLOCK:
      return EcmaMetaData.UNARY_OPERATOR

    if (token.string in TokenType.UNARY_POST_OPERATORS and
        last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
      return EcmaMetaData.UNARY_POST_OPERATOR

    if (token.string in TokenType.UNARY_OK_OPERATORS and
        last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and
        last_code.string not in TokenType.UNARY_POST_OPERATORS):
      return EcmaMetaData.UNARY_OPERATOR

    return EcmaMetaData.BINARY_OPERATOR
522