#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Logic for computing dependency information for closurized JavaScript files.

Closurized JavaScript files express dependencies using goog.require and
goog.provide statements. In order for the linter to detect when a statement is
missing or unnecessary, all identifiers in the JavaScript file must first be
processed to determine if they constitute the creation or usage of a dependency.
"""


from closure_linter import javascripttokens
from closure_linter import tokenutil

# Short module-level alias for the JavaScript token-type enumeration; the
# CamelCase name is intentional, hence the lint suppression.
# pylint: disable-msg=C6409
TokenType = javascripttokens.JavaScriptTokenType

# Namespaces that are never reported as extra goog.require statements.
# ClosurizedNamespacesInfo.__init__ appends these to the caller-supplied list
# of ignored namespaces.
DEFAULT_EXTRA_NAMESPACES = [
  'goog.testing.asserts',
  'goog.testing.jsunit',
]
37
class ClosurizedNamespacesInfo(object):
  """Dependency information for closurized JavaScript files.

  Processes token streams for dependency creation or usage and provides logic
  for determining if a given require or provide statement is unnecessary or if
  there are missing require or provide statements.

  Tokens are fed in one at a time through ProcessToken(); the query methods
  (IsExtraProvide, IsExtraRequire, GetMissingProvides, GetMissingRequires,
  ...) then report on what has been seen so far. Reset() clears all per-file
  state.
  """

  def __init__(self, closurized_namespaces, ignored_extra_namespaces):
    """Initializes an instance of the ClosurizedNamespacesInfo class.

    Args:
      closurized_namespaces: A list of namespace prefixes that should be
          processed for dependency information. Non-matching namespaces are
          ignored.
      ignored_extra_namespaces: An iterable of namespaces that should not be
          reported as extra regardless of whether they are actually used. May
          be None or empty.
    """
    self._closurized_namespaces = closurized_namespaces
    # Copy into a fresh list so that tuples, other iterables and None are all
    # accepted; the defaults are always appended.
    self._ignored_extra_namespaces = (list(ignored_extra_namespaces or []) +
                                      DEFAULT_EXTRA_NAMESPACES)
    self.Reset()

  def Reset(self):
    """Resets the internal state to prepare for processing a new file."""

    # A list of goog.provide tokens in the order they appeared in the file.
    self._provide_tokens = []

    # A list of goog.require tokens in the order they appeared in the file.
    self._require_tokens = []

    # Namespaces that are already goog.provided.
    self._provided_namespaces = []

    # Namespaces that are already goog.required.
    self._required_namespaces = []

    # Note that created_namespaces and used_namespaces contain both namespaces
    # and identifiers because there are many existing cases where a method or
    # constant is provided directly instead of its namespace. Ideally, these
    # two lists would only have to contain namespaces.

    # A list of [namespace, identifier] pairs where the first element is the
    # namespace of an identifier created in the file and the second is the
    # identifier itself.
    self._created_namespaces = []

    # A list of [namespace, identifier] pairs where the first element is the
    # namespace of an identifier used in the file and the second is the
    # identifier itself.
    self._used_namespaces = []

    # A list of seemingly-unnecessary namespaces that are goog.required() and
    # annotated with @suppress {extraRequire}.
    self._suppressed_requires = []

    # A list of goog.provide tokens which are duplicates.
    self._duplicate_provide_tokens = []

    # A list of goog.require tokens which are duplicates.
    self._duplicate_require_tokens = []

    # Whether this file is in a goog.scope. Someday, we may add support
    # for checking scopified namespaces, but for now let's just fail
    # in a more reasonable way.
    self._scopified_file = False

    # TODO(user): Handle the case where there are 2 different requires
    # that can satisfy the same dependency, but only one is necessary.

  def GetProvidedNamespaces(self):
    """Returns the namespaces which are already provided by this file.

    Returns:
      A new list of strings where each string is a 'namespace' corresponding
      to an existing goog.provide statement in the file being checked.
    """
    return list(self._provided_namespaces)

  def GetRequiredNamespaces(self):
    """Returns the namespaces which are already required by this file.

    Returns:
      A new list of strings where each string is a 'namespace' corresponding
      to an existing goog.require statement in the file being checked.
    """
    return list(self._required_namespaces)

  def IsExtraProvide(self, token):
    """Returns whether the given goog.provide token is unnecessary.

    Args:
      token: A goog.provide token.

    Returns:
      True if the given token corresponds to an unnecessary goog.provide
      statement, otherwise False. Always False for files using goog.scope and
      for namespaces whose root is not a closurized namespace.
    """
    if self._scopified_file:
      return False

    namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string

    base_namespace = namespace.split('.', 1)[0]
    if base_namespace not in self._closurized_namespaces:
      return False

    if token in self._duplicate_provide_tokens:
      return True

    # The provide is needed if anything created in the file matches it, either
    # by namespace or by exact identifier.
    # TODO(user): There's probably a faster way to compute this.
    for created_namespace, created_identifier in self._created_namespaces:
      if namespace in (created_namespace, created_identifier):
        return False

    return True

  def IsExtraRequire(self, token):
    """Returns whether the given goog.require token is unnecessary.

    Args:
      token: A goog.require token.

    Returns:
      True if the given token corresponds to an unnecessary goog.require
      statement, otherwise False. Always False for files using goog.scope and
      for namespaces whose root is not a closurized namespace.
    """
    if self._scopified_file:
      return False

    namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string

    base_namespace = namespace.split('.', 1)[0]
    if base_namespace not in self._closurized_namespaces:
      return False

    if namespace in self._ignored_extra_namespaces:
      return False

    if token in self._duplicate_require_tokens:
      return True

    if namespace in self._suppressed_requires:
      return False

    # If the namespace contains a component that is initial caps, then that
    # must be the last component of the namespace. Slicing with [:1] keeps an
    # empty component (e.g. 'goog..Foo') from raising IndexError.
    parts = namespace.split('.')
    if len(parts) > 1 and parts[-2][:1].isupper():
      return True

    # The require is needed if anything used in the file matches it, either
    # by namespace or by exact identifier.
    # TODO(user): There's probably a faster way to compute this.
    for used_namespace, used_identifier in self._used_namespaces:
      if namespace in (used_namespace, used_identifier):
        return False

    return True

  def GetMissingProvides(self):
    """Returns the set of missing provided namespaces for the current file.

    Returns:
      Returns a set of strings where each string is a namespace that should be
      provided by this file, but is not. Empty for files using goog.scope.
    """
    if self._scopified_file:
      return set()

    # Build the lookup sets once instead of scanning lists per identifier.
    provided = set(self._provided_namespaces)
    required = set(self._required_namespaces)

    missing_provides = set()
    for namespace, identifier in self._created_namespaces:
      if (not self._IsPrivateIdentifier(identifier) and
          namespace not in provided and
          identifier not in provided and
          namespace not in required):
        missing_provides.add(namespace)

    return missing_provides

  def GetMissingRequires(self):
    """Returns the set of missing required namespaces for the current file.

    For each non-private identifier used in the file, find either a
    goog.require, goog.provide or a created identifier that satisfies it.
    goog.require statements can satisfy the identifier by requiring either the
    namespace of the identifier or the identifier itself. goog.provide
    statements can satisfy the identifier by providing the namespace of the
    identifier. A created identifier can only satisfy the used identifier if
    it matches it exactly (necessary since things can be defined on a
    namespace in more than one file). Note that provided namespaces should be
    a subset of created namespaces, but we check both because in some cases we
    can't always detect the creation of the namespace.

    Returns:
      Returns a set of strings where each string is a namespace that should be
      required by this file, but is not. Empty for files using goog.scope.
    """
    if self._scopified_file:
      return set()

    external_dependencies = set(self._required_namespaces)

    # Assume goog namespace is always available.
    external_dependencies.add('goog')

    provided = set(self._provided_namespaces)
    created_identifiers = set(
        identifier for _, identifier in self._created_namespaces)

    missing_requires = set()
    for namespace, identifier in self._used_namespaces:
      if (not self._IsPrivateIdentifier(identifier) and
          namespace not in external_dependencies and
          namespace not in provided and
          identifier not in external_dependencies and
          identifier not in created_identifiers):
        missing_requires.add(namespace)

    return missing_requires

  def _IsPrivateIdentifier(self, identifier):
    """Returns whether the given identifier is private.

    An identifier counts as private when any of its dot-separated components
    ends with an underscore.

    Args:
      identifier: The dotted identifier string to check.

    Returns:
      True if any component ends with '_', otherwise False.
    """
    return any(piece.endswith('_') for piece in identifier.split('.'))

  def IsFirstProvide(self, token):
    """Returns whether token is the first provide token.

    Args:
      token: The token to compare against the recorded goog.provide tokens.

    Returns:
      True if token is the first recorded goog.provide token; False otherwise,
      including when no goog.provide token has been recorded.
    """
    return bool(self._provide_tokens and token == self._provide_tokens[0])

  def IsFirstRequire(self, token):
    """Returns whether token is the first require token.

    Args:
      token: The token to compare against the recorded goog.require tokens.

    Returns:
      True if token is the first recorded goog.require token; False otherwise,
      including when no goog.require token has been recorded.
    """
    return bool(self._require_tokens and token == self._require_tokens[0])

  def IsLastProvide(self, token):
    """Returns whether token is the last provide token.

    Args:
      token: The token to compare against the recorded goog.provide tokens.

    Returns:
      True if token is the last recorded goog.provide token; False otherwise,
      including when no goog.provide token has been recorded.
    """
    return bool(self._provide_tokens and token == self._provide_tokens[-1])

  def IsLastRequire(self, token):
    """Returns whether token is the last require token.

    Args:
      token: The token to compare against the recorded goog.require tokens.

    Returns:
      True if token is the last recorded goog.require token; False otherwise,
      including when no goog.require token has been recorded.
    """
    return bool(self._require_tokens and token == self._require_tokens[-1])

  def ProcessToken(self, token, state_tracker):
    """Processes the given token for dependency information.

    IDENTIFIER tokens record goog.require / goog.provide / goog.scope
    statements, or a created (typedef) or used identifier. SIMPLE_LVALUE
    tokens record a usage inside functions and a creation otherwise. DOC_FLAG
    tokens record the interface named by @implements (or by @extends on an
    @interface) as used. All other token types are ignored.

    Args:
      token: The token to process.
      state_tracker: The JavaScript state tracker.
    """

    # Note that this method is in the critical path for the linter and has been
    # optimized for performance in the following ways:
    # - Tokens are checked by type first to minimize the number of function
    #   calls necessary to determine if action needs to be taken for the token.
    # - The most common tokens types are checked for first.
    # - The number of function calls has been minimized (thus the length of
    #   this function).

    if token.type == TokenType.IDENTIFIER:
      # TODO(user): Consider saving the whole identifier in metadata.
      whole_identifier_string = self._GetWholeIdentifierString(token)
      if whole_identifier_string is None:
        # We only want to process the identifier one time. If the whole string
        # identifier is None, that means this token was part of a multi-token
        # identifier, but it was not the first token of the identifier.
        return

      # In the odd case that a goog.require is encountered inside a function,
      # just ignore it (e.g. dynamic loading in test runners).
      if token.string == 'goog.require' and not state_tracker.InFunction():
        self._require_tokens.append(token)
        namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string
        if namespace in self._required_namespaces:
          self._duplicate_require_tokens.append(token)
        else:
          self._required_namespaces.append(namespace)

        # If there is a suppression for the require, add a usage for it so it
        # gets treated as a regular goog.require (i.e. still gets sorted).
        jsdoc = state_tracker.GetDocComment()
        if jsdoc and ('extraRequire' in jsdoc.suppressions):
          self._suppressed_requires.append(namespace)
          self._AddUsedNamespace(state_tracker, namespace)

      elif token.string == 'goog.provide':
        self._provide_tokens.append(token)
        namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string
        if namespace in self._provided_namespaces:
          self._duplicate_provide_tokens.append(token)
        else:
          self._provided_namespaces.append(namespace)

        # If there is a suppression for the provide, add a creation for it so it
        # gets treated as a regular goog.provide (i.e. still gets sorted).
        jsdoc = state_tracker.GetDocComment()
        if jsdoc and ('extraProvide' in jsdoc.suppressions):
          self._AddCreatedNamespace(state_tracker, namespace)

      elif token.string == 'goog.scope':
        self._scopified_file = True

      else:
        jsdoc = state_tracker.GetDocComment()
        if jsdoc and jsdoc.HasFlag('typedef'):
          self._AddCreatedNamespace(state_tracker, whole_identifier_string,
                                    self.GetClosurizedNamespace(
                                        whole_identifier_string))
        else:
          self._AddUsedNamespace(state_tracker, whole_identifier_string)

    elif token.type == TokenType.SIMPLE_LVALUE:
      identifier = token.values['identifier']
      namespace = self.GetClosurizedNamespace(identifier)
      if state_tracker.InFunction():
        self._AddUsedNamespace(state_tracker, identifier)
      elif namespace and namespace != 'goog':
        self._AddCreatedNamespace(state_tracker, identifier, namespace)

    elif token.type == TokenType.DOC_FLAG:
      flag_type = token.attached_object.flag_type
      if flag_type == 'implements':
        names_interface = True
      elif flag_type == 'extends':
        # @extends only names an interface when the doc comment is itself an
        # @interface. The doc comment is guarded against None like the other
        # branches of this method.
        jsdoc = state_tracker.GetDocComment()
        names_interface = bool(jsdoc and jsdoc.HasFlag('interface'))
      else:
        names_interface = False

      if names_interface:
        # Interfaces should be goog.require'd.
        doc_start = tokenutil.Search(token, TokenType.DOC_START_BRACE)
        interface = tokenutil.Search(doc_start, TokenType.COMMENT)
        self._AddUsedNamespace(state_tracker, interface.string)

  def _GetWholeIdentifierString(self, token):
    """Returns the whole identifier string for the given token.

    Checks the tokens after the current one to see if the token is one in a
    sequence of tokens which are actually just one identifier (i.e. a line was
    wrapped in the middle of an identifier).

    Args:
      token: The token to check.

    Returns:
      The whole identifier string, or None if this token is a continuation of
      (i.e. not the first token in) a multi-token identifier.
    """
    # Search backward to determine if this token is the first token of the
    # identifier. If it is not the first token, return None to signal that this
    # token should be ignored. Whitespace and comments are skipped; any other
    # token ends the search.
    prev_token = token.previous
    while prev_token:
      if (prev_token.IsType(TokenType.IDENTIFIER) or
          (prev_token.IsType(TokenType.NORMAL) and prev_token.string == '.')):
        return None
      elif (not prev_token.IsType(TokenType.WHITESPACE) and
            not prev_token.IsAnyType(TokenType.COMMENT_TYPES)):
        break
      prev_token = prev_token.previous

    # Search forward to find other parts of this identifier separated by white
    # space or comments, collecting identifier and '.' tokens.
    pieces = []
    next_token = token
    while next_token:
      if (next_token.IsType(TokenType.IDENTIFIER) or
          (next_token.IsType(TokenType.NORMAL) and next_token.string == '.')):
        pieces.append(next_token.string)
      elif (not next_token.IsType(TokenType.WHITESPACE) and
            not next_token.IsAnyType(TokenType.COMMENT_TYPES)):
        break
      next_token = next_token.next

    return ''.join(pieces)

  def _AddCreatedNamespace(self, state_tracker, identifier, namespace=None):
    """Adds the namespace of an identifier to the list of created namespaces.

    If the identifier is annotated with a 'missingProvide' suppression, it is
    not added.

    Args:
      state_tracker: The JavaScriptStateTracker instance.
      identifier: The identifier to add.
      namespace: The namespace of the identifier or None if the identifier is
          also the namespace.
    """
    if not namespace:
      namespace = identifier

    jsdoc = state_tracker.GetDocComment()
    if jsdoc and 'missingProvide' in jsdoc.suppressions:
      return

    self._created_namespaces.append([namespace, identifier])

  def _AddUsedNamespace(self, state_tracker, identifier):
    """Adds the namespace of an identifier to the list of used namespaces.

    If the identifier is annotated with a 'missingRequire' suppression, or if
    it does not belong to a closurized namespace, it is not added.

    Args:
      state_tracker: The JavaScriptStateTracker instance.
      identifier: An identifier which has been used.
    """
    jsdoc = state_tracker.GetDocComment()
    if jsdoc and 'missingRequire' in jsdoc.suppressions:
      return

    namespace = self.GetClosurizedNamespace(identifier)
    if namespace:
      self._used_namespaces.append([namespace, identifier])

  def GetClosurizedNamespace(self, identifier):
    """Given an identifier, returns the namespace that identifier is from.

    Args:
      identifier: The identifier to extract a namespace from.

    Returns:
      The namespace the given identifier resides in, or None if one could not
      be found (the identifier is not under a closurized namespace, starts
      with 'goog.global', or contains an empty component).
    """
    if identifier.startswith('goog.global'):
      # Ignore goog.global, since it is, by definition, global.
      # NOTE(review): this is a plain prefix match, so names such as
      # 'goog.globalEval' are ignored as well -- confirm that is intended.
      return None

    # The result below depends only on the identifier, not on which closurized
    # namespace matched, so a single membership-style check is sufficient.
    if not any(identifier.startswith(prefix + '.')
               for prefix in self._closurized_namespaces):
      return None

    parts = identifier.split('.')
    if not parts[-1]:
      # TODO(robbyw): Handle this: it's a multi-line identifier.
      return None

    # The namespace for a class is the shortest prefix ending in a class
    # name, which starts with a capital letter but is not a capitalized word.
    #
    # We ultimately do not want to allow requiring or providing of inner
    # classes/enums.  Instead, a file should provide only the top-level class
    # and users should require only that.
    prefix_parts = []
    for part in parts:
      if not part:
        # Malformed identifier with an empty component (e.g. 'goog..bar'); no
        # namespace can be determined for it.
        return None
      if part == 'prototype' or part.isupper():
        return '.'.join(prefix_parts)
      prefix_parts.append(part)
      if part[0].isupper():
        return '.'.join(prefix_parts)

    # At this point, we know there's no class or enum, so the namespace is
    # just the identifier with the last part removed. With the exception of
    # apply, inherits, and call, which should also be stripped.
    if parts[-1] in ('apply', 'inherits', 'call'):
      parts.pop()
    parts.pop()

    # If the last part ends with an underscore, it is a private variable,
    # method, or enum. The namespace is whatever is before it.
    if parts and parts[-1].endswith('_'):
      parts.pop()

    return '.'.join(parts)
501