1#!/usr/bin/env python
2#
3# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS-IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Logic for computing dependency information for closurized JavaScript files.
18
19Closurized JavaScript files express dependencies using goog.require and
20goog.provide statements. In order for the linter to detect when a statement is
21missing or unnecessary, all identifiers in the JavaScript file must first be
22processed to determine if they constitute the creation or usage of a dependency.
23"""
24
25
26
27import re
28
29from closure_linter import javascripttokens
30from closure_linter import tokenutil
31
# pylint: disable=g-bad-name
# Module-level alias for javascripttokens.JavaScriptTokenType; the class below
# compares token.type against its members (IDENTIFIER, SIMPLE_LVALUE, ...).
TokenType = javascripttokens.JavaScriptTokenType

# Namespaces that are always appended to the caller-supplied list of
# namespaces which must never be reported as an extra goog.require. This has
# to stay a list because it is concatenated with another list in
# ClosurizedNamespacesInfo.__init__.
DEFAULT_EXTRA_NAMESPACES = [
    'goog.testing.asserts',
    'goog.testing.jsunit',
]
39
40
class ClosurizedNamespacesInfo(object):
  """Dependency information for closurized JavaScript files.

  Processes token streams for dependency creation or usage and provides logic
  for determining if a given require or provide statement is unnecessary or if
  there are missing require or provide statements.
  """

  def __init__(self, closurized_namespaces, ignored_extra_namespaces):
    """Initializes an instance of the ClosurizedNamespacesInfo class.

    Args:
      closurized_namespaces: A list of namespace prefixes that should be
          processed for dependency information. Non-matching namespaces are
          ignored.
      ignored_extra_namespaces: An iterable of namespaces that should not be
          reported as extra regardless of whether they are actually used. May
          be None or empty, in which case only DEFAULT_EXTRA_NAMESPACES are
          ignored.
    """
    self._closurized_namespaces = closurized_namespaces
    # Copy into a fresh list so that None and non-list iterables are accepted
    # and neither the caller's sequence nor the module default is aliased.
    self._ignored_extra_namespaces = (
        list(ignored_extra_namespaces or ()) + list(DEFAULT_EXTRA_NAMESPACES))
    self.Reset()

  def Reset(self):
    """Resets the internal state to prepare for processing a new file."""

    # A list of goog.provide tokens in the order they appeared in the file.
    self._provide_tokens = []

    # A list of goog.require tokens in the order they appeared in the file.
    self._require_tokens = []

    # Namespaces that are already goog.provided.
    self._provided_namespaces = []

    # Namespaces that are already goog.required.
    self._required_namespaces = []

    # Note that created_namespaces and used_namespaces contain both namespaces
    # and identifiers because there are many existing cases where a method or
    # constant is provided directly instead of its namespace. Ideally, these
    # two lists would only have to contain namespaces.

    # A list of [namespace, identifier, line_number] entries: the namespace of
    # an identifier created in the file, the identifier itself and the line
    # number where it's created.
    self._created_namespaces = []

    # A list of [namespace, identifier, line_number] entries: the namespace of
    # an identifier used in the file, the identifier itself and the line number
    # where it's used.
    self._used_namespaces = []

    # A list of seemingly-unnecessary namespaces that are goog.required() and
    # annotated with @suppress {extraRequire}.
    self._suppressed_requires = []

    # A list of goog.provide tokens which are duplicates.
    self._duplicate_provide_tokens = []

    # A list of goog.require tokens which are duplicates.
    self._duplicate_require_tokens = []

    # Whether this file is in a goog.scope. Someday, we may add support
    # for checking scopified namespaces, but for now let's just fail
    # in a more reasonable way.
    self._scopified_file = False

    # TODO(user): Handle the case where there are 2 different requires
    # that can satisfy the same dependency, but only one is necessary.

  def GetProvidedNamespaces(self):
    """Returns the namespaces which are already provided by this file.

    Returns:
      A set of strings where each string is a 'namespace' corresponding to an
      existing goog.provide statement in the file being checked.
    """
    return set(self._provided_namespaces)

  def GetRequiredNamespaces(self):
    """Returns the namespaces which are already required by this file.

    Returns:
      A set of strings where each string is a 'namespace' corresponding to an
      existing goog.require statement in the file being checked.
    """
    return set(self._required_namespaces)

  def IsExtraProvide(self, token):
    """Returns whether the given goog.provide token is unnecessary.

    Args:
      token: A goog.provide token.

    Returns:
      True if the given token corresponds to an unnecessary goog.provide
      statement, otherwise False.
    """
    namespace = tokenutil.GetStringAfterToken(token)

    # Only namespaces rooted at a configured closurized prefix are checked.
    base_namespace = namespace.split('.', 1)[0]
    if base_namespace not in self._closurized_namespaces:
      return False

    if token in self._duplicate_provide_tokens:
      return True

    # The provide is needed if anything created in the file lives in that
    # namespace or is that exact identifier.
    # TODO(user): There's probably a faster way to compute this.
    for created_namespace, created_identifier, _ in self._created_namespaces:
      if namespace == created_namespace or namespace == created_identifier:
        return False

    return True

  def IsExtraRequire(self, token):
    """Returns whether the given goog.require token is unnecessary.

    Args:
      token: A goog.require token.

    Returns:
      True if the given token corresponds to an unnecessary goog.require
      statement, otherwise False.
    """
    namespace = tokenutil.GetStringAfterToken(token)

    # Only namespaces rooted at a configured closurized prefix are checked.
    base_namespace = namespace.split('.', 1)[0]
    if base_namespace not in self._closurized_namespaces:
      return False

    if namespace in self._ignored_extra_namespaces:
      return False

    if token in self._duplicate_require_tokens:
      return True

    if namespace in self._suppressed_requires:
      return False

    # If the namespace contains a component that is initial caps, then that
    # must be the last component of the namespace. Slicing with [:1] keeps an
    # empty component (e.g. 'goog..Foo') from raising IndexError.
    parts = namespace.split('.')
    if len(parts) > 1 and parts[-2][:1].isupper():
      return True

    # The require is needed if anything used in the file lives in that
    # namespace or is that exact identifier.
    # TODO(user): There's probably a faster way to compute this.
    for used_namespace, used_identifier, _ in self._used_namespaces:
      if namespace == used_namespace or namespace == used_identifier:
        return False

    return True

  def GetMissingProvides(self):
    """Returns the dict of missing provided namespaces for the current file.

    Returns:
      Returns a dictionary of key as string and value as integer where each
      string(key) is a namespace that should be provided by this file, but is
      not and integer(value) is first line number where it's defined.
    """
    # Build the lookup sets once instead of scanning the lists per entry.
    provided = set(self._provided_namespaces)
    required = set(self._required_namespaces)

    missing_provides = {}
    for namespace, identifier, line_number in self._created_namespaces:
      if (not self._IsPrivateIdentifier(identifier) and
          namespace not in provided and
          identifier not in provided and
          namespace not in required and
          namespace not in missing_provides):
        # Only the first creation of each namespace is recorded.
        missing_provides[namespace] = line_number

    return missing_provides

  def GetMissingRequires(self):
    """Returns the dict of missing required namespaces for the current file.

    For each non-private identifier used in the file, find either a
    goog.require, goog.provide or a created identifier that satisfies it.
    goog.require statements can satisfy the identifier by requiring either the
    namespace of the identifier or the identifier itself. goog.provide
    statements can satisfy the identifier by providing the namespace of the
    identifier. A created identifier can only satisfy the used identifier if
    it matches it exactly (necessary since things can be defined on a
    namespace in more than one file). Note that provided namespaces should be
    a subset of created namespaces, but we check both because in some cases we
    can't always detect the creation of the namespace.

    Returns:
      Returns a dictionary of key as string and value integer where each
      string(key) is a namespace that should be required by this file, but is
      not and integer(value) is first line number where it's used.
    """
    external_dependencies = set(self._required_namespaces)

    # Assume goog namespace is always available.
    external_dependencies.add('goog')

    # Build the lookup sets once instead of scanning the lists per entry.
    provided = set(self._provided_namespaces)
    created_identifiers = set(
        entry[1] for entry in self._created_namespaces)

    missing_requires = {}
    for namespace, identifier, line_number in self._used_namespaces:
      if (not self._IsPrivateIdentifier(identifier) and
          namespace not in external_dependencies and
          namespace not in provided and
          identifier not in external_dependencies and
          identifier not in created_identifiers and
          namespace not in missing_requires):
        # Only the first usage of each namespace is recorded.
        missing_requires[namespace] = line_number

    return missing_requires

  def _IsPrivateIdentifier(self, identifier):
    """Returns whether the given identifier is private.

    An identifier is treated as private when any of its dot-separated
    components ends with an underscore.
    """
    return any(piece.endswith('_') for piece in identifier.split('.'))

  def IsFirstProvide(self, token):
    """Returns True if token is the first provide token, otherwise False."""
    return bool(self._provide_tokens and token == self._provide_tokens[0])

  def IsFirstRequire(self, token):
    """Returns True if token is the first require token, otherwise False."""
    return bool(self._require_tokens and token == self._require_tokens[0])

  def IsLastProvide(self, token):
    """Returns True if token is the last provide token, otherwise False."""
    return bool(self._provide_tokens and token == self._provide_tokens[-1])

  def IsLastRequire(self, token):
    """Returns True if token is the last require token, otherwise False."""
    return bool(self._require_tokens and token == self._require_tokens[-1])

  def ProcessToken(self, token, state_tracker):
    """Processes the given token for dependency information.

    Records goog.require / goog.provide statements, and the namespaces that
    the token creates or uses, in this object's internal lists.

    Args:
      token: The token to process.
      state_tracker: The JavaScript state tracker.
    """

    # Note that this method is in the critical path for the linter and has been
    # optimized for performance in the following ways:
    # - Tokens are checked by type first to minimize the number of function
    #   calls necessary to determine if action needs to be taken for the token.
    # - The most common tokens types are checked for first.
    # - The number of function calls has been minimized (thus the length of
    #   this function).

    if token.type == TokenType.IDENTIFIER:
      # TODO(user): Consider saving the whole identifier in metadata.
      whole_identifier_string = tokenutil.GetIdentifierForToken(token)
      if whole_identifier_string is None:
        # We only want to process the identifier one time. If the whole string
        # identifier is None, that means this token was part of a multi-token
        # identifier, but it was not the first token of the identifier.
        return

      # In the odd case that a goog.require is encountered inside a function,
      # just ignore it (e.g. dynamic loading in test runners).
      if token.string == 'goog.require' and not state_tracker.InFunction():
        self._require_tokens.append(token)
        namespace = tokenutil.GetStringAfterToken(token)
        if namespace in self._required_namespaces:
          self._duplicate_require_tokens.append(token)
        else:
          self._required_namespaces.append(namespace)

        # If there is a suppression for the require, add a usage for it so it
        # gets treated as a regular goog.require (i.e. still gets sorted).
        jsdoc = state_tracker.GetDocComment()
        if jsdoc and ('extraRequire' in jsdoc.suppressions):
          self._suppressed_requires.append(namespace)
          self._AddUsedNamespace(state_tracker, namespace, token.line_number)

      elif token.string == 'goog.provide':
        self._provide_tokens.append(token)
        namespace = tokenutil.GetStringAfterToken(token)
        if namespace in self._provided_namespaces:
          self._duplicate_provide_tokens.append(token)
        else:
          self._provided_namespaces.append(namespace)

        # If there is a suppression for the provide, add a creation for it so it
        # gets treated as a regular goog.provide (i.e. still gets sorted).
        jsdoc = state_tracker.GetDocComment()
        if jsdoc and ('extraProvide' in jsdoc.suppressions):
          self._AddCreatedNamespace(state_tracker, namespace, token.line_number)

      elif token.string == 'goog.scope':
        self._scopified_file = True

      elif token.string == 'goog.setTestOnly':

        # Since the message is optional, we don't want to scan to later lines.
        for t in tokenutil.GetAllTokensInSameLine(token):
          if t.type == TokenType.STRING_TEXT:
            message = t.string

            if re.match(r'^\w+(\.\w+)+$', message):
              # This looks like a namespace. If it's a Closurized namespace,
              # consider it created.
              base_namespace = message.split('.', 1)[0]
              if base_namespace in self._closurized_namespaces:
                self._AddCreatedNamespace(state_tracker, message,
                                          token.line_number)

            # Only the first string on the line is considered.
            break
      else:
        jsdoc = state_tracker.GetDocComment()
        # Prefer the symbol an alias stands for over the alias itself.
        if token.metadata and token.metadata.aliased_symbol:
          whole_identifier_string = token.metadata.aliased_symbol
        if jsdoc and jsdoc.HasFlag('typedef'):
          # An identifier documented with @typedef counts as a creation.
          self._AddCreatedNamespace(state_tracker, whole_identifier_string,
                                    token.line_number,
                                    namespace=self.GetClosurizedNamespace(
                                        whole_identifier_string))
        else:
          # The definition of an alias is not itself a usage.
          if not (token.metadata and token.metadata.is_alias_definition):
            self._AddUsedNamespace(state_tracker, whole_identifier_string,
                                   token.line_number)

    elif token.type == TokenType.SIMPLE_LVALUE:
      identifier = token.values['identifier']
      start_token = tokenutil.GetIdentifierStart(token)
      if start_token and start_token != token:
        # Multi-line identifier being assigned. Get the whole identifier.
        identifier = tokenutil.GetIdentifierForToken(start_token)
      else:
        start_token = token
      # If an alias is defined on the start_token, use it instead.
      if (start_token and
          start_token.metadata and
          start_token.metadata.aliased_symbol and
          not start_token.metadata.is_alias_definition):
        identifier = start_token.metadata.aliased_symbol

      if identifier:
        namespace = self.GetClosurizedNamespace(identifier)
        if state_tracker.InFunction():
          # Assignments inside a function count as usages, not creations.
          self._AddUsedNamespace(state_tracker, identifier, token.line_number)
        elif namespace and namespace != 'goog':
          self._AddCreatedNamespace(state_tracker, identifier,
                                    token.line_number, namespace=namespace)

    elif token.type == TokenType.DOC_FLAG:
      flag_type = token.attached_object.flag_type
      is_interface = state_tracker.GetDocComment().HasFlag('interface')
      if flag_type == 'implements' or (flag_type == 'extends' and is_interface):
        # Interfaces should be goog.require'd.
        doc_start = tokenutil.Search(token, TokenType.DOC_START_BRACE)
        # NOTE(review): assumes tokenutil.Search yields a falsy value (None)
        # when nothing matches -- confirm. The guards keep a flag without a
        # braced type from raising AttributeError on interface.string.
        interface = None
        if doc_start:
          interface = tokenutil.Search(doc_start, TokenType.COMMENT)
        if interface:
          self._AddUsedNamespace(state_tracker, interface.string,
                                 token.line_number)

  def _AddCreatedNamespace(self, state_tracker, identifier, line_number,
                           namespace=None):
    """Adds the namespace of an identifier to the list of created namespaces.

    If the identifier is annotated with a 'missingProvide' suppression, it is
    not added.

    Args:
      state_tracker: The JavaScriptStateTracker instance.
      identifier: The identifier to add.
      line_number: Line number where namespace is created.
      namespace: The namespace of the identifier or None if the identifier is
          also the namespace.
    """
    if not namespace:
      namespace = identifier

    jsdoc = state_tracker.GetDocComment()
    if jsdoc and 'missingProvide' in jsdoc.suppressions:
      return

    self._created_namespaces.append([namespace, identifier, line_number])

  def _AddUsedNamespace(self, state_tracker, identifier, line_number):
    """Adds the namespace of an identifier to the list of used namespaces.

    If the identifier is annotated with a 'missingRequire' suppression, it is
    not added.

    Args:
      state_tracker: The JavaScriptStateTracker instance.
      identifier: An identifier which has been used.
      line_number: Line number where namespace is used.
    """
    jsdoc = state_tracker.GetDocComment()
    if jsdoc and 'missingRequire' in jsdoc.suppressions:
      return

    namespace = self.GetClosurizedNamespace(identifier)
    # b/5362203 If its a variable in scope then its not a required namespace.
    if namespace and not state_tracker.IsVariableInScope(namespace):
      self._used_namespaces.append([namespace, identifier, line_number])

  def GetClosurizedNamespace(self, identifier):
    """Given an identifier, returns the namespace that identifier is from.

    Args:
      identifier: The identifier to extract a namespace from.

    Returns:
      The namespace the given identifier resides in, or None if one could not
      be found. None is also returned for identifiers that have an empty
      component where a namespace part is expected (e.g. 'goog..foo').
    """
    # Ignore goog.global, since it is, by definition, global. This is a plain
    # prefix match, so anything starting with 'goog.global' is ignored.
    if identifier.startswith('goog.global'):
      return None

    parts = identifier.split('.')
    for closurized_namespace in self._closurized_namespaces:
      if not identifier.startswith(closurized_namespace + '.'):
        continue

      if not parts[-1]:
        # TODO(robbyw): Handle this: it's a multi-line identifier.
        return None

      # The namespace for a class is the shortest prefix ending in a class
      # name, which starts with a capital letter but is not a capitalized word.
      #
      # We ultimately do not want to allow requiring or providing of inner
      # classes/enums.  Instead, a file should provide only the top-level class
      # and users should require only that.
      namespace_parts = []
      for part in parts:
        if not part:
          # Malformed identifier with an empty component; indexing part[0]
          # below would raise IndexError, so report "no namespace" instead.
          return None
        if part == 'prototype' or part.isupper():
          return '.'.join(namespace_parts)
        namespace_parts.append(part)
        if part[0].isupper():
          return '.'.join(namespace_parts)

      # At this point, we know there's no class or enum, so the namespace is
      # just the identifier with the last part removed. With the exception of
      # apply, inherits, and call, which should also be stripped.
      if parts[-1] in ('apply', 'inherits', 'call'):
        parts.pop()
      parts.pop()

      # If the last part ends with an underscore, it is a private variable,
      # method, or enum. The namespace is whatever is before it.
      if parts and parts[-1].endswith('_'):
        parts.pop()

      return '.'.join(parts)

    return None
495