#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Logic for computing dependency information for closurized JavaScript files.

Closurized JavaScript files express dependencies using goog.require and
goog.provide statements. In order for the linter to detect when a statement is
missing or unnecessary, all identifiers in the JavaScript file must first be
processed to determine if they constitute the creation or usage of a dependency.
"""


import re

from closure_linter import javascripttokens
from closure_linter import tokenutil

# pylint: disable=g-bad-name
TokenType = javascripttokens.JavaScriptTokenType

# Namespaces that are never reported as extra goog.require statements, in
# addition to whatever the caller passes as ignored_extra_namespaces.
DEFAULT_EXTRA_NAMESPACES = [
    'goog.testing.asserts',
    'goog.testing.jsunit',
]


class ClosurizedNamespacesInfo(object):
  """Dependency information for closurized JavaScript files.

  Processes token streams for dependency creation or usage and provides logic
  for determining if a given require or provide statement is unnecessary or if
  there are missing require or provide statements.
  """

  def __init__(self, closurized_namespaces, ignored_extra_namespaces):
    """Initializes an instance of the ClosurizedNamespacesInfo class.

    Args:
      closurized_namespaces: A list of namespace prefixes that should be
          processed for dependency information. Non-matching namespaces are
          ignored.
      ignored_extra_namespaces: A list of namespaces that should not be reported
          as extra regardless of whether they are actually used. None or any
          other iterable of strings is accepted as well.
    """
    self._closurized_namespaces = closurized_namespaces
    # Copy into a fresh list so None and non-list iterables are accepted and
    # the caller's object is never aliased.
    self._ignored_extra_namespaces = (
        list(ignored_extra_namespaces or ()) + DEFAULT_EXTRA_NAMESPACES)
    self.Reset()

  def Reset(self):
    """Resets the internal state to prepare for processing a new file."""

    # A list of goog.provide tokens in the order they appeared in the file.
    self._provide_tokens = []

    # A list of goog.require tokens in the order they appeared in the file.
    self._require_tokens = []

    # Namespaces that are already goog.provided.
    self._provided_namespaces = []

    # Namespaces that are already goog.required.
    self._required_namespaces = []

    # Note that created_namespaces and used_namespaces contain both namespaces
    # and identifiers because there are many existing cases where a method or
    # constant is provided directly instead of its namespace. Ideally, these
    # two lists would only have to contain namespaces.

    # A list of [namespace, identifier, line_number] entries where the first
    # element is the namespace of an identifier created in the file, the second
    # is the identifier itself and the third is the line number where it's
    # created.
    self._created_namespaces = []

    # A list of [namespace, identifier, line_number] entries where the first
    # element is the namespace of an identifier used in the file, the second is
    # the identifier itself and the third is the line number where it's used.
    self._used_namespaces = []

    # A list of seemingly-unnecessary namespaces that are goog.required() and
    # annotated with @suppress {extraRequire}.
    self._suppressed_requires = []

    # A list of goog.provide tokens which are duplicates.
    self._duplicate_provide_tokens = []

    # A list of goog.require tokens which are duplicates.
    self._duplicate_require_tokens = []

    # Whether this file is in a goog.scope. Someday, we may add support
    # for checking scopified namespaces, but for now let's just fail
    # in a more reasonable way.
    self._scopified_file = False

    # TODO(user): Handle the case where there are 2 different requires
    # that can satisfy the same dependency, but only one is necessary.

  def GetProvidedNamespaces(self):
    """Returns the namespaces which are already provided by this file.

    Returns:
      A set of strings where each string is a 'namespace' corresponding to an
      existing goog.provide statement in the file being checked.
    """
    return set(self._provided_namespaces)

  def GetRequiredNamespaces(self):
    """Returns the namespaces which are already required by this file.

    Returns:
      A set of strings where each string is a 'namespace' corresponding to an
      existing goog.require statement in the file being checked.
    """
    return set(self._required_namespaces)

  def IsExtraProvide(self, token):
    """Returns whether the given goog.provide token is unnecessary.

    Args:
      token: A goog.provide token.

    Returns:
      True if the given token corresponds to an unnecessary goog.provide
      statement, otherwise False.
    """
    namespace = tokenutil.GetStringAfterToken(token)

    # Only namespaces rooted at a closurized prefix are checked.
    base_namespace = namespace.split('.', 1)[0]
    if base_namespace not in self._closurized_namespaces:
      return False

    if token in self._duplicate_provide_tokens:
      return True

    # The provide is needed if anything created in the file matches it, either
    # as a namespace or as an exact identifier.
    # TODO(user): There's probably a faster way to compute this.
    return not any(
        namespace == created_namespace or namespace == created_identifier
        for created_namespace, created_identifier, _
        in self._created_namespaces)

  def IsExtraRequire(self, token):
    """Returns whether the given goog.require token is unnecessary.

    Args:
      token: A goog.require token.

    Returns:
      True if the given token corresponds to an unnecessary goog.require
      statement, otherwise False.
    """
    namespace = tokenutil.GetStringAfterToken(token)

    # Only namespaces rooted at a closurized prefix are checked.
    base_namespace = namespace.split('.', 1)[0]
    if base_namespace not in self._closurized_namespaces:
      return False

    if namespace in self._ignored_extra_namespaces:
      return False

    if token in self._duplicate_require_tokens:
      return True

    if namespace in self._suppressed_requires:
      return False

    # If the namespace contains a component that is initial caps, then that
    # must be the last component of the namespace. Slicing with [:1] keeps an
    # empty component (e.g. 'a..b') from raising IndexError.
    parts = namespace.split('.')
    if len(parts) > 1 and parts[-2][:1].isupper():
      return True

    # The require is needed if anything used in the file matches it, either as
    # a namespace or as an exact identifier.
    # TODO(user): There's probably a faster way to compute this.
    return not any(
        namespace == used_namespace or namespace == used_identifier
        for used_namespace, used_identifier, _ in self._used_namespaces)

  def GetMissingProvides(self):
    """Returns the dict of missing provided namespaces for the current file.

    Returns:
      Returns a dictionary of key as string and value as integer where each
      string(key) is a namespace that should be provided by this file, but is
      not and integer(value) is first line number where it's defined.
    """
    # Build the lookup sets once instead of scanning lists for every entry.
    provided = set(self._provided_namespaces)
    required = set(self._required_namespaces)

    missing_provides = {}
    for namespace, identifier, line_number in self._created_namespaces:
      if (not self._IsPrivateIdentifier(identifier) and
          namespace not in provided and
          identifier not in provided and
          namespace not in required and
          namespace not in missing_provides):
        # Only the first creation is recorded for each namespace.
        missing_provides[namespace] = line_number

    return missing_provides

  def GetMissingRequires(self):
    """Returns the dict of missing required namespaces for the current file.

    For each non-private identifier used in the file, find either a
    goog.require, goog.provide or a created identifier that satisfies it.
    goog.require statements can satisfy the identifier by requiring either the
    namespace of the identifier or the identifier itself. goog.provide
    statements can satisfy the identifier by providing the namespace of the
    identifier. A created identifier can only satisfy the used identifier if
    it matches it exactly (necessary since things can be defined on a
    namespace in more than one file). Note that provided namespaces should be
    a subset of created namespaces, but we check both because in some cases we
    can't always detect the creation of the namespace.

    Returns:
      Returns a dictionary of key as string and value integer where each
      string(key) is a namespace that should be required by this file, but is
      not and integer(value) is first line number where it's used.
    """
    external_dependencies = set(self._required_namespaces)

    # Assume goog namespace is always available.
    external_dependencies.add('goog')

    provided = set(self._provided_namespaces)
    created_identifiers = set(
        identifier for _, identifier, _ in self._created_namespaces)

    missing_requires = {}
    for namespace, identifier, line_number in self._used_namespaces:
      if (not self._IsPrivateIdentifier(identifier) and
          namespace not in external_dependencies and
          namespace not in provided and
          identifier not in external_dependencies and
          identifier not in created_identifiers and
          namespace not in missing_requires):
        # Only the first usage is recorded for each namespace.
        missing_requires[namespace] = line_number

    return missing_requires

  def _IsPrivateIdentifier(self, identifier):
    """Returns whether the given identifier is private.

    An identifier is treated as private when any of its dot-separated pieces
    ends with an underscore.

    Args:
      identifier: The dotted identifier string to check.

    Returns:
      True if any piece of the identifier ends with '_', otherwise False.
    """
    return any(piece.endswith('_') for piece in identifier.split('.'))

  def IsFirstProvide(self, token):
    """Returns whether token is the first provide token.

    Args:
      token: The token to compare against the recorded goog.provide tokens.

    Returns:
      True if at least one goog.provide token has been recorded and token
      equals the first of them, otherwise False.
    """
    # bool() so an empty token list yields False rather than the list itself.
    return bool(self._provide_tokens and token == self._provide_tokens[0])

  def IsFirstRequire(self, token):
    """Returns whether token is the first require token.

    Args:
      token: The token to compare against the recorded goog.require tokens.

    Returns:
      True if at least one goog.require token has been recorded and token
      equals the first of them, otherwise False.
    """
    return bool(self._require_tokens and token == self._require_tokens[0])

  def IsLastProvide(self, token):
    """Returns whether token is the last provide token.

    Args:
      token: The token to compare against the recorded goog.provide tokens.

    Returns:
      True if at least one goog.provide token has been recorded and token
      equals the last of them, otherwise False.
    """
    return bool(self._provide_tokens and token == self._provide_tokens[-1])

  def IsLastRequire(self, token):
    """Returns whether token is the last require token.

    Args:
      token: The token to compare against the recorded goog.require tokens.

    Returns:
      True if at least one goog.require token has been recorded and token
      equals the last of them, otherwise False.
    """
    return bool(self._require_tokens and token == self._require_tokens[-1])

  def ProcessToken(self, token, state_tracker):
    """Processes the given token for dependency information.

    Records goog.require / goog.provide statements, goog.scope usage,
    namespaces named in goog.setTestOnly, and the namespaces created or used
    by identifiers, assignments and @implements / @extends doc flags.

    Args:
      token: The token to process.
      state_tracker: The JavaScript state tracker.
    """

    # Note that this method is in the critical path for the linter and has been
    # optimized for performance in the following ways:
    # - Tokens are checked by type first to minimize the number of function
    #   calls necessary to determine if action needs to be taken for the token.
    # - The most common tokens types are checked for first.
    # - The number of function calls has been minimized (thus the length of this
    #   function).

    if token.type == TokenType.IDENTIFIER:
      # TODO(user): Consider saving the whole identifier in metadata.
      whole_identifier_string = tokenutil.GetIdentifierForToken(token)
      if whole_identifier_string is None:
        # We only want to process the identifier one time. If the whole string
        # identifier is None, that means this token was part of a multi-token
        # identifier, but it was not the first token of the identifier.
        return

      # In the odd case that a goog.require is encountered inside a function,
      # just ignore it (e.g. dynamic loading in test runners).
      if token.string == 'goog.require' and not state_tracker.InFunction():
        self._require_tokens.append(token)
        namespace = tokenutil.GetStringAfterToken(token)
        if namespace in self._required_namespaces:
          self._duplicate_require_tokens.append(token)
        else:
          self._required_namespaces.append(namespace)

        # If there is a suppression for the require, add a usage for it so it
        # gets treated as a regular goog.require (i.e. still gets sorted).
        jsdoc = state_tracker.GetDocComment()
        if jsdoc and ('extraRequire' in jsdoc.suppressions):
          self._suppressed_requires.append(namespace)
          self._AddUsedNamespace(state_tracker, namespace, token.line_number)

      elif token.string == 'goog.provide':
        self._provide_tokens.append(token)
        namespace = tokenutil.GetStringAfterToken(token)
        if namespace in self._provided_namespaces:
          self._duplicate_provide_tokens.append(token)
        else:
          self._provided_namespaces.append(namespace)

        # If there is a suppression for the provide, add a creation for it so it
        # gets treated as a regular goog.provide (i.e. still gets sorted).
        jsdoc = state_tracker.GetDocComment()
        if jsdoc and ('extraProvide' in jsdoc.suppressions):
          self._AddCreatedNamespace(state_tracker, namespace, token.line_number)

      elif token.string == 'goog.scope':
        self._scopified_file = True

      elif token.string == 'goog.setTestOnly':

        # Since the message is optional, we don't want to scan to later lines.
        for t in tokenutil.GetAllTokensInSameLine(token):
          if t.type == TokenType.STRING_TEXT:
            message = t.string

            if re.match(r'^\w+(\.\w+)+$', message):
              # This looks like a namespace. If it's a Closurized namespace,
              # consider it created.
              base_namespace = message.split('.', 1)[0]
              if base_namespace in self._closurized_namespaces:
                self._AddCreatedNamespace(state_tracker, message,
                                          token.line_number)

            # Only the first string on the line is considered.
            break
      else:
        jsdoc = state_tracker.GetDocComment()
        if token.metadata and token.metadata.aliased_symbol:
          whole_identifier_string = token.metadata.aliased_symbol
        if jsdoc and jsdoc.HasFlag('typedef'):
          self._AddCreatedNamespace(state_tracker, whole_identifier_string,
                                    token.line_number,
                                    namespace=self.GetClosurizedNamespace(
                                        whole_identifier_string))
        elif not (token.metadata and token.metadata.is_alias_definition):
          self._AddUsedNamespace(state_tracker, whole_identifier_string,
                                 token.line_number)

    elif token.type == TokenType.SIMPLE_LVALUE:
      identifier = token.values['identifier']
      start_token = tokenutil.GetIdentifierStart(token)
      if start_token and start_token != token:
        # Multi-line identifier being assigned. Get the whole identifier.
        identifier = tokenutil.GetIdentifierForToken(start_token)
      else:
        start_token = token
      # If an alias is defined on the start_token, use it instead.
      if (start_token and
          start_token.metadata and
          start_token.metadata.aliased_symbol and
          not start_token.metadata.is_alias_definition):
        identifier = start_token.metadata.aliased_symbol

      if identifier:
        namespace = self.GetClosurizedNamespace(identifier)
        if state_tracker.InFunction():
          self._AddUsedNamespace(state_tracker, identifier, token.line_number)
        elif namespace and namespace != 'goog':
          self._AddCreatedNamespace(state_tracker, identifier,
                                    token.line_number, namespace=namespace)

    elif token.type == TokenType.DOC_FLAG:
      flag_type = token.attached_object.flag_type
      # Guard against a missing doc comment, as the other branches do.
      jsdoc = state_tracker.GetDocComment()
      is_interface = bool(jsdoc and jsdoc.HasFlag('interface'))
      if flag_type == 'implements' or (flag_type == 'extends' and is_interface):
        # Interfaces should be goog.require'd.
        doc_start = tokenutil.Search(token, TokenType.DOC_START_BRACE)
        # NOTE(review): assumes tokenutil.Search yields a falsy value when
        # nothing is found - confirm against tokenutil. A flag written without
        # a braced type is skipped instead of raising AttributeError.
        interface = None
        if doc_start:
          interface = tokenutil.Search(doc_start, TokenType.COMMENT)
        if interface:
          self._AddUsedNamespace(state_tracker, interface.string,
                                 token.line_number)

  def _AddCreatedNamespace(self, state_tracker, identifier, line_number,
                           namespace=None):
    """Adds the namespace of an identifier to the list of created namespaces.

    If the identifier is annotated with a 'missingProvide' suppression, it is
    not added.

    Args:
      state_tracker: The JavaScriptStateTracker instance.
      identifier: The identifier to add.
      line_number: Line number where namespace is created.
      namespace: The namespace of the identifier or None if the identifier is
          also the namespace.
    """
    if not namespace:
      namespace = identifier

    jsdoc = state_tracker.GetDocComment()
    if jsdoc and 'missingProvide' in jsdoc.suppressions:
      return

    self._created_namespaces.append([namespace, identifier, line_number])

  def _AddUsedNamespace(self, state_tracker, identifier, line_number):
    """Adds the namespace of an identifier to the list of used namespaces.

    If the identifier is annotated with a 'missingRequire' suppression, it is
    not added.

    Args:
      state_tracker: The JavaScriptStateTracker instance.
      identifier: An identifier which has been used.
      line_number: Line number where namespace is used.
    """
    jsdoc = state_tracker.GetDocComment()
    if jsdoc and 'missingRequire' in jsdoc.suppressions:
      return

    namespace = self.GetClosurizedNamespace(identifier)
    # b/5362203 If its a variable in scope then its not a required namespace.
    if namespace and not state_tracker.IsVariableInScope(namespace):
      self._used_namespaces.append([namespace, identifier, line_number])

  def GetClosurizedNamespace(self, identifier):
    """Given an identifier, returns the namespace that identifier is from.

    Args:
      identifier: The identifier to extract a namespace from.

    Returns:
      The namespace the given identifier resides in, or None if one could not
      be found.
    """
    if identifier.startswith('goog.global'):
      # Ignore goog.global, since it is, by definition, global.
      return None

    # Only identifiers rooted at one of the closurized prefixes are processed.
    if not any(identifier.startswith(prefix + '.')
               for prefix in self._closurized_namespaces):
      return None

    parts = identifier.split('.')
    if not parts[-1]:
      # TODO(robbyw): Handle this: it's a multi-line identifier.
      return None

    # The namespace for a class is the shortest prefix ending in a class
    # name, which starts with a capital letter but is not a capitalized word.
    #
    # We ultimately do not want to allow requiring or providing of inner
    # classes/enums. Instead, a file should provide only the top-level class
    # and users should require only that.
    class_prefix = []
    for part in parts:
      if part == 'prototype' or part.isupper():
        return '.'.join(class_prefix)
      class_prefix.append(part)
      # part[:1] rather than part[0] so an empty component (e.g. 'goog..foo')
      # does not raise IndexError.
      if part[:1].isupper():
        return '.'.join(class_prefix)

    # At this point, we know there's no class or enum, so the namespace is
    # just the identifier with the last part removed. With the exception of
    # apply, inherits, and call, which should also be stripped.
    if parts[-1] in ('apply', 'inherits', 'call'):
      parts.pop()
    parts.pop()

    # If the last part ends with an underscore, it is a private variable,
    # method, or enum. The namespace is whatever is before it.
    if parts and parts[-1].endswith('_'):
      parts.pop()

    return '.'.join(parts)