#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Logic for computing dependency information for closurized JavaScript files.

Closurized JavaScript files express dependencies using goog.require and
goog.provide statements. In order for the linter to detect when a statement is
missing or unnecessary, all identifiers in the JavaScript file must first be
processed to determine if they constitute the creation or usage of a dependency.
"""


from closure_linter import javascripttokens
from closure_linter import tokenutil

# pylint: disable-msg=C6409
TokenType = javascripttokens.JavaScriptTokenType

DEFAULT_EXTRA_NAMESPACES = [
    'goog.testing.asserts',
    'goog.testing.jsunit',
]


class ClosurizedNamespacesInfo(object):
  """Dependency information for closurized JavaScript files.

  Processes token streams for dependency creation or usage and provides logic
  for determining if a given require or provide statement is unnecessary or if
  there are missing require or provide statements.
  """

  def __init__(self, closurized_namespaces, ignored_extra_namespaces):
    """Initializes an instance of the ClosurizedNamespacesInfo class.

    Args:
      closurized_namespaces: A list of namespace prefixes that should be
          processed for dependency information. Non-matching namespaces are
          ignored.
      ignored_extra_namespaces: A list of namespaces that should not be reported
          as extra regardless of whether they are actually used.
    """
    self._closurized_namespaces = closurized_namespaces
    # Concatenation builds a new list, so neither the caller's list nor
    # DEFAULT_EXTRA_NAMESPACES is mutated.
    self._ignored_extra_namespaces = (ignored_extra_namespaces +
                                      DEFAULT_EXTRA_NAMESPACES)
    self.Reset()

  def Reset(self):
    """Resets the internal state to prepare for processing a new file."""

    # A list of goog.provide tokens in the order they appeared in the file.
    self._provide_tokens = []

    # A list of goog.require tokens in the order they appeared in the file.
    self._require_tokens = []

    # Namespaces that are already goog.provided.
    self._provided_namespaces = []

    # Namespaces that are already goog.required.
    self._required_namespaces = []

    # Note that created_namespaces and used_namespaces contain both namespaces
    # and identifiers because there are many existing cases where a method or
    # constant is provided directly instead of its namespace. Ideally, these
    # two lists would only have to contain namespaces.

    # A list of [namespace, identifier] pairs where the first element is the
    # namespace of an identifier created in the file and the second is the
    # identifier itself.
    self._created_namespaces = []

    # A list of [namespace, identifier] pairs where the first element is the
    # namespace of an identifier used in the file and the second is the
    # identifier itself.
    self._used_namespaces = []

    # A list of seemingly-unnecessary namespaces that are goog.required() and
    # annotated with @suppress {extraRequire}.
    self._suppressed_requires = []

    # A list of goog.provide tokens which are duplicates.
    self._duplicate_provide_tokens = []

    # A list of goog.require tokens which are duplicates.
    self._duplicate_require_tokens = []

    # Whether this file is in a goog.scope. Someday, we may add support
    # for checking scopified namespaces, but for now let's just fail
    # in a more reasonable way.
    self._scopified_file = False

    # TODO(user): Handle the case where there are 2 different requires
    # that can satisfy the same dependency, but only one is necessary.

  def GetProvidedNamespaces(self):
    """Returns the namespaces which are already provided by this file.

    Returns:
      A list of strings where each string is a 'namespace' corresponding to an
      existing goog.provide statement in the file being checked. The list is a
      copy, so callers may modify it freely.
    """
    return list(self._provided_namespaces)

  def GetRequiredNamespaces(self):
    """Returns the namespaces which are already required by this file.

    Returns:
      A list of strings where each string is a 'namespace' corresponding to an
      existing goog.require statement in the file being checked. The list is a
      copy, so callers may modify it freely.
    """
    return list(self._required_namespaces)

  def IsExtraProvide(self, token):
    """Returns whether the given goog.provide token is unnecessary.

    Args:
      token: A goog.provide token.

    Returns:
      True if the given token corresponds to an unnecessary goog.provide
      statement, otherwise False.
    """
    if self._scopified_file:
      return False

    namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string

    # Only namespaces rooted at a closurized prefix are checked.
    base_namespace = namespace.split('.', 1)[0]
    if base_namespace not in self._closurized_namespaces:
      return False

    if token in self._duplicate_provide_tokens:
      return True

    # TODO(user): There's probably a faster way to compute this.
    for created_namespace, created_identifier in self._created_namespaces:
      if namespace == created_namespace or namespace == created_identifier:
        return False

    return True

  def IsExtraRequire(self, token):
    """Returns whether the given goog.require token is unnecessary.

    Args:
      token: A goog.require token.

    Returns:
      True if the given token corresponds to an unnecessary goog.require
      statement, otherwise False.
    """
    if self._scopified_file:
      return False

    namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string

    # Only namespaces rooted at a closurized prefix are checked.
    base_namespace = namespace.split('.', 1)[0]
    if base_namespace not in self._closurized_namespaces:
      return False

    if namespace in self._ignored_extra_namespaces:
      return False

    if token in self._duplicate_require_tokens:
      return True

    if namespace in self._suppressed_requires:
      return False

    # If the namespace contains a component that is initial caps, then that
    # must be the last component of the namespace.
    parts = namespace.split('.')
    # Slice rather than index so that an empty component (e.g. 'a..B') is
    # treated as "not capitalized" instead of raising IndexError.
    if len(parts) > 1 and parts[-2][:1].isupper():
      return True

    # TODO(user): There's probably a faster way to compute this.
    for used_namespace, used_identifier in self._used_namespaces:
      if namespace == used_namespace or namespace == used_identifier:
        return False

    return True

  def GetMissingProvides(self):
    """Returns the set of missing provided namespaces for the current file.

    Returns:
      Returns a set of strings where each string is a namespace that should be
      provided by this file, but is not.
    """
    if self._scopified_file:
      return set()

    missing_provides = set()
    for namespace, identifier in self._created_namespaces:
      if (not self._IsPrivateIdentifier(identifier) and
          namespace not in self._provided_namespaces and
          identifier not in self._provided_namespaces and
          namespace not in self._required_namespaces):
        missing_provides.add(namespace)

    return missing_provides

  def GetMissingRequires(self):
    """Returns the set of missing required namespaces for the current file.

    For each non-private identifier used in the file, find either a
    goog.require, goog.provide or a created identifier that satisfies it.
    goog.require statements can satisfy the identifier by requiring either the
    namespace of the identifier or the identifier itself. goog.provide
    statements can satisfy the identifier by providing the namespace of the
    identifier. A created identifier can only satisfy the used identifier if
    it matches it exactly (necessary since things can be defined on a
    namespace in more than one file). Note that provided namespaces should be
    a subset of created namespaces, but we check both because in some cases we
    can't always detect the creation of the namespace.

    Returns:
      Returns a set of strings where each string is a namespace that should be
      required by this file, but is not.
    """
    if self._scopified_file:
      return set()

    external_dependencies = set(self._required_namespaces)

    # Assume goog namespace is always available.
    external_dependencies.add('goog')

    created_identifiers = set(
        identifier for _, identifier in self._created_namespaces)

    missing_requires = set()
    for namespace, identifier in self._used_namespaces:
      if (not self._IsPrivateIdentifier(identifier) and
          namespace not in external_dependencies and
          namespace not in self._provided_namespaces and
          identifier not in external_dependencies and
          identifier not in created_identifiers):
        missing_requires.add(namespace)

    return missing_requires

  def _IsPrivateIdentifier(self, identifier):
    """Returns whether the given identifier is private.

    Args:
      identifier: A dotted identifier string.

    Returns:
      True if any dot-separated piece of the identifier ends with an
      underscore, otherwise False.
    """
    return any(piece.endswith('_') for piece in identifier.split('.'))

  def IsFirstProvide(self, token):
    """Returns whether token is the first provide token.

    Args:
      token: The token to compare against the recorded goog.provide tokens.

    Returns:
      True if token equals the first recorded goog.provide token. False
      otherwise, including when no goog.provide has been recorded.
    """
    # bool() keeps the result a real boolean; a bare `and` would hand back
    # the empty list when nothing has been recorded.
    return bool(self._provide_tokens and token == self._provide_tokens[0])

  def IsFirstRequire(self, token):
    """Returns whether token is the first require token.

    Args:
      token: The token to compare against the recorded goog.require tokens.

    Returns:
      True if token equals the first recorded goog.require token. False
      otherwise, including when no goog.require has been recorded.
    """
    return bool(self._require_tokens and token == self._require_tokens[0])

  def IsLastProvide(self, token):
    """Returns whether token is the last provide token.

    Args:
      token: The token to compare against the recorded goog.provide tokens.

    Returns:
      True if token equals the last recorded goog.provide token. False
      otherwise, including when no goog.provide has been recorded.
    """
    return bool(self._provide_tokens and token == self._provide_tokens[-1])

  def IsLastRequire(self, token):
    """Returns whether token is the last require token.

    Args:
      token: The token to compare against the recorded goog.require tokens.

    Returns:
      True if token equals the last recorded goog.require token. False
      otherwise, including when no goog.require has been recorded.
    """
    return bool(self._require_tokens and token == self._require_tokens[-1])

  def ProcessToken(self, token, state_tracker):
    """Processes the given token for dependency information.

    Args:
      token: The token to process.
      state_tracker: The JavaScript state tracker.
    """

    # Note that this method is in the critical path for the linter and has been
    # optimized for performance in the following ways:
    # - Tokens are checked by type first to minimize the number of function
    #   calls necessary to determine if action needs to be taken for the token.
    # - The most common tokens types are checked for first.
    # - The number of function calls has been minimized (thus the length of
    #   this function).

    if token.type == TokenType.IDENTIFIER:
      # TODO(user): Consider saving the whole identifier in metadata.
      whole_identifier_string = self._GetWholeIdentifierString(token)
      if whole_identifier_string is None:
        # We only want to process the identifier one time. If the whole string
        # identifier is None, that means this token was part of a multi-token
        # identifier, but it was not the first token of the identifier.
        return

      # In the odd case that a goog.require is encountered inside a function,
      # just ignore it (e.g. dynamic loading in test runners).
      if token.string == 'goog.require' and not state_tracker.InFunction():
        self._require_tokens.append(token)
        namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string
        if namespace in self._required_namespaces:
          self._duplicate_require_tokens.append(token)
        else:
          self._required_namespaces.append(namespace)

        # If there is a suppression for the require, add a usage for it so it
        # gets treated as a regular goog.require (i.e. still gets sorted).
        jsdoc = state_tracker.GetDocComment()
        if jsdoc and ('extraRequire' in jsdoc.suppressions):
          self._suppressed_requires.append(namespace)
          self._AddUsedNamespace(state_tracker, namespace)

      elif token.string == 'goog.provide':
        self._provide_tokens.append(token)
        namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string
        if namespace in self._provided_namespaces:
          self._duplicate_provide_tokens.append(token)
        else:
          self._provided_namespaces.append(namespace)

        # If there is a suppression for the provide, add a creation for it so it
        # gets treated as a regular goog.provide (i.e. still gets sorted).
        jsdoc = state_tracker.GetDocComment()
        if jsdoc and ('extraProvide' in jsdoc.suppressions):
          self._AddCreatedNamespace(state_tracker, namespace)

      elif token.string == 'goog.scope':
        self._scopified_file = True

      else:
        jsdoc = state_tracker.GetDocComment()
        if jsdoc and jsdoc.HasFlag('typedef'):
          self._AddCreatedNamespace(state_tracker, whole_identifier_string,
                                    self.GetClosurizedNamespace(
                                        whole_identifier_string))
        else:
          self._AddUsedNamespace(state_tracker, whole_identifier_string)

    elif token.type == TokenType.SIMPLE_LVALUE:
      identifier = token.values['identifier']
      namespace = self.GetClosurizedNamespace(identifier)
      if state_tracker.InFunction():
        self._AddUsedNamespace(state_tracker, identifier)
      elif namespace and namespace != 'goog':
        self._AddCreatedNamespace(state_tracker, identifier, namespace)

    elif token.type == TokenType.DOC_FLAG:
      flag_type = token.attached_object.flag_type
      # Guard the doc comment the same way every other branch does, so a
      # missing comment is treated as "not an interface" instead of raising.
      jsdoc = state_tracker.GetDocComment()
      is_interface = bool(jsdoc and jsdoc.HasFlag('interface'))
      if flag_type == 'implements' or (flag_type == 'extends' and is_interface):
        # Interfaces should be goog.require'd.
        doc_start = tokenutil.Search(token, TokenType.DOC_START_BRACE)
        interface = tokenutil.Search(doc_start, TokenType.COMMENT)
        self._AddUsedNamespace(state_tracker, interface.string)

  def _GetWholeIdentifierString(self, token):
    """Returns the whole identifier string for the given token.

    Checks the tokens after the current one to see if the token is one in a
    sequence of tokens which are actually just one identifier (i.e. a line was
    wrapped in the middle of an identifier).

    Args:
      token: The token to check.

    Returns:
      The whole identifier string or None if this token is not the first token
      in a multi-token identifier.
    """
    # Search backward to determine if this token is the first token of the
    # identifier. If it is not the first token, return None to signal that this
    # token should be ignored.
    prev_token = token.previous
    while prev_token:
      if (prev_token.IsType(TokenType.IDENTIFIER) or
          (prev_token.IsType(TokenType.NORMAL) and prev_token.string == '.')):
        return None
      elif (not prev_token.IsType(TokenType.WHITESPACE) and
            not prev_token.IsAnyType(TokenType.COMMENT_TYPES)):
        # Anything other than whitespace or a comment ends the identifier.
        break
      prev_token = prev_token.previous

    # Search forward to find other parts of this identifier separated by white
    # space.
    pieces = []
    next_token = token
    while next_token:
      if (next_token.IsType(TokenType.IDENTIFIER) or
          (next_token.IsType(TokenType.NORMAL) and next_token.string == '.')):
        pieces.append(next_token.string)
      elif (not next_token.IsType(TokenType.WHITESPACE) and
            not next_token.IsAnyType(TokenType.COMMENT_TYPES)):
        break
      next_token = next_token.next

    return ''.join(pieces)

  def _AddCreatedNamespace(self, state_tracker, identifier, namespace=None):
    """Adds the namespace of an identifier to the list of created namespaces.

    If the identifier is annotated with a 'missingProvide' suppression, it is
    not added.

    Args:
      state_tracker: The JavaScriptStateTracker instance.
      identifier: The identifier to add.
      namespace: The namespace of the identifier or None if the identifier is
          also the namespace.
    """
    if not namespace:
      namespace = identifier

    jsdoc = state_tracker.GetDocComment()
    if jsdoc and 'missingProvide' in jsdoc.suppressions:
      return

    self._created_namespaces.append([namespace, identifier])

  def _AddUsedNamespace(self, state_tracker, identifier):
    """Adds the namespace of an identifier to the list of used namespaces.

    If the identifier is annotated with a 'missingRequire' suppression, it is
    not added. Identifiers for which no closurized namespace can be determined
    are not added either.

    Args:
      state_tracker: The JavaScriptStateTracker instance.
      identifier: An identifier which has been used.
    """
    jsdoc = state_tracker.GetDocComment()
    if jsdoc and 'missingRequire' in jsdoc.suppressions:
      return

    namespace = self.GetClosurizedNamespace(identifier)
    if namespace:
      self._used_namespaces.append([namespace, identifier])

  def GetClosurizedNamespace(self, identifier):
    """Given an identifier, returns the namespace that identifier is from.

    Args:
      identifier: The identifier to extract a namespace from.

    Returns:
      The namespace the given identifier resides in, or None if one could not
      be found. None is also returned for identifiers with an empty
      dot-separated component (e.g. a trailing or doubled '.').
    """
    if identifier.startswith('goog.global'):
      # Ignore goog.global, since it is, by definition, global.
      return None

    parts = identifier.split('.')
    for closurized_prefix in self._closurized_namespaces:
      if not identifier.startswith(closurized_prefix + '.'):
        continue

      last_part = parts[-1]
      if not last_part:
        # TODO(robbyw): Handle this: it's a multi-line identifier.
        return None

      # The namespace for a class is the shortest prefix ending in a class
      # name, which starts with a capital letter but is not a capitalized word.
      #
      # We ultimately do not want to allow requiring or providing of inner
      # classes/enums.  Instead, a file should provide only the top-level class
      # and users should require only that.
      namespace_parts = []
      for part in parts:
        if not part:
          # An empty component (e.g. 'goog..foo') has no first character to
          # inspect; report "no namespace" rather than raising IndexError.
          return None
        if part == 'prototype' or part.isupper():
          return '.'.join(namespace_parts)
        namespace_parts.append(part)
        if part[0].isupper():
          return '.'.join(namespace_parts)

      # At this point, we know there's no class or enum, so the namespace is
      # just the identifier with the last part removed. With the exception of
      # apply, inherits, and call, which should also be stripped.
      if parts[-1] in ('apply', 'inherits', 'call'):
        parts.pop()
      parts.pop()

      # If the last part ends with an underscore, it is a private variable,
      # method, or enum. The namespace is whatever is before it.
      if parts and parts[-1].endswith('_'):
        parts.pop()

      return '.'.join(parts)

    return None