directory.py revision ddb351dbec246cf1fab5ec20d2d5520909041de1
1#!/usr/bin/python
2# Copyright (c) 2010 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Class for parsing metadata about extension samples."""
7
8import locale
9import os
10import os.path
11import re
12import hashlib
13import zipfile
14import simplejson as json
15
# Make sure we get consistent string sorting behavior by explicitly using the
# default C locale.
# NOTE(review): this runs as a side effect of importing the module and
# setlocale() changes the locale of the whole process, not only of this file -
# confirm that every importer is fine with that.
locale.setlocale(locale.LC_ALL, 'C')
19
def sorted_walk(path):
  """ Walks a directory tree like os.walk, but in a deterministic name order.

  os.walk reports entries in whatever order the filesystem returns them, which
  would cause spurious changes in the generated docs from machine to machine.
  Sorting both lists in place fixes the order in which entries are reported
  and, because os.walk reads the directory list back after each yield, the
  order in which subdirectories are visited.

  The yielded lists are the very objects os.walk handed out, so callers can
  still prune the traversal by modifying the directory list in place.
  """
  for entry in os.walk(path):
    current_dir, subdir_names, file_names = entry
    file_names.sort()
    subdir_names.sort()
    yield current_dir, subdir_names, file_names
30
def parse_json_file(path, encoding="utf-8"):
  """ Load the specified file and parse it as JSON.

  Args:
    path: Path to a file containing JSON-encoded data.
    encoding: Encoding used in the file.  Defaults to utf-8.

  Returns:
    A Python object representing the data encoded in the file.

  Raises:
    Exception: If the file could not be read or its contents could not be
        parsed as JSON data.
  """
  try:
    json_file = open(path, 'r')
  except IOError as msg:
    raise Exception("Failed to read the file at %s: %s" % (path, msg))

  # The outer try/finally guarantees the handle is released whether parsing
  # succeeds, fails with a syntax error, or fails in some unexpected way.
  try:
    try:
      return json.load(json_file, encoding=encoding)
    except ValueError as msg:
      raise Exception("Failed to parse JSON out of file %s: %s" % (path, msg))
  finally:
    json_file.close()
58
class ApiManifest(object):
  """ Represents the list of API methods contained in extension_api.json """

  # Keys which may be passed to the _parseModuleDocLinksByKey method.
  _MODULE_DOC_KEYS = ['functions', 'events']

  def __init__(self, manifest_path):
    """ Read the supplied manifest file and parse its contents.

    Args:
      manifest_path: Path to extension_api.json
    """
    self._manifest = parse_json_file(manifest_path)

  def _getDocLink(self, method, hashprefix):
    """
    Given an API method, return a partial URL corresponding to the doc
    file for that method.

    Args:
      method: A string like 'chrome.foo.bar' or 'chrome.experimental.foo.onBar'
      hashprefix: The prefix to put in front of hash links - 'method' for
          methods and 'event' for events.

    Returns:
      A string like 'foo.html#method-bar' or 'experimental.foo.html#event-onBar'
    """
    # Only strip the leading 'chrome.'; a blanket str.replace would also eat
    # the same text when it appears inside a namespace name.
    api_prefix = 'chrome.'
    if method.startswith(api_prefix):
      method = method[len(api_prefix):]
    urlpattern = '%%s.html#%s-%%s' % hashprefix
    # Everything before the last dot names the doc page, the rest is the
    # anchor within that page.
    urlparts = tuple(method.rsplit('.', 1))
    return urlpattern % urlparts

  def _parseModuleDocLinksByKey(self, module, key):
    """
    Given a specific API module, returns a dict of methods or events mapped to
    documentation URLs.

    Args:
      module: The data in extension_api.json corresponding to a single module.
      key: A key belonging to _MODULE_DOC_KEYS to determine which set of
          methods to parse, and what kind of documentation URL to generate.

    Returns:
      A dict of extension methods mapped to file and hash URL parts for the
      corresponding documentation links, like:
        {
          "chrome.tabs.remove": "tabs.html#method-remove",
          "chrome.tabs.onDetached" : "tabs.html#event-onDetached"
        }

      If the API namespace is defined "nodoc" then an empty dict is returned.

    Raises:
      Exception: If the key supplied is not a member of _MODULE_DOC_KEYS.
    """
    # Validate first so that a bad key is reported even for "nodoc" modules.
    if key not in self._MODULE_DOC_KEYS:
      raise Exception("key %s must be one of %s" % (key, self._MODULE_DOC_KEYS))
    if 'nodoc' in module:
      return {}

    namespace = module['namespace']
    if key == 'events':
      hashprefix = 'event'
    else:
      hashprefix = 'method'

    api_dict = {}
    for method in module.get(key, []):
      method_name = 'chrome.%s.%s' % (namespace, method['name'])
      api_dict[method_name] = self._getDocLink(method_name, hashprefix)
    return api_dict

  def getModuleNames(self):
    """ Returns the names of individual modules in the API.

    Returns:
      A set containing the namespace of every module in the manifest which
      does not have a "nodoc" property.
    """
    # Exclude modules with a "nodoc" property.
    return set(module['namespace'].encode() for module in self._manifest
               if "nodoc" not in module)

  def getDocumentationLinks(self):
    """ Parses the extension_api.json manifest and returns a dict of all
    events and methods for every module, mapped to relative documentation links.

    Returns:
      A dict of methods/events => partial doc links for every module.
    """
    api_dict = {}
    for module in self._manifest:
      for key in self._MODULE_DOC_KEYS:
        api_dict.update(self._parseModuleDocLinksByKey(module, key))
    return api_dict
151
class SamplesManifest(object):
  """ Represents a manifest file containing information about the sample
  extensions available in the codebase. """

  def __init__(self, base_sample_path, base_dir, api_manifest):
    """ Reads through the filesystem and obtains information about any Chrome
    extensions which exist underneath the specified folder.

    Args:
      base_sample_path: The directory under which to search for samples.
      base_dir: The base directory samples will be referenced from.
      api_manifest: An instance of the ApiManifest class, which will indicate
          which API methods are available.
    """
    self._base_dir = base_dir
    manifest_paths = self._locateManifestsFromPath(base_sample_path)
    self._manifest_data = self._parseManifestData(manifest_paths, api_manifest)

  def _locateManifestsFromPath(self, path):
    """
    Returns a list of paths to sample extension manifest.json files.

    A directory which contains a manifest.json is treated as one sample, and
    nothing below it is searched.

    Args:
      path: Base path in which to start the search.
    Returns:
      A list of paths below path pointing at manifest.json files.
    """
    manifest_paths = []
    for root, directories, files in sorted_walk(path):
      if 'manifest.json' in files:
        # Don't go any further down this tree.  The list has to be emptied in
        # place: the walk only honours changes made to the list object it
        # handed out, so rebinding the local name would prune nothing.
        del directories[:]
        manifest_paths.append(os.path.join(root, 'manifest.json'))
      elif '.svn' in directories:
        directories.remove('.svn')   # Don't go into SVN metadata directories
    return manifest_paths

  def _parseManifestData(self, manifest_paths, api_manifest):
    """ Returns metadata about the sample extensions given their manifest
    paths.

    Args:
      manifest_paths: A list of paths to extension manifests
      api_manifest: An instance of the ApiManifest class, which will indicate
          which API methods are available.

    Returns:
      A dict with two keys: 'samples', the list of Sample objects which are
      not apps, ordered by case-insensitive name and then by path, and 'api',
      the dict of API methods mapped to their documentation links.
    """
    api_method_dict = api_manifest.getDocumentationLinks()
    api_methods = list(api_method_dict)

    samples = []
    for path in manifest_paths:
      sample = Sample(path, api_methods, self._base_dir)
      # Don't render apps
      if not sample.is_app():
        samples.append(sample)

    # Order by name (ignoring case), using the path to break ties so that the
    # output is stable when two samples share a name.
    samples.sort(key=lambda sample: (sample['name'].upper(), sample['path']))

    manifest_data = {'samples': samples, 'api': api_method_dict}
    return manifest_data

  def writeToFile(self, path):
    """ Writes the contents of this manifest file as a JSON-encoded text file.

    Args:
      path: The path to write the samples manifest file to.

    Raises:
      Exception: If the output file could not be opened for writing.
    """
    manifest_text = json.dumps(self._manifest_data, indent=2,
                               sort_keys=True, separators=(',', ': '))
    output_path = os.path.realpath(path)
    try:
      output_file = open(output_path, 'w')
    except IOError as msg:
      raise Exception("Failed to write the samples manifest file. "
                      "The specific error was: %s." % msg)
    # Release the handle even when the write itself fails.
    try:
      output_file.write(manifest_text)
    finally:
      output_file.close()

  def writeZippedSamples(self):
    """ For each sample in the current manifest, create a zip file with the
    sample contents in the sample's parent directory if no zip exists, or
    update the zip file if the sample has been updated.

    Returns:
      A list of paths representing zip files which have been modified.
    """
    modified_paths = []
    for sample in self._manifest_data['samples']:
      # write_zip returns None when the existing zip is already up to date.
      path = sample.write_zip()
      if path:
        modified_paths.append(path)
    return modified_paths
253
class Sample(dict):
  """ Represents metadata about a Chrome extension sample.

  Extends dict so that it can be easily JSON serialized: the dict items are
  the data which ends up in the samples manifest, while the underscore
  prefixed attributes only hold working state.
  """

  # Attributes that will map to "features" if their corresponding key is
  # present in the extension manifest.
  _FEATURE_ATTRIBUTES = (
    'browser_action',
    'page_action',
    'background_page',
    'options_page',
    'plugins',
    'theme',
    'chrome_url_overrides'
  )

  # File extensions of files which may contain source code.
  _SOURCE_FILE_EXTENSIONS = ('.html', '.json', '.js', '.css', '.htm')

  # Locales from which translations may be used in the sample gallery.
  _ENGLISH_LOCALES = ['en_US', 'en', 'en_GB']

  # Matches a $PLACEHOLDER$ token inside a translated message.
  _PLACEHOLDER_PATTERN = re.compile(r'\$(\w*)\$')

  def __init__(self, manifest_path, api_methods, base_dir):
    """ Initializes a Sample instance given a path to a manifest.

    Args:
      manifest_path: A filesystem path to a manifest file.
      api_methods: A list of strings containing all possible Chrome extension
          API calls.
      base_dir: The base directory where this sample will be referenced from -
          paths will be made relative to this directory.
    """
    self._base_dir = base_dir
    self._manifest_path = manifest_path
    self._manifest = parse_json_file(self._manifest_path)
    self._locale_data = self._parse_locale_data()

    # The following calls set data which will be serialized when converting
    # this object to JSON.
    source_data = self._parse_source_data(api_methods)
    self['api_calls'] = source_data['api_calls']
    self['source_files'] = source_data['source_files']
    self['source_hash'] = source_data['source_hash']

    self['name'] = self._parse_name()
    self['description'] = self._parse_description()
    self['icon'] = self._parse_icon()
    self['features'] = self._parse_features()
    self['protocols'] = self._parse_protocols()
    self['path'] = self._get_relative_path()
    # The search string is built from the name, description, features and API
    # calls, so it has to be computed after all of them.
    self['search_string'] = self._get_search_string()
    self['id'] = hashlib.sha1(self['path']).hexdigest()
    self['zip_path'] = self._get_relative_zip_path()

  def _get_localized_manifest_value(self, key):
    """ Returns a localized version of the requested manifest value.

    Args:
      key: The manifest key whose value the caller wants translated.

    Returns:
      If the supplied value exists and starts with a __MSG_token__ value, this
      method will resolve the appropriate translation and return the result.
      If no token exists, the manifest value will be returned.  If the key does
      not exist, an empty string will be returned.

    Raises:
      Exception: If the localized value for the given token could not be found.
    """
    if key not in self._manifest:
      return ''
    value = self._manifest[key]
    if value[:6] != '__MSG_':
      return value
    try:
      return self._get_localized_value(value)
    except Exception as msg:
      raise Exception("Could not translate manifest value for key %s: %s" %
                      (key, msg))

  def _get_localized_value(self, message_token):
    """ Returns the localized version of the requested MSG bundle token.

    Args:
      message_token: A message bundle token like __MSG_extensionName__.

    Returns:
      The translated text corresponding to the token, with every $NAME$
      placeholder which is defined for the message substituted in.
      Placeholder names are matched case-insensitively; a $NAME$ token with
      no matching definition is left in the text unchanged.

    Raises:
      Exception: If a message bundle token is not found in the translations.
    """
    # Strip the leading '__MSG_' and the trailing '__'.
    token = message_token[6:-2]
    if token not in self._locale_data:
      raise Exception('Could not find localized string: %s' % message_token)

    translation = self._locale_data[token]
    # Messages without placeholders do not need a "placeholders" section.
    placeholders = translation.get('placeholders', {})

    def substitute(match):
      """ Returns the content for one matched placeholder token. """
      placeholder = placeholders.get(match.group(1).lower())
      if placeholder is None:
        return match.group(0)
      return placeholder['content']

    return self._PLACEHOLDER_PATTERN.sub(substitute, translation['message'])

  def _get_relative_path(self):
    """ Returns a relative path from the supplied base dir to the manifest dir.

    This method is used because we may not be able to rely on os.path.relpath
    which was introduced in Python 2.6 and only works on Windows and Unix.

    Since the example extensions should always be subdirectories of the
    base sample manifest path, we can get a relative path through a simple
    string substitution.

    Returns:
      A relative directory path from the sample manifest's directory to the
      directory containing this sample's manifest.json, with a trailing path
      separator.
    """
    manifest_name = 'manifest.json'
    real_manifest_path = os.path.realpath(self._manifest_path)
    real_base_path = os.path.realpath(self._base_dir)
    # Only remove the leading base path; the same text may legitimately occur
    # again further down the tree.
    relative_path = real_manifest_path.replace(real_base_path, '', 1)
    if relative_path.endswith(manifest_name):
      relative_path = relative_path[:-len(manifest_name)]
    # Drop the path separator which followed the base path.
    return relative_path[1:]

  def _get_relative_zip_path(self):
    """ Returns a relative path from the base dir to the sample's zip file.

    Intended for locating the zip file for the sample in the samples manifest.

    Returns:
      A relative directory path from the sample manifest's directory to this
      sample's zip file.
    """
    zip_filename = self._get_zip_filename()
    # The relative path ends with a separator, so the first dirname only
    # strips that separator and the second one yields the parent directory,
    # which is where the zip file lives.
    zip_relpath = os.path.dirname(os.path.dirname(self._get_relative_path()))
    return os.path.join(zip_relpath, zip_filename)

  def _get_search_string(self):
    """ Constructs a string to be used when searching the samples list.

    To make the implementation of the JavaScript-based search very direct, a
    string is constructed containing the title, description, API calls, and
    features that this sample uses, and is converted to uppercase.  This makes
    JavaScript sample searching very fast and easy to implement.

    Returns:
      An uppercase string, with single and double quotes removed, containing
      information to match on for searching samples on the client.
    """
    search_terms = [
      self['name'],
      self['description'],
    ]
    search_terms.extend(self['features'])
    search_terms.extend(self['api_calls'])
    search_string = ' '.join(search_terms)
    for quote in ('"', '\''):
      search_string = search_string.replace(quote, '')
    return search_string.upper()

  def _get_zip_filename(self):
    """ Returns the filename to be used for a generated zip of the sample.

    Returns:
      A string in the form of "<dirname>.zip" where <dirname> is the name
      of the directory containing this sample's manifest.json.
    """
    sample_path = os.path.realpath(os.path.dirname(self._manifest_path))
    sample_dirname = os.path.basename(sample_path)
    return "%s.zip" % sample_dirname

  def _parse_description(self):
    """ Returns a localized description of the extension.

    Returns:
      A localized version of the sample's description.
    """
    return self._get_localized_manifest_value('description')

  def _parse_features(self):
    """ Returns a list of features the sample uses.

    Returns:
      A sorted list of features the extension uses: the keys from
      self._FEATURE_ATTRIBUTES which are present in the manifest, 'popup' if
      a page or browser action defines one, and every permission which is
      not a URL pattern.
    """
    features = set()
    for feature_attr in self._FEATURE_ATTRIBUTES:
      if feature_attr in self._manifest:
        features.add(feature_attr)

    if self._uses_popup():
      features.add('popup')

    for permission in self._manifest.get('permissions', []):
      # Permissions containing '://' are URL patterns, which are reported by
      # _parse_protocols instead.
      if '://' not in permission:
        features.add(permission)
    return sorted(features)

  def _parse_icon(self):
    """ Returns the path to the 128px icon for this sample.

    Returns:
      The path to the 128px icon if defined in the manifest, None otherwise.
    """
    icons = self._manifest.get('icons')
    if icons is not None and '128' in icons:
      return icons['128']
    return None

  def _parse_locale_data(self):
    """ Parses this sample's locale data into a dict.

    Because the sample gallery is in English, this method only looks for
    translations as defined by self._ENGLISH_LOCALES, and uses the first one
    of them which has a messages.json file.

    Returns:
      A dict containing the translation keys and corresponding English text
      for this extension.  Empty if the sample has no English translation.

    Raises:
      Exception: If the messages file cannot be read, or if it is improperly
          formatted JSON.
    """
    extension_dir_path = os.path.dirname(self._manifest_path)
    for locale_name in self._ENGLISH_LOCALES:
      en_messages_path = os.path.join(extension_dir_path, '_locales',
                                      locale_name, 'messages.json')
      if os.path.isfile(en_messages_path):
        break
    else:
      # None of the English locales is available.
      return {}

    try:
      en_messages_file = open(en_messages_path, 'r')
    except IOError as msg:
      raise Exception("Failed to read %s: %s" % (en_messages_path, msg))
    try:
      en_messages_contents = en_messages_file.read()
    finally:
      en_messages_file.close()

    try:
      return json.loads(en_messages_contents)
    except ValueError as msg:
      raise Exception("File %s has a syntax error: %s" %
                      (en_messages_path, msg))

  def _parse_name(self):
    """ Returns a localized name for the extension.

    Returns:
      A localized version of the sample's name.
    """
    return self._get_localized_manifest_value('name')

  def _parse_protocols(self):
    """ Returns a list of protocols this extension requests permission for.

    Returns:
      A list of every unique protocol listed in the manifest's permissions,
      like 'http://', in the order in which they first appear.
    """
    protocols = []
    for permission in self._manifest.get('permissions', []):
      split = permission.split('://')
      if len(split) != 2:
        continue
      # Compare the value which is actually stored, otherwise the duplicate
      # check can never match.
      protocol = split[0] + "://"
      if protocol not in protocols:
        protocols.append(protocol)
    return protocols

  def _parse_source_data(self, api_methods):
    """ Iterates over the sample's source files and parses data from them.

    Parses any files in the sample directory with known source extensions
    (as defined in self._SOURCE_FILE_EXTENSIONS).  For each file, this method:

       1. Stores a relative path from the manifest.json directory to the file.
       2. Searches through the contents of the file for chrome.* API calls.
       3. Calculates a SHA1 digest for the contents of the file.

    Args:
      api_methods: A list of strings containing the potential
          API calls that the extension sample could be making.

    Raises:
      Exception: If any of the source files cannot be read.

    Returns:
      A dictionary containing the keys/values:
        'api_calls'     A sorted list of API calls the sample makes.
        'source_files'  A sorted list of paths to files the extension uses.
        'source_hash'   A hash of the individual file hashes.
    """
    data = {}
    source_paths = []
    source_hashes = []
    api_calls = set()
    base_path = os.path.realpath(os.path.dirname(self._manifest_path))
    for root, directories, files in sorted_walk(base_path):
      if '.svn' in directories:
        directories.remove('.svn')   # Don't go into SVN metadata directories

      for file_name in files:
        ext = os.path.splitext(file_name)[1]
        if ext not in self._SOURCE_FILE_EXTENSIONS:
          continue

        # Add the file path to the list of source paths.  Only the leading
        # base path and the separator following it are removed.
        fullpath = os.path.realpath(os.path.join(root, file_name))
        source_paths.append(fullpath.replace(base_path, '', 1)[1:])

        # Read the contents and parse out API calls.
        try:
          code_file = open(fullpath, "r")
        except IOError as msg:
          raise Exception("Failed to read %s: %s" % (fullpath, msg))
        try:
          # Bytes which cannot be decoded are replaced rather than rejected,
          # so that a stray non-ASCII character does not abort the build.
          code_contents = unicode(code_file.read(), errors="replace")
        finally:
          code_file.close()
        for method in api_methods:
          if method in code_contents:
            api_calls.add(method)

        # Get a hash of the file contents for zip file generation.
        file_hash = hashlib.sha1(code_contents.encode("ascii", "replace"))
        source_hashes.append(file_hash.hexdigest())

    data['api_calls'] = sorted(api_calls)
    data['source_files'] = sorted(source_paths)
    # The walk order is deterministic, so the combined hash only changes when
    # a source file is added, removed or modified.
    data['source_hash'] = hashlib.sha1(''.join(source_hashes)).hexdigest()
    return data

  def _uses_background(self):
    """ Returns true if the extension defines a background page. """
    return 'background_page' in self._manifest

  def _uses_browser_action(self):
    """ Returns true if the extension defines a browser action. """
    return 'browser_action' in self._manifest

  def _uses_content_scripts(self):
    """ Returns true if the extension uses content scripts. """
    return 'content_scripts' in self._manifest

  def _uses_options(self):
    """ Returns true if the extension defines an options page. """
    return 'options_page' in self._manifest

  def _uses_page_action(self):
    """ Returns true if the extension uses a page action. """
    return 'page_action' in self._manifest

  def _uses_popup(self):
    """ Returns true if the extension defines a popup on a page or browser
    action. """
    has_b_popup = (self._uses_browser_action() and
                   'popup' in self._manifest['browser_action'])
    has_p_popup = (self._uses_page_action() and
                   'popup' in self._manifest['page_action'])
    return has_b_popup or has_p_popup

  def is_app(self):
    """ Returns true if the extension has an 'app' section in its manifest."""
    return 'app' in self._manifest

  def write_zip(self):
    """ Writes a zip file containing all of the files in this Sample's dir.

    The zip is written next to the sample's directory.  The sample's source
    hash is stored as the comment of the manifest.json entry, so that an
    existing zip whose comment matches the current hash is left untouched.

    Returns:
      The path of the zip file relative to the base directory if it was
      written, or None if the existing zip file was already up to date.

    Raises:
      Exception: If an existing zip file could not be read, or if the new zip
          file could not be written.
    """
    sample_path = os.path.realpath(os.path.dirname(self._manifest_path))
    sample_dirname = os.path.basename(sample_path)
    sample_parentpath = os.path.dirname(sample_path)

    zip_filename = self._get_zip_filename()
    zip_path = os.path.join(sample_parentpath, zip_filename)
    # we pass zip_manifest_path to zipfile.getinfo(), which chokes on
    # backslashes, so don't rely on os.path.join, use forward slash on
    # all platforms.
    zip_manifest_path = sample_dirname + '/manifest.json'

    # NOTE(review): this raises the debug level of every ZipFile in the
    # process, not just the ones used here.  It looks like leftover debugging
    # - confirm whether it can be removed.
    zipfile.ZipFile.debug = 3

    if os.path.isfile(zip_path):
      try:
        old_zip_file = zipfile.ZipFile(zip_path, 'r')
      except IOError as msg:
        raise Exception("Could not read zip at %s: %s" % (zip_path, msg))
      except zipfile.BadZipfile as msg:
        raise Exception("File at %s is not a zip file: %s" % (zip_path, msg))

      try:
        info = old_zip_file.getinfo(zip_manifest_path)
        if info.comment == self['source_hash']:
          return None    # Hashes match - no need to generate file
      except KeyError:
        pass             # The old zip file doesn't contain a hash - overwrite
      finally:
        old_zip_file.close()

    zip_file = zipfile.ZipFile(zip_path, 'w')

    try:
      for root, dirs, files in sorted_walk(sample_path):
        if '.svn' in dirs:
          dirs.remove('.svn')
        for file_name in files:
          # Absolute path to the file to be added.
          abspath = os.path.realpath(os.path.join(root, file_name))
          # Relative path to store the file in under the zip.
          relpath = sample_dirname + abspath.replace(sample_path, "", 1)

          zip_file.write(abspath, relpath)
          if file_name == 'manifest.json':
            # Record the hash so that the next run can tell whether the zip
            # needs to be regenerated.
            info = zip_file.getinfo(zip_manifest_path)
            info.comment = self['source_hash']
    except RuntimeError as msg:
      raise Exception("Could not write zip at %s: %s" % (zip_path, msg))
    finally:
      zip_file.close()

    return self._get_relative_zip_path()
682