directory.py revision 731df977c0511bca2206b5f333555b1205ff1f43
1#!/usr/bin/python
2# Copyright (c) 2010 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Class for parsing metadata about extension samples."""
7
8import locale
9import os
10import os.path
11import re
12import hashlib
13import zipfile
14import simplejson as json
15
# Make sure we get consistent string sorting behavior by explicitly using the
# default C locale.  This is a module-level statement, so the locale is
# switched as a side effect of importing this file.
locale.setlocale(locale.LC_ALL, 'C')
19
def sorted_walk(path):
  """ Walks a directory tree like os.walk, visiting entries in name order.

  os.walk returns directory entries in a filesystem dependent order (by inode
  creation time, etc), which would cause spurious changes in the generated
  docs.  Sorting each level by name keeps the output stable.

  Args:
    path: The directory at which to start walking.

  Yields:
    The same (base, dirs, files) triples as os.walk, with dirs and files
    sorted by name.
  """
  for entry in os.walk(path):
    base = entry[0]
    subdirs = entry[1]
    filenames = entry[2]
    # Sort in place: os.walk reads the very same list objects to decide which
    # subdirectories to visit next, and callers prune the walk by mutating
    # the yielded dirs list.
    subdirs.sort()
    filenames.sort()
    yield base, subdirs, filenames
30
def parse_json_file(path, encoding="utf-8"):
  """ Reads a file from disk and decodes its contents as JSON.

  Args:
    path: Path to a file containing JSON-encoded data.
    encoding: Encoding used in the file.  Defaults to utf-8.

  Returns:
    A Python object representing the data encoded in the file.

  Raises:
    Exception: If the file could not be opened or its contents could not be
        parsed as JSON data.
  """
  try:
    handle = open(path, 'r')
  except IOError as error:
    raise Exception("Failed to read the file at %s: %s" % (path, error))

  # The handle is closed whether or not decoding succeeds.
  try:
    try:
      return json.load(handle, encoding)
    except ValueError as error:
      raise Exception("Failed to parse JSON out of file %s: %s" %
                      (path, error))
  finally:
    handle.close()
58
class ApiManifest(object):
  """ Represents the list of API methods contained in extension_api.json """

  # Keys which may be passed to the _parseModuleDocLinksByKey method.
  _MODULE_DOC_KEYS = ['functions', 'events']

  def __init__(self, manifest_path):
    """ Read the supplied manifest file and parse its contents.

    Args:
      manifest_path: Path to extension_api.json

    Raises:
      Exception: If the manifest cannot be read or parsed (raised by
          parse_json_file).
    """
    self._manifest = parse_json_file(manifest_path)

  def _getDocLink(self, method, hashprefix):
    """
    Given an API method, return a partial URL corresponding to the doc
    file for that method.

    Args:
      method: A string like 'chrome.foo.bar' or 'chrome.experimental.foo.onBar'
      hashprefix: The prefix to put in front of hash links - 'method' for
          methods and 'event' for events.

    Returns:
      A string like 'foo.html#method-bar' or 'experimental.foo.html#event-onBar'
    """
    # Only strip the leading 'chrome.' prefix.  A blanket str.replace would
    # also eat the text 'chrome.' anywhere else in the name (for instance
    # inside a namespace that merely ends in 'chrome').
    prefix = 'chrome.'
    if method.startswith(prefix):
      method = method[len(prefix):]
    # Split off the final component: everything before it names the doc file,
    # the component itself becomes the anchor.
    urlparts = tuple(method.rsplit('.', 1))
    urlpattern = '%%s.html#%s-%%s' % hashprefix
    return urlpattern % urlparts

  def _parseModuleDocLinksByKey(self, module, key):
    """
    Given a specific API module, returns a dict of methods or events mapped to
    documentation URLs.

    Args:
      module: The data in extension_api.json corresponding to a single module.
      key: A key belonging to _MODULE_DOC_KEYS to determine which set of
          methods to parse, and what kind of documentation URL to generate.

    Returns:
      A dict of extension methods mapped to file and hash URL parts for the
      corresponding documentation links, like:
        {
          "chrome.tabs.remove": "tabs.html#method-remove",
          "chrome.tabs.onDetached" : "tabs.html#event-onDetached"
        }

      If the API namespace is defined "nodoc" then an empty dict is returned.

    Raises:
      Exception: If the key supplied is not a member of _MODULE_DOC_KEYS.
    """
    # Validate the key before anything else so that a bad key is always
    # reported, even for modules that are excluded from the docs.
    if key not in self._MODULE_DOC_KEYS:
      raise Exception("key %s must be one of %s" % (key, self._MODULE_DOC_KEYS))
    if 'nodoc' in module:
      return {}

    namespace = module['namespace']
    if key == 'events':
      hashprefix = 'event'
    else:
      hashprefix = 'method'

    api_dict = {}
    # A module is not required to define every key; treat a missing one as an
    # empty list.
    for method in module.get(key, []):
      method_name = 'chrome.%s.%s' % (namespace, method['name'])
      api_dict[method_name] = self._getDocLink(method_name, hashprefix)
    return api_dict

  def getModuleNames(self):
    """ Returns the names of individual modules in the API.

    Returns:
      A set containing the namespace of every module in the manifest, except
      for modules with a "nodoc" property.
    """
    return set(module['namespace'].encode() for module in self._manifest
               if "nodoc" not in module)

  def getDocumentationLinks(self):
    """ Parses the extension_api.json manifest and returns a dict of all
    events and methods for every module, mapped to relative documentation links.

    Returns:
      A dict of methods/events => partial doc links for every module.
    """
    api_dict = {}
    for module in self._manifest:
      for key in self._MODULE_DOC_KEYS:
        api_dict.update(self._parseModuleDocLinksByKey(module, key))
    return api_dict
151
class SamplesManifest(object):
  """ Represents a manifest file containing information about the sample
  extensions available in the codebase. """

  def __init__(self, base_sample_path, base_dir, api_manifest):
    """ Reads through the filesystem and obtains information about any Chrome
    extensions which exist underneath the specified folder.

    Args:
      base_sample_path: The directory under which to search for samples.
      base_dir: The base directory samples will be referenced from.
      api_manifest: An instance of the ApiManifest class, which will indicate
          which API methods are available.
    """
    self._base_dir = base_dir
    manifest_paths = self._locateManifestsFromPath(base_sample_path)
    self._manifest_data = self._parseManifestData(manifest_paths, api_manifest)

  def _locateManifestsFromPath(self, path):
    """
    Returns a list of paths to sample extension manifest.json files.

    A directory containing a manifest.json is treated as the root of a
    sample, so the search does not continue into its subdirectories.

    Args:
      path: Base path in which to start the search.

    Returns:
      A list of paths below path pointing at manifest.json files.
    """
    manifest_paths = []
    for root, directories, files in sorted_walk(path):
      if 'manifest.json' in files:
        # Don't go any further down this tree.  The list has to be emptied in
        # place: the walk only notices changes made to this very list object,
        # so rebinding the name to a new list would have no effect.
        del directories[:]
        manifest_paths.append(os.path.join(root, 'manifest.json'))
      elif '.svn' in directories:
        directories.remove('.svn')   # Don't go into SVN metadata directories
    return manifest_paths

  def _parseManifestData(self, manifest_paths, api_manifest):
    """ Returns metadata about the sample extensions given their manifest
    paths.

    Args:
      manifest_paths: A list of paths to extension manifests
      api_manifest: An instance of the ApiManifest class, which will indicate
          which API methods are available.

    Returns:
      Manifest data containing a list of samples and available API methods.
      Samples are sorted by case-insensitive name, then by path.
    """
    api_method_dict = api_manifest.getDocumentationLinks()
    api_methods = sorted(api_method_dict)

    samples = []
    for path in manifest_paths:
      sample = Sample(path, api_methods, self._base_dir)
      # Don't render apps
      if not sample.is_app():
        samples.append(sample)

    # Sort by name (ignoring case), using the path as a tie breaker.
    samples.sort(key=lambda sample: (sample['name'].upper(), sample['path']))

    manifest_data = {'samples': samples, 'api': api_method_dict}
    return manifest_data

  def writeToFile(self, path):
    """ Writes the contents of this manifest file as a JSON-encoded text file.
    For each sample written to the manifest, create a zip file with the sample
    contents in the sample's parent directory.

    Args:
      path: The path to write the samples manifest file to.

    Raises:
      Exception: If the output file cannot be opened for writing.
    """
    # Zips are written first because write_zip records each sample's
    # 'zip_path', which must be part of the serialized manifest.
    for sample in self._manifest_data['samples']:
      sample.write_zip()

    manifest_text = json.dumps(self._manifest_data, indent=2)
    output_path = os.path.realpath(path)
    try:
      output_file = open(output_path, 'w')
    except IOError as msg:
      raise Exception("Failed to write the samples manifest file. "
                      "The specific error was: %s." % msg)
    # Close the file even if the write itself fails.
    try:
      output_file.write(manifest_text)
    finally:
      output_file.close()
243
class Sample(dict):
  """ Represents metadata about a Chrome extension sample.

  Extends dict so that it can be easily JSON serialized: the keys assigned in
  __init__ (plus 'zip_path', which is set by write_zip) are the serialized
  properties, while underscore-prefixed attributes hold internal state.
  """

  # Attributes that will map to "features" if their corresponding key is
  # present in the extension manifest.
  _FEATURE_ATTRIBUTES = (
    'browser_action',
    'page_action',
    'background_page',
    'options_page',
    'plugins',
    'theme',
    'chrome_url_overrides'
  )

  # File extensions of files which may contain source code.
  _SOURCE_FILE_EXTENSIONS = ('.html', '.json', '.js', '.css', '.htm')

  # Locales from which translations may be used in the sample gallery, in
  # order of preference.
  _ENGLISH_LOCALES = ['en_US', 'en', 'en_GB']

  # Matches a $placeholder$ reference inside a localized message.
  _PLACEHOLDER_PATTERN = re.compile(r'\$(\w*)\$')

  def __init__(self, manifest_path, api_methods, base_dir):
    """ Initializes a Sample instance given a path to a manifest.

    Args:
      manifest_path: A filesystem path to a manifest file.
      api_methods: A list of strings containing all possible Chrome extension
          API calls.
      base_dir: The base directory where this sample will be referenced from -
          paths will be made relative to this directory.
    """
    self._base_dir = base_dir
    self._manifest_path = manifest_path
    self._manifest = parse_json_file(self._manifest_path)
    self._locale_data = self._parse_locale_data()

    # The following properties will be serialized when converting this object
    # to JSON.  The order matters: the search string is built from the name,
    # description, features and API calls, and the id is derived from the
    # path.

    self['api_calls'] = self._parse_api_calls(api_methods)
    self['name'] = self._parse_name()
    self['description'] = self._parse_description()
    self['icon'] = self._parse_icon()
    self['features'] = self._parse_features()
    self['protocols'] = self._parse_protocols()
    self['path'] = self._get_relative_path()
    self['search_string'] = self._get_search_string()
    self['source_files'] = self._parse_source_files()

    # hashlib operates on bytes, so encode text paths explicitly rather than
    # relying on an implicit ASCII conversion.
    path_bytes = self['path']
    if not isinstance(path_bytes, bytes):
      path_bytes = path_bytes.encode('utf-8')
    self['id'] = hashlib.sha1(path_bytes).hexdigest()

  @staticmethod
  def _strip_base_path(path, base_path):
    """ Returns path with the leading base_path (and separator) removed.

    Only the prefix is removed; later occurrences of base_path inside path
    are left untouched.

    Args:
      path: The path to make relative.
      base_path: The directory that path is expected to live under.

    Returns:
      The part of path below base_path.  If path is not located under
      base_path, path is returned without its first character.
    """
    prefix = base_path
    if not prefix.endswith(os.sep):
      prefix += os.sep
    if path.startswith(prefix):
      return path[len(prefix):]
    return path[1:]

  def _get_localized_manifest_value(self, key):
    """ Returns a localized version of the requested manifest value.

    Args:
      key: The manifest key whose value the caller wants translated.

    Returns:
      If the supplied value exists and contains a __MSG_token__ value, this
      method will resolve the appropriate translation and return the result.
      If no token exists, the manifest value will be returned.  If the key does
      not exist, an empty string will be returned.

    Raises:
      Exception: If the localized value for the given token could not be found.
    """
    if key not in self._manifest:
      return ''

    value = self._manifest[key]
    if value[:6] != '__MSG_':
      return value

    try:
      return self._get_localized_value(value)
    except Exception as msg:
      raise Exception("Could not translate manifest value for key %s: %s" %
                      (key, msg))

  def _get_localized_value(self, message_token):
    """ Returns the localized version of the requested MSG bundle token.

    Args:
      message_token: A message bundle token like __MSG_extensionName__.

    Returns:
      The translated text corresponding to the token, with every placeholder
      that has a definition substituted in.  Placeholder names are matched
      without regard to case; references without a definition are left as
      they are.

    Raises:
      Exception: If a message bundle token is not found in the translations.
    """
    # Strip the leading '__MSG_' and the trailing '__'.
    token = message_token[6:-2]
    if token not in self._locale_data:
      raise Exception('Could not find localized string: %s' % message_token)

    entry = self._locale_data[token]
    # Messages without placeholders have no 'placeholders' section at all.
    placeholders = dict((name.lower(), definition) for name, definition
                        in entry.get('placeholders', {}).items())

    def substitute(match):
      """ Returns the replacement text for one $placeholder$ match. """
      name = match.group(1).lower()
      if name in placeholders:
        return placeholders[name]['content']
      return match.group(0)

    return self._PLACEHOLDER_PATTERN.sub(substitute, entry['message'])

  def _get_relative_path(self):
    """ Returns a relative path from the supplied base dir to the manifest dir.

    This method is used because we may not be able to rely on os.path.relpath
    which was introduced in Python 2.6 and only works on Windows and Unix.

    Since the example extensions should always be subdirectories of the
    base sample manifest path, we can get a relative path by removing the
    base directory prefix.

    Returns:
      A relative directory path from the sample manifest's directory to the
      directory containing this sample's manifest.json.  Unless it is empty,
      the result ends with a path separator.
    """
    real_manifest_path = os.path.realpath(self._manifest_path)
    real_base_path = os.path.realpath(self._base_dir)
    relative_path = self._strip_base_path(real_manifest_path, real_base_path)
    # Drop the file name, keeping the trailing separator of the directory.
    manifest_name = 'manifest.json'
    if relative_path.endswith(manifest_name):
      relative_path = relative_path[:-len(manifest_name)]
    return relative_path

  def _get_search_string(self):
    """ Constructs a string to be used when searching the samples list.

    To make the implementation of the JavaScript-based search very direct, a
    string is constructed containing the title, description, API calls, and
    features that this sample uses, and is converted to uppercase.  This makes
    JavaScript sample searching very fast and easy to implement.

    Returns:
      An uppercase string, with single and double quotes removed, containing
      information to match on for searching samples on the client.
    """
    search_terms = [self['name'], self['description']]
    search_terms.extend(self['features'])
    search_terms.extend(self['api_calls'])
    search_string = ' '.join(search_terms)
    for quote in ('"', '\''):
      search_string = search_string.replace(quote, '')
    return search_string.upper()

  def _parse_api_calls(self, api_methods):
    """ Returns a list of Chrome extension API calls the sample makes.

    Parses any *.html and *.js files in the sample directory and matches them
    against the supplied list of all available API methods, returning methods
    which show up in the sample code.

    Args:
      api_methods: A list of strings containing the potential
          API calls the extension sample could be making.

    Returns:
      A sorted list of every member of api_methods that appears in any *.html
      or *.js file contained in this sample's directory (or subdirectories).

    Raises:
      Exception: If any of the *.html or *.js files cannot be opened.
    """
    api_calls = set()
    extension_dir_path = os.path.dirname(self._manifest_path)
    for root, dirs, files in sorted_walk(extension_dir_path):
      for file_name in files:
        if not file_name.endswith(('.html', '.js')):
          continue
        path = os.path.join(root, file_name)
        try:
          code_file = open(path, 'rb')
        except IOError as msg:
          raise Exception("Failed to read %s: %s" % (path, msg))
        try:
          # Decode leniently so that stray non-UTF-8 bytes in a sample cannot
          # abort the scan; API names themselves are plain ASCII.
          code_contents = code_file.read().decode('utf-8', 'replace')
        finally:
          code_file.close()

        for method in api_methods:
          if method in code_contents:
            api_calls.add(method)
    return sorted(api_calls)

  def _parse_source_files(self):
    """ Returns a list of paths to source files present in the extension.

    Returns:
      A sorted list of paths relative to the manifest file directory.
    """
    source_paths = []
    base_path = os.path.realpath(os.path.dirname(self._manifest_path))
    for root, directories, files in sorted_walk(base_path):
      if '.svn' in directories:
        directories.remove('.svn')   # Don't go into SVN metadata directories

      for file_name in files:
        ext = os.path.splitext(file_name)[1]
        if ext in self._SOURCE_FILE_EXTENSIONS:
          path = os.path.join(root, file_name)
          source_paths.append(self._strip_base_path(path, base_path))
    return sorted(source_paths)

  def _parse_description(self):
    """ Returns a localized description of the extension.

    Returns:
      A localized version of the sample's description.
    """
    return self._get_localized_manifest_value('description')

  def _parse_features(self):
    """ Returns a list of features the sample uses.

    Returns:
      A sorted list of features the extension uses: the members of
      self._FEATURE_ATTRIBUTES present in the manifest, 'popup' if a browser
      or page action defines one, and every permission that is not a URL
      pattern.
    """
    features = set()
    for feature_attr in self._FEATURE_ATTRIBUTES:
      if feature_attr in self._manifest:
        features.add(feature_attr)

    if self._uses_popup():
      features.add('popup')

    for permission in self._manifest.get('permissions', []):
      # Permissions containing '://' are URL patterns; those are reported by
      # _parse_protocols instead.
      if '://' not in permission:
        features.add(permission)
    return sorted(features)

  def _parse_icon(self):
    """ Returns the path to the 128px icon for this sample.

    Returns:
      The path to the 128px icon if defined in the manifest, None otherwise.
    """
    if 'icons' in self._manifest and '128' in self._manifest['icons']:
      return self._manifest['icons']['128']
    return None

  def _parse_locale_data(self):
    """ Parses this sample's locale data into a dict.

    Because the sample gallery is in English, this method only looks for
    translations as defined by self._ENGLISH_LOCALES, and uses the first of
    those locales that has a messages.json file.

    Returns:
      A dict containing the translation keys and corresponding English text
      for this extension, or an empty dict if no English messages file exists.

    Raises:
      Exception: If the messages file cannot be opened, or if it is improperly
          formatted JSON.
    """
    extension_dir_path = os.path.dirname(self._manifest_path)
    for locale_name in self._ENGLISH_LOCALES:
      en_messages_path = os.path.join(extension_dir_path, '_locales',
                                      locale_name, 'messages.json')
      if not os.path.isfile(en_messages_path):
        continue

      try:
        en_messages_file = open(en_messages_path, 'r')
      except IOError as msg:
        raise Exception("Failed to read %s: %s" % (en_messages_path, msg))
      try:
        en_messages_contents = en_messages_file.read()
      finally:
        en_messages_file.close()

      try:
        return json.loads(en_messages_contents)
      except ValueError as msg:
        raise Exception("File %s has a syntax error: %s" %
                        (en_messages_path, msg))
    return {}

  def _parse_name(self):
    """ Returns a localized name for the extension.

    Returns:
      A localized version of the sample's name.
    """
    return self._get_localized_manifest_value('name')

  def _parse_protocols(self):
    """ Returns a list of protocols this extension requests permission for.

    Returns:
      A list of every unique protocol listed in the manifest's permissions,
      like ['http://', 'https://'], in order of first appearance.
    """
    protocols = []
    for permission in self._manifest.get('permissions', []):
      split = permission.split('://')
      if len(split) != 2:
        continue
      # Compare the same value that gets stored, otherwise duplicates would
      # never be detected.
      protocol = split[0] + "://"
      if protocol not in protocols:
        protocols.append(protocol)
    return protocols

  def _uses_background(self):
    """ Returns true if the extension defines a background page. """
    return 'background_page' in self._manifest

  def _uses_browser_action(self):
    """ Returns true if the extension defines a browser action. """
    return 'browser_action' in self._manifest

  def _uses_content_scripts(self):
    """ Returns true if the extension uses content scripts. """
    return 'content_scripts' in self._manifest

  def _uses_options(self):
    """ Returns true if the extension defines an options page. """
    return 'options_page' in self._manifest

  def _uses_page_action(self):
    """ Returns true if the extension uses a page action. """
    return 'page_action' in self._manifest

  def _uses_popup(self):
    """ Returns true if the extension defines a popup on a page or browser
    action. """
    has_b_popup = (self._uses_browser_action() and
                   'popup' in self._manifest['browser_action'])
    has_p_popup = (self._uses_page_action() and
                   'popup' in self._manifest['page_action'])
    return has_b_popup or has_p_popup

  def is_app(self):
    """ Returns true if the extension has an 'app' section in its manifest."""
    return 'app' in self._manifest

  def write_zip(self):
    """ Writes a zip file containing all of the files in this Sample's dir.

    The archive is named after the sample directory and is created next to
    it, in the sample's parent directory.  Files are stored underneath a top
    level folder named after the sample directory.  On success the zip's
    location is recorded in self['zip_path'].

    Raises:
      Exception: If the zip library reports a RuntimeError while writing.
    """
    sample_path = os.path.realpath(os.path.dirname(self._manifest_path))
    sample_dirname = os.path.basename(sample_path)
    sample_parentpath = os.path.dirname(sample_path)

    zip_filename = "%s.zip" % sample_dirname
    zip_path = os.path.join(sample_parentpath, zip_filename)
    zip_file = zipfile.ZipFile(zip_path, 'w')

    try:
      for root, dirs, files in sorted_walk(sample_path):
        if '.svn' in dirs:
          dirs.remove('.svn')        # Don't zip SVN metadata directories
        for file_name in files:
          file_path = os.path.join(root, file_name)
          # Relative path to store the file in under the zip.
          relpath = os.path.join(
              sample_dirname, self._strip_base_path(file_path, sample_path))
          zip_file.write(os.path.realpath(file_path), relpath)

      self['zip_path'] = os.path.join(
          os.path.dirname(os.path.dirname(self._get_relative_path())),
          zip_filename)

    except RuntimeError as msg:
      raise Exception("Could not write zip at %s: %s" % (zip_path, msg))
    finally:
      zip_file.close()
615