gitiles_file_system.py revision 03b57e008b61dfcb1fbad3aea950ae0e001748b0
1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5from base64 import b64decode
6from itertools import izip
7import json
8import posixpath
9import traceback
10
11from appengine_url_fetcher import AppEngineUrlFetcher
12from appengine_wrappers import IsDownloadError
13from docs_server_utils import StringIdentity
14from file_system import (FileNotFoundError,
15                         FileSystem,
16                         FileSystemError,
17                         StatInfo)
18from future import All, Future
19from path_util import AssertIsValid, IsDirectory, ToDirectory
20from third_party.json_schema_compiler.memoize import memoize
21from url_constants import GITILES_BASE, GITILES_BRANCH_BASE
22
23
24_JSON_FORMAT = '?format=JSON'
25_TEXT_FORMAT = '?format=TEXT'
26
27
28def _ParseGitilesJson(json_data):
29  '''json.loads with fix-up for non-executable JSON. Use this to parse any JSON
30  data coming from Gitiles views.
31  '''
32  return json.loads(json_data[json_data.find('{'):])
33
34
def _CreateStatInfo(json_data):
  '''Builds a StatInfo from the Gitiles JSON in |json_data|: the top-level
  'id' is passed as the first argument, and a dict mapping each entry's 'name'
  to that entry's 'id' as the second.
  '''
  parsed = _ParseGitilesJson(json_data)
  # Read the top-level ID first so that a missing 'id' is reported before a
  # missing 'entries'.
  tree_id = parsed['id']
  child_ids = {}
  for entry in parsed['entries']:
    child_ids[entry['name']] = entry['id']
  return StatInfo(tree_id, child_ids)
42
43
class GitilesFileSystem(FileSystem):
  '''Class to fetch filesystem data from the Chromium project's gitiles
  service.
  '''
  @staticmethod
  def Create(branch='master', commit=None):
    '''Returns a GitilesFileSystem that fetches through an AppEngineUrlFetcher.

    A truthy |commit| takes precedence and is addressed under GITILES_BASE.
    Otherwise the 'master' branch is addressed under GITILES_BASE and any other
    |branch| under GITILES_BRANCH_BASE.
    '''
    if commit:
      base_url = '%s/%s' % (GITILES_BASE, commit)
    elif branch == 'master':
      # Compare by value, not identity: |is| only holds when both strings
      # happen to be the same object, which is not guaranteed for a branch name
      # built at runtime.
      base_url = '%s/master' % GITILES_BASE
    else:
      base_url = '%s/%s' % (GITILES_BRANCH_BASE, branch)
    return GitilesFileSystem(AppEngineUrlFetcher(), base_url, branch, commit)

  def __init__(self, fetcher, base_url, branch, commit):
    '''Stores the collaborators as-is; nothing is fetched here.

    |fetcher| must provide FetchAsync(url). |base_url| is the URL prefix that
    every fetched path is appended to. |branch| and |commit| are only consulted
    by GetIdentity.
    '''
    self._fetcher = fetcher
    self._base_url = base_url
    self._branch = branch
    self._commit = commit

  def _FetchAsync(self, url):
    '''Convenience wrapper for fetcher.FetchAsync, so callers don't
    need to use posixpath.join.

    |url| is checked with AssertIsValid and then fetched relative to the base
    URL, i.e. as '<base_url>/<url>'.
    '''
    AssertIsValid(url)
    return self._fetcher.FetchAsync('%s/%s' % (self._base_url, url))

  def _ResolveFetchContent(self, path, fetch_future, skip_not_found=False):
    '''Returns a future to cleanly resolve |fetch_future|.

    The returned future yields the response content for a 200 status. When
    |skip_not_found| is set, a 404 status or a download error (as judged by
    IsDownloadError) yields None; otherwise those raise FileNotFoundError. Any
    other status or error raises FileSystemError. |path| is only used to build
    the error messages.
    '''
    # Passed as the second callback to Then; presumably invoked with the
    # exception when the fetch itself fails - confirm against Future.Then.
    def handle(e):
      if skip_not_found and IsDownloadError(e):
        return None
      exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError
      raise exc_type('%s fetching %s for Get from %s: %s' %
          (type(e).__name__, path, self._base_url, traceback.format_exc()))

    # Translates the HTTP status of a completed fetch into content or an error.
    def get_content(result):
      if result.status_code == 404:
        if skip_not_found:
          return None
        raise FileNotFoundError('Got 404 when fetching %s for Get from %s' %
                                (path, self._base_url))
      if result.status_code != 200:
        raise FileSystemError(
            'Got %s when fetching %s for Get from %s, content %s' %
            (result.status_code, path, self._base_url, result.content))
      return result.content
    return fetch_future.Then(get_content, handle)

  def Read(self, paths, skip_not_found=False):
    '''Returns a future that resolves to a dict mapping each path in |paths| to
    its content.

    Directory paths (as judged by IsDirectory) map to a list of entry names, in
    which entries of type 'tree' carry a trailing '/'. All other paths map to
    the base64-decoded response content. Paths whose content resolved to None
    (see |skip_not_found| in _ResolveFetchContent) are left out of the dict.
    '''
    # Directory content is formatted in JSON in Gitiles as follows:
    #
    #   {
    #     "id": "12a5464de48d2c46bc0b2dc78fafed75aab554fa", # The tree ID.
    #     "entries": [
    #       {
    #         "mode": 33188,
    #         "type": "blob",
    #           "id": "ab971ca447bc4bce415ed4498369e00164d91cb6", # File ID.
    #         "name": ".gitignore"
    #       },
    #       ...
    #     ]
    #   }
    def list_dir(json_data):
      entries = _ParseGitilesJson(json_data).get('entries', [])
      return [e['name'] + ('/' if e['type'] == 'tree' else '') for e in entries]

    def fixup_url_format(path):
      # By default, Gitiles URLs display resources in HTML. To get resources
      # suitable for our consumption, a '?format=' string must be appended to
      # the URL. The format may be one of 'JSON' or 'TEXT' for directory or
      # text resources, respectively.
      return path + (_JSON_FORMAT if IsDirectory(path) else _TEXT_FORMAT)

    # Snapshot |paths|: it is walked once to issue the fetches and a second
    # time in parse_contents, so a one-shot iterator would otherwise be
    # exhausted by the first walk and silently produce an empty result.
    paths = list(paths)

    def parse_contents(results):
      value = {}
      # |results| is expected to line up with |paths| one-to-one, in order.
      for path, content in izip(paths, results):
        if content is None:
          continue
        # Gitiles encodes text content in base64 (see
        # http://tools.ietf.org/html/rfc4648 for info about base64).
        value[path] = (list_dir if IsDirectory(path) else b64decode)(content)
      return value

    # One resolved-content future per path, in the same order as |paths|.
    content_futures = (
        self._ResolveFetchContent(path,
                                  self._FetchAsync(fixup_url_format(path)),
                                  skip_not_found)
        for path in paths)
    return All(content_futures).Then(parse_contents)

  def Refresh(self):
    '''Returns a future that is already resolved to an empty tuple; no fetch is
    issued.
    '''
    return Future(value=())

  @memoize
  def _GetCommitInfo(self, key):
    '''Gets the commit information specified by |key|.

    Returns a future that resolves to the value stored under |key| in the
    commit info JSON.

    The JSON view for commit info looks like:
      {
        "commit": "8fd578e1a7b142cd10a4387861f05fb9459b69e2", # Commit ID.
        "tree": "3ade65d8a91eadd009a6c9feea8f87db2c528a53",   # Tree ID.
        "parents": [
          "a477c787fe847ae0482329f69b39ce0fde047359" # Previous commit ID.
        ],
        "author": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:17:21 2014"
        },
        "committer": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:18:28 2014"
        },
        "message": "...",
        "tree_diff": [...]
      }
    '''
    # Commit information for a branch is obtained by appending '?format=JSON'
    # to the branch URL. Note that '<gitiles_url>/<branch>?format=JSON' is
    # different from '<gitiles_url>/<branch>/?format=JSON': the latter serves
    # the root directory JSON content, whereas the former serves the branch
    # commit info JSON content.
    fetch_future = self._fetcher.FetchAsync(self._base_url + _JSON_FORMAT)
    content_future = self._ResolveFetchContent(self._base_url, fetch_future)
    # The parameter is deliberately not called |json|, which would shadow the
    # json module inside the callback.
    return content_future.Then(
        lambda content: _ParseGitilesJson(content)[key])

  def GetCommitID(self):
    '''Returns a future that resolves to the commit ID for this branch.
    '''
    return self._GetCommitInfo('commit')

  def StatAsync(self, path):
    '''Returns a future that resolves to a StatInfo for |path|.

    The JSON listing of the directory part of |path| is fetched. For a
    directory |path| the StatInfo of that listing is returned as-is; for any
    other path a StatInfo holding just that file's ID is returned.

    Raises FileSystemError if the listing has no ID, and FileNotFoundError if
    the file name is not among the listing's entries.
    '''
    dir_, filename = posixpath.split(path)
    def stat(content):
      stat_info = _CreateStatInfo(content)
      if stat_info.version is None:
        raise FileSystemError('Failed to find version of dir %s' % dir_)
      if IsDirectory(path):
        return stat_info
      if filename not in stat_info.child_versions:
        raise FileNotFoundError(
            '%s from %s was not in child versions for Stat' % (filename, path))
      return StatInfo(stat_info.child_versions[filename])
    fetch_future = self._FetchAsync(ToDirectory(dir_) + _JSON_FORMAT)
    return self._ResolveFetchContent(path, fetch_future).Then(stat)

  def GetIdentity(self):
    '''Returns '<class name>@<StringIdentity of the base>', where the base is
    GITILES_BRANCH_BASE for a non-master branch with no commit and
    GITILES_BASE otherwise.
    '''
    # NOTE: Do not use commit information to create the string identity.
    # Doing so will mess up caching.
    if self._commit is None and self._branch != 'master':
      str_id = GITILES_BRANCH_BASE
    else:
      str_id = GITILES_BASE
    return '@'.join((self.__class__.__name__, StringIdentity(str_id)))
201