1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5
6from base64 import b64decode
7from itertools import izip
8import json
9import logging
10import posixpath
11import time
12import traceback
13
14from appengine_url_fetcher import AppEngineUrlFetcher
15from appengine_wrappers import IsDownloadError, app_identity
16from docs_server_utils import StringIdentity
17from file_system import (FileNotFoundError,
18                         FileSystem,
19                         FileSystemError,
20                         FileSystemThrottledError,
21                         StatInfo)
22from future import All, Future
23from path_util import AssertIsValid, IsDirectory, ToDirectory
24from third_party.json_schema_compiler.memoize import memoize
25from url_constants import (GITILES_BASE,
26                           GITILES_SRC_ROOT,
27                           GITILES_BRANCHES_PATH,
28                           GITILES_OAUTH2_SCOPE)
29
30
# Query strings appended to Gitiles URLs to select the response format: JSON is
# requested for directory listings and commit info, TEXT for file content.
_JSON_FORMAT = '?format=JSON'
_TEXT_FORMAT = '?format=TEXT'
# Path component inserted after GITILES_BASE when an access token is available
# (see GitilesFileSystem.Create).
_AUTH_PATH_PREFIX = '/a'
34
35
36def _ParseGitilesJson(json_data):
37  '''json.loads with fix-up for non-executable JSON. Use this to parse any JSON
38  data coming from Gitiles views.
39  '''
40  return json.loads(json_data[json_data.find('{'):])
41
42
def _CreateStatInfo(json_data):
  '''Builds a StatInfo from the Gitiles directory JSON in |json_data|.

  The StatInfo is constructed from the tree's own 'id' together with a dict
  mapping each entry's 'name' to that entry's 'id'.
  '''
  parsed_tree = _ParseGitilesJson(json_data)
  tree_id = parsed_tree['id']
  child_ids = {}
  for entry in parsed_tree['entries']:
    child_ids[entry['name']] = entry['id']
  return StatInfo(tree_id, child_ids)
50
51
class GitilesFileSystem(FileSystem):
  '''Class to fetch filesystem data from the Chromium project's gitiles
  service.

  Every request is issued relative to a base URL that identifies either a
  specific commit, the master branch, or a release branch (see Create).
  '''
  @staticmethod
  def Create(branch='master', commit=None):
    '''Creates a GitilesFileSystem rooted at |commit| when one is given,
    otherwise at |branch|.

    The authenticated path prefix is inserted into the base URL only when an
    access token could be obtained for the Gitiles OAuth2 scope.
    '''
    token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    path_prefix = '' if token is None else _AUTH_PATH_PREFIX
    # NOTE: |branch| must be compared by value. An identity test ('is') against
    # a string literal only succeeds when the interpreter happens to have
    # interned the caller's string, so a computed 'master' would otherwise be
    # routed to the release-branch URL.
    if commit:
      base_url = '%s%s/%s/%s' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT, commit)
    elif branch == 'master':
      base_url = '%s%s/%s/master' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT)
    else:
      base_url = '%s%s/%s/%s/%s' % (
          GITILES_BASE, path_prefix, GITILES_SRC_ROOT,
          GITILES_BRANCHES_PATH, branch)
    return GitilesFileSystem(AppEngineUrlFetcher(), base_url, branch, commit)

  def __init__(self, fetcher, base_url, branch, commit):
    '''Stores the |fetcher| used for requests, the |base_url| they are issued
    against, and the |branch| and |commit| this file system was created for.
    '''
    self._fetcher = fetcher
    self._base_url = base_url
    self._branch = branch
    self._commit = commit

  def _FetchAsync(self, url):
    '''Convenience wrapper for fetcher.FetchAsync, so callers don't
    need to use posixpath.join.

    |url| is validated, appended to the base URL, and fetched with a freshly
    requested access token.
    '''
    AssertIsValid(url)
    access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    return self._fetcher.FetchAsync('%s/%s' % (self._base_url, url),
                                    access_token=access_token)

  def _ResolveFetchContent(self, path, fetch_future, skip_not_found=False):
    '''Returns a future to cleanly resolve |fetch_future|.

    The returned future resolves to the response content on a 200 status. With
    |skip_not_found| set, a 404 response or a download error resolves to None.
    Otherwise it raises:
      FileNotFoundError on a 404 response or a download error,
      FileSystemThrottledError on a 429 response,
      FileSystemError on any other status or fetch failure.
    |path| is only used to build error messages.
    '''
    def handle(e):
      # Invoked when the fetch itself failed rather than returning a response.
      if skip_not_found and IsDownloadError(e):
        return None
      exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError
      raise exc_type('%s fetching %s for Get from %s: %s' %
          (type(e).__name__, path, self._base_url, traceback.format_exc()))

    def get_content(result):
      if result.status_code == 404:
        if skip_not_found:
          return None
        raise FileNotFoundError('Got 404 when fetching %s for Get from %s' %
                                (path, self._base_url))
      if result.status_code == 429:
        # The same message is logged and raised.
        throttled_message = (
            'Access throttled when fetching %s for Get from %s' %
            (path, self._base_url))
        logging.warning(throttled_message)
        raise FileSystemThrottledError(throttled_message)
      if result.status_code != 200:
        raise FileSystemError(
            'Got %s when fetching %s for Get from %s, content %s' %
            (result.status_code, path, self._base_url, result.content))
      return result.content

    return fetch_future.Then(get_content, handle)

  def Read(self, paths, skip_not_found=False):
    '''Returns a future resolving to a dict that maps each of |paths| to its
    content: a list of child names for directories (sub-directories carry a
    trailing '/'), or the base64-decoded content for files.

    Paths whose fetch resolved to None (only possible when |skip_not_found| is
    set) are left out of the result.
    '''
    # Directory content is formatted in JSON in Gitiles as follows:
    #
    #   {
    #     "id": "12a5464de48d2c46bc0b2dc78fafed75aab554fa", # The tree ID.
    #     "entries": [
    #       {
    #         "mode": 33188,
    #         "type": "blob",
    #           "id": "ab971ca447bc4bce415ed4498369e00164d91cb6", # File ID.
    #         "name": ".gitignore"
    #       },
    #       ...
    #     ]
    #   }
    def list_dir(json_data):
      entries = _ParseGitilesJson(json_data).get('entries', [])
      return [e['name'] + ('/' if e['type'] == 'tree' else '') for e in entries]

    def fixup_url_format(path):
      # By default, Gitiles URLs display resources in HTML. To get resources
      # suitable for our consumption, a '?format=' string must be appended to
      # the URL. The format may be one of 'JSON' or 'TEXT' for directory or
      # text resources, respectively.
      return path + (_JSON_FORMAT if IsDirectory(path) else _TEXT_FORMAT)

    # A list of tuples of the form (path, Future).
    fetches = [(path, self._FetchAsync(fixup_url_format(path)))
               for path in paths]

    def parse_contents(results):
      value = {}
      # Pair each result with the path recorded in |fetches| instead of
      # iterating |paths| a second time, so that a one-shot iterable of paths
      # does not silently produce an empty result.
      for (path, _), content in izip(fetches, results):
        if content is None:
          continue
        # Gitiles encodes text content in base64 (see
        # http://tools.ietf.org/html/rfc4648 for info about base64).
        value[path] = (list_dir if IsDirectory(path) else b64decode)(content)
      return value

    return All(self._ResolveFetchContent(path, future, skip_not_found)
               for path, future in fetches).Then(parse_contents)

  def Refresh(self):
    '''Returns an already-resolved future; there is nothing to refresh.
    '''
    return Future(value=())

  @memoize
  def _GetCommitInfo(self, key):
    '''Gets the commit information specified by |key|.

    Returns a future that resolves to the value stored under |key| in the
    commit info JSON.

    The JSON view for commit info looks like:
      {
        "commit": "8fd578e1a7b142cd10a4387861f05fb9459b69e2", # Commit ID.
        "tree": "3ade65d8a91eadd009a6c9feea8f87db2c528a53",   # Tree ID.
        "parents": [
          "a477c787fe847ae0482329f69b39ce0fde047359" # Previous commit ID.
        ],
        "author": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:17:21 2014"
        },
        "committer": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:18:28 2014"
        },
        "message": "...",
        "tree_diff": [...]
      }
    '''
    # Commit information for a branch is obtained by appending '?format=JSON'
    # to the branch URL. Note that '<gitiles_url>/<branch>?format=JSON' is
    # different from '<gitiles_url>/<branch>/?format=JSON': the latter serves
    # the root directory JSON content, whereas the former serves the branch
    # commit info JSON content.

    access_token, _ = app_identity.get_access_token(GITILES_OAUTH2_SCOPE)
    fetch_future = self._fetcher.FetchAsync(self._base_url + _JSON_FORMAT,
                                            access_token=access_token)
    content_future = self._ResolveFetchContent(self._base_url, fetch_future)
    # The parameter is deliberately not named 'json' so that it does not shadow
    # the json module.
    return content_future.Then(
        lambda commit_json: _ParseGitilesJson(commit_json)[key])

  def GetCommitID(self):
    '''Returns a future that resolves to the commit ID for this branch.
    '''
    return self._GetCommitInfo('commit')

  def GetPreviousCommitID(self):
    '''Returns a future that resolves to the previous commit ID for this branch.
    '''
    return self._GetCommitInfo('parents').Then(lambda parents: parents[0])

  def StatAsync(self, path):
    '''Returns a future resolving to the StatInfo for |path|.

    The JSON listing of the directory part of |path| is fetched. For a
    directory path the listing's own StatInfo (tree ID plus child IDs) is
    returned; for a file path the result is a StatInfo holding only that file's
    ID.

    Raises FileSystemError if the listing has no version, and
    FileNotFoundError if the file is not present in the listing.
    '''
    dir_, filename = posixpath.split(path)
    def stat(content):
      stat_info = _CreateStatInfo(content)
      if stat_info.version is None:
        raise FileSystemError('Failed to find version of dir %s' % dir_)
      if IsDirectory(path):
        return stat_info
      if filename not in stat_info.child_versions:
        raise FileNotFoundError(
            '%s from %s was not in child versions for Stat' % (filename, path))
      return StatInfo(stat_info.child_versions[filename])

    fetch_future = self._FetchAsync(ToDirectory(dir_) + _JSON_FORMAT)
    return self._ResolveFetchContent(path, fetch_future).Then(stat)

  def GetIdentity(self):
    '''Returns a string identifying this file system, built from the class name
    and the repository (or release branch) URL.
    '''
    # NOTE: Do not use commit information to create the string identity.
    # Doing so will mess up caching.
    if self._commit is None and self._branch != 'master':
      str_id = '%s/%s/%s/%s' % (
          GITILES_BASE, GITILES_SRC_ROOT, GITILES_BRANCHES_PATH, self._branch)
    else:
      str_id = '%s/%s' % (GITILES_BASE, GITILES_SRC_ROOT)
    return '@'.join((self.__class__.__name__, StringIdentity(str_id)))