# gitiles_file_system.py revision 03b57e008b61dfcb1fbad3aea950ae0e001748b0
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from base64 import b64decode
from itertools import izip
import json
import posixpath
import traceback

from appengine_url_fetcher import AppEngineUrlFetcher
from appengine_wrappers import IsDownloadError
from docs_server_utils import StringIdentity
from file_system import (FileNotFoundError,
                         FileSystem,
                         FileSystemError,
                         StatInfo)
from future import All, Future
from path_util import AssertIsValid, IsDirectory, ToDirectory
from third_party.json_schema_compiler.memoize import memoize
from url_constants import GITILES_BASE, GITILES_BRANCH_BASE


_JSON_FORMAT = '?format=JSON'
_TEXT_FORMAT = '?format=TEXT'


def _ParseGitilesJson(json_data):
  '''json.loads with fix-up for non-executable JSON. Use this to parse any JSON
  data coming from Gitiles views.

  Everything in |json_data| that precedes the first '{' is discarded before
  the remainder is handed to json.loads.
  '''
  return json.loads(json_data[json_data.find('{'):])


def _CreateStatInfo(json_data):
  '''Returns a StatInfo object comprised of the tree ID for |json_data|,
  as well as the tree IDs for the entries in |json_data|.

  |json_data| is the raw Gitiles JSON text of a directory listing; it must
  contain an 'id' and an 'entries' list whose items have 'name' and 'id'.
  '''
  tree = _ParseGitilesJson(json_data)
  return StatInfo(tree['id'],
                  dict((e['name'], e['id']) for e in tree['entries']))


class GitilesFileSystem(FileSystem):
  '''Class to fetch filesystem data from the Chromium project's gitiles
  service.
  '''
  @staticmethod
  def Create(branch='master', commit=None):
    '''Builds a GitilesFileSystem backed by an AppEngineUrlFetcher.

    The base URL is chosen as follows:
      - |commit| given: GITILES_BASE/<commit> (|branch| is ignored for the URL).
      - |branch| equal to 'master': GITILES_BASE/master.
      - any other |branch|: GITILES_BRANCH_BASE/<branch>.
    '''
    if commit:
      base_url = '%s/%s' % (GITILES_BASE, commit)
    # Compare by value: an identity test ('is') only succeeds for strings that
    # happen to be the same object as the literal, so a 'master' string built
    # at runtime would be misrouted to the branch base URL.
    elif branch == 'master':
      base_url = '%s/master' % GITILES_BASE
    else:
      base_url = '%s/%s' % (GITILES_BRANCH_BASE, branch)
    return GitilesFileSystem(AppEngineUrlFetcher(), base_url, branch, commit)

  def __init__(self, fetcher, base_url, branch, commit):
    '''Stores the fetcher, the base URL every request is made against, and
    the |branch| / |commit| this file system was created for.
    '''
    self._fetcher = fetcher
    self._base_url = base_url
    self._branch = branch
    self._commit = commit

  def _FetchAsync(self, url):
    '''Convenience wrapper for fetcher.FetchAsync, so callers don't
    need to use posixpath.join.

    |url| is validated with AssertIsValid and then appended to the base URL
    with a '/' separator.
    '''
    AssertIsValid(url)
    return self._fetcher.FetchAsync('%s/%s' % (self._base_url, url))

  def _ResolveFetchContent(self, path, fetch_future, skip_not_found=False):
    '''Returns a future to cleanly resolve |fetch_future|.

    The returned future resolves to the response content on a 200 status.
    A 404 resolves to None when |skip_not_found| is set and raises
    FileNotFoundError otherwise; any other status raises FileSystemError.
    |path| is only used to build error messages.
    '''
    # Passed as the second argument to Then(); presumably invoked with the
    # exception raised while resolving |fetch_future| -- confirm against
    # future.py.
    def handle(e):
      if skip_not_found and IsDownloadError(e):
        return None
      exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError
      raise exc_type('%s fetching %s for Get from %s: %s' %
          (type(e).__name__, path, self._base_url, traceback.format_exc()))

    def get_content(result):
      if result.status_code == 404:
        if skip_not_found:
          return None
        raise FileNotFoundError('Got 404 when fetching %s for Get from %s' %
                                (path, self._base_url))
      if result.status_code != 200:
        raise FileSystemError(
            'Got %s when fetching %s for Get from %s, content %s' %
            (result.status_code, path, self._base_url, result.content))
      return result.content
    return fetch_future.Then(get_content, handle)

  def Read(self, paths, skip_not_found=False):
    '''Fetches every path in |paths| and returns a future resolving to a dict
    that maps each path to its content.

    Directory paths map to a list of entry names (sub-directories carry a
    trailing '/'); file paths map to the base64-decoded file content. Paths
    whose content resolved to None (see |skip_not_found| in
    _ResolveFetchContent) are omitted from the dict.
    '''
    # |paths| is walked twice: once to issue the fetches and once to pair the
    # results back up with their paths. Materialize it so that a one-shot
    # iterable is not exhausted by the first pass.
    paths = list(paths)

    # Directory content is formatted in JSON in Gitiles as follows:
    #
    #   {
    #     "id": "12a5464de48d2c46bc0b2dc78fafed75aab554fa", # The tree ID.
    #     "entries": [
    #       {
    #         "mode": 33188,
    #         "type": "blob",
    #         "id": "ab971ca447bc4bce415ed4498369e00164d91cb6", # File ID.
    #         "name": ".gitignore"
    #       },
    #       ...
    #     ]
    #   }
    def list_dir(json_data):
      entries = _ParseGitilesJson(json_data).get('entries', [])
      return [e['name'] + ('/' if e['type'] == 'tree' else '') for e in entries]

    def fixup_url_format(path):
      # By default, Gitiles URLs display resources in HTML. To get resources
      # suitable for our consumption, a '?format=' string must be appended to
      # the URL. The format may be one of 'JSON' or 'TEXT' for directory or
      # text resources, respectively.
      return path + (_JSON_FORMAT if IsDirectory(path) else _TEXT_FORMAT)

    # A list of tuples of the form (path, Future).
    fetches = ((path, self._FetchAsync(fixup_url_format(path)))
               for path in paths)

    def parse_contents(results):
      value = {}
      for path, content in izip(paths, results):
        if content is None:
          continue
        # Gitiles encodes text content in base64 (see
        # http://tools.ietf.org/html/rfc4648 for info about base64).
        value[path] = (list_dir if IsDirectory(path) else b64decode)(content)
      return value
    return All(self._ResolveFetchContent(path, future, skip_not_found)
               for path, future in fetches).Then(parse_contents)

  def Refresh(self):
    '''Nothing to refresh; returns an already-resolved Future holding an empty
    tuple.
    '''
    return Future(value=())

  @memoize
  def _GetCommitInfo(self, key):
    '''Gets the commit information specified by |key|.

    The JSON view for commit info looks like:
      {
        "commit": "8fd578e1a7b142cd10a4387861f05fb9459b69e2", # Commit ID.
        "tree": "3ade65d8a91eadd009a6c9feea8f87db2c528a53", # Tree ID.
        "parents": [
          "a477c787fe847ae0482329f69b39ce0fde047359" # Previous commit ID.
        ],
        "author": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:17:21 2014"
        },
        "committer": {
          "name": "...",
          "email": "...",
          "time": "Tue Aug 12 17:18:28 2014"
        },
        "message": "...",
        "tree_diff": [...]
      }

    Returns a future that resolves to the value stored under |key| in that
    JSON object.
    '''
    # Commit information for a branch is obtained by appending '?format=JSON'
    # to the branch URL. Note that '<gitiles_url>/<branch>?format=JSON' is
    # different from '<gitiles_url>/<branch>/?format=JSON': the latter serves
    # the root directory JSON content, whereas the former serves the branch
    # commit info JSON content.
    fetch_future = self._fetcher.FetchAsync(self._base_url + _JSON_FORMAT)
    content_future = self._ResolveFetchContent(self._base_url, fetch_future)
    # The parameter is deliberately not called 'json' so that it does not
    # shadow the json module.
    return content_future.Then(
        lambda json_data: _ParseGitilesJson(json_data)[key])

  def GetCommitID(self):
    '''Returns a future that resolves to the commit ID for this branch.
    '''
    return self._GetCommitInfo('commit')

  def StatAsync(self, path):
    '''Returns a future that resolves to the StatInfo for |path|.

    The JSON listing of the directory containing |path| is fetched. For a
    directory |path| the StatInfo built from that listing is returned as is;
    for a file, a StatInfo holding only the file's ID from that listing is
    returned. Raises FileNotFoundError if the file is not listed and
    FileSystemError if the listing has no version.
    '''
    dir_, filename = posixpath.split(path)
    def stat(content):
      stat_info = _CreateStatInfo(content)
      if stat_info.version is None:
        raise FileSystemError('Failed to find version of dir %s' % dir_)
      if IsDirectory(path):
        return stat_info
      if filename not in stat_info.child_versions:
        raise FileNotFoundError(
            '%s from %s was not in child versions for Stat' % (filename, path))
      return StatInfo(stat_info.child_versions[filename])
    fetch_future = self._FetchAsync(ToDirectory(dir_) + _JSON_FORMAT)
    return self._ResolveFetchContent(path, fetch_future).Then(stat)

  def GetIdentity(self):
    '''Returns a string identifying this file system, built from the class
    name and the Gitiles base (branch base for non-master branches without a
    pinned commit).
    '''
    # NOTE: Do not use commit information to create the string identity.
    # Doing so will mess up caching.
    if self._commit is None and self._branch != 'master':
      str_id = GITILES_BRANCH_BASE
    else:
      str_id = GITILES_BASE
    return '@'.join((self.__class__.__name__, StringIdentity(str_id)))