github_file_system.py revision 2a99a7e74a7f215066514fe81d2bfa6639d9eddd
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
5import json
6import logging
7import os
8
9import appengine_blobstore as blobstore
10from appengine_wrappers import urlfetch
11import object_store
12from file_system import FileSystem, StatInfo
13from StringIO import StringIO
14from future import Future
15from zipfile import ZipFile, BadZipfile
16
# Name of the zipball resource requested from the fetcher. It is also the
# path whose Stat() version selects the zipball, and the prefix of the
# blobstore keys built by _MakeKey().
ZIP_KEY = 'zipball'

# Credentials passed to the fetcher for authenticated requests; both are
# unset here.
USERNAME = PASSWORD = None
20
def _MakeKey(version):
  """Returns the blobstore key for the zipball of |version|.

  The key is ZIP_KEY and str(version) joined by a dot.
  """
  return '.'.join((ZIP_KEY, str(version)))
23
class _AsyncFetchFutureZip(object):
  """Resolves an asynchronous zipball download into a ZipFile.

  The authenticated fetch is started on construction. Get() waits for it,
  checks that the payload is a zip archive and stores the raw bytes in the
  blobstore under the key for |key_to_set|. GithubFileSystem hands instances
  of this class to Future as its delegate.
  """
  def __init__(self, fetcher, blobstore, key_to_set, key_to_delete=None):
    # Note: the |blobstore| parameter shadows the blobstore module inside this
    # method only; Get() refers to the module for BLOBSTORE_GITHUB.
    self._fetcher = fetcher
    self._blobstore = blobstore
    self._key_to_set = key_to_set
    self._key_to_delete = key_to_delete
    # Kick off the download now; Get() collects the result.
    self._fetch = fetcher.FetchAsync(ZIP_KEY,
                                     username=USERNAME,
                                     password=PASSWORD)

  def _Download(self):
    """Returns the downloaded bytes, refetching without credentials on a 401.

    May raise urlfetch.DownloadError, which Get() handles.
    """
    response = self._fetch.Get()
    # Anything other than an authentication failure is used as is.
    if response.status_code != 401:
      return response.content
    logging.error('Github authentication failed for %s, falling back to '
                  'unauthenticated.' % USERNAME)
    return self._fetcher.Fetch(ZIP_KEY).content

  def Get(self):
    """Returns the downloaded ZipFile, or None if it could not be obtained.

    None is returned, after logging, when the download fails or when the
    payload is not a valid zip archive. The blob for |key_to_delete|, if one
    was given, is deleted once the download has completed, before the payload
    is validated.
    """
    try:
      payload = self._Download()
    except urlfetch.DownloadError as e:
      logging.error('Bad github zip file: %s' % e)
      return None

    stale_version = self._key_to_delete
    if stale_version is not None:
      self._blobstore.Delete(_MakeKey(stale_version),
                             blobstore.BLOBSTORE_GITHUB)

    try:
      archive = ZipFile(StringIO(payload))
    except BadZipfile as e:
      logging.error('Bad github zip file: %s' % e)
      return None

    # Only payloads that parsed as a zip archive are cached.
    self._blobstore.Set(_MakeKey(self._key_to_set),
                        payload,
                        blobstore.BLOBSTORE_GITHUB)
    return archive
60
class GithubFileSystem(FileSystem):
  """FileSystem implementation which fetches resources from github.

  The repository is obtained as a single zipball which is cached in the
  blobstore, keyed by version. The version is the tree sha of the HEAD commit
  as reported by Stat(), or 0 when it could not be determined.
  """
  def __init__(self, fetcher, object_store, blobstore):
    # Note: the |object_store| and |blobstore| parameters shadow the modules
    # of the same name inside this method only; the other methods refer to
    # the modules for their namespace constants.
    self._fetcher = fetcher
    self._object_store = object_store
    self._blobstore = blobstore
    self._version = None
    self._GetZip(self.Stat(ZIP_KEY).version)

  def _GetZip(self, version):
    """Points |self._zip_file| at a Future for the zipball of |version|.

    A blob already cached for |version| is used directly. Otherwise an
    asynchronous download is started, which is also told to delete the blob
    cached for the previously loaded version.
    """
    key = _MakeKey(version)
    blob = self._blobstore.Get(key, blobstore.BLOBSTORE_GITHUB)
    if blob is None:
      self._zip_file = Future(
          delegate=_AsyncFetchFutureZip(self._fetcher,
                                        self._blobstore,
                                        version,
                                        key_to_delete=self._version))
    else:
      try:
        self._zip_file = Future(value=ZipFile(StringIO(blob)))
      except BadZipfile as e:
        # Drop the corrupt blob so the next lookup for this version misses
        # the cache instead of hitting the same bad data.
        self._blobstore.Delete(key, blobstore.BLOBSTORE_GITHUB)
        logging.error('Bad github zip file: %s' % e)
        self._zip_file = Future(value=None)
    self._version = version

  def _ResolveZip(self, operation):
    """Returns the current ZipFile, or None (after logging) if unavailable.

    |operation| names the calling operation in the error message.
    """
    try:
      zip_file = self._zip_file.Get()
    except Exception as e:
      # Deliberately broad: any failure to resolve the zip is reported to the
      # caller as "no data" rather than propagated.
      logging.error('Github %s error: %s' % (operation, e))
      return None
    if zip_file is None:
      logging.error('Bad github zip file.')
    return zip_file

  def _ReadFile(self, path):
    """Returns the contents of |path| in the zipball, or '' if there is none.

    |path| is relative to the archive's top-level directory and starts with
    a '/'.
    """
    zip_file = self._ResolveZip('ReadFile')
    if zip_file is None:
      return ''
    names = zip_file.namelist()
    if not names:
      # An archive without entries has no top-level directory to read from.
      logging.error('Bad github zip file.')
      return ''
    # The first entry is taken to be the top-level directory ("name/"); drop
    # its trailing slash since |path| supplies the leading one.
    prefix = names[0][:-1]
    return zip_file.read(prefix + path)

  def _ListDir(self, path):
    """Returns the names of the entries directly inside directory |path|.

    Sub-directories keep their trailing '/'. An empty list is returned if the
    zipball is unavailable.
    """
    zip_file = self._ResolveZip('ListDir')
    if zip_file is None:
      return []
    names = zip_file.namelist()
    if not names:
      return []
    # Take out parent directory name (GoogleChrome-chrome-app-samples-c78a30f)
    # but keep the slash after it, so that every name starts with '/'.
    strip = len(names[0]) - 1
    relative = [name[strip:] for name in names]
    # Remove the path of the directory we're listing from the names.
    children = [name[len(path):] for name in relative
                if name != path and name.startswith(path)]
    # Remove everything not directly in this directory. The last character is
    # ignored so that immediate sub-directories ("dir/") are kept.
    return [child for child in children if '/' not in child[:-1]]

  def Read(self, paths, binary=False):
    """Reads |paths| from the zipball of the current version.

    The zipball is reloaded first if Stat() reports a version other than the
    one currently loaded. Paths ending in '/' are listed as directories, all
    others are read as files. |binary| is accepted but not used here.

    Returns a Future holding a dict which maps each path to its result.
    """
    version = self.Stat(ZIP_KEY).version
    if version != self._version:
      self._GetZip(version)
    result = {}
    for path in paths:
      if path.endswith('/'):
        result[path] = self._ListDir(path)
      else:
        result[path] = self._ReadFile(path)
    return Future(value=result)

  def _DefaultStat(self, path):
    """Returns the fallback StatInfo (version 0) for |path| and caches it."""
    version = 0
    # Cache for a minute so we don't try to keep fetching bad data.
    self._object_store.Set(path, version, object_store.GITHUB_STAT, time=60)
    return StatInfo(version)

  @staticmethod
  def _ParseTreeSha(content):
    """Returns the value at commit.tree.sha in JSON |content|, or None.

    None is returned when |content| is not valid JSON or does not have the
    expected nesting of objects.
    """
    try:
      node = json.loads(content)
    except (TypeError, ValueError):
      return None
    for key in ('commit', 'tree', 'sha'):
      if not isinstance(node, dict):
        return None
      node = node.get(key)
    return node

  def Stat(self, path):
    """Returns a StatInfo whose version is the tree sha of the HEAD commit.

    The version cached in the object store for |path| is used if present.
    Otherwise the HEAD commit is fetched, retrying without credentials if
    authentication fails, and the result is cached. If the fetch fails or the
    response does not contain a tree sha, the default version is returned.
    """
    version = self._object_store.Get(path, object_store.GITHUB_STAT).Get()
    if version is not None:
      return StatInfo(version)
    try:
      result = self._fetcher.Fetch('commits/HEAD',
                                   username=USERNAME,
                                   password=PASSWORD)
      # Check if Github authentication failed.
      if result.status_code == 401:
        logging.error('Github authentication failed for %s, falling back to '
                      'unauthenticated.' % USERNAME)
        result = self._fetcher.Fetch('commits/HEAD')
    except urlfetch.DownloadError as e:
      logging.error('GithubFileSystem Stat: %s' % e)
      return self._DefaultStat(path)
    # A malformed response yields None here instead of raising, so that it is
    # handled like any other failure to obtain the commit hash.
    version = self._ParseTreeSha(result.content)
    if version is None:
      logging.warning('Problem fetching commit hash from github.')
      return self._DefaultStat(path)
    self._object_store.Set(path, version, object_store.GITHUB_STAT)
    return StatInfo(version)
166