gcs_file_system.py revision e5d81f57cb97b3b6b7fccc9c5610d21eb81db09d
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from third_party.cloudstorage import cloudstorage_api
from third_party.cloudstorage import common
from third_party.cloudstorage import errors

from docs_server_utils import StringIdentity
from file_system import FileSystem, FileNotFoundError, StatInfo
from future import Future
from path_util import (
    AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join)

import logging
import traceback


# See gcs_file_system_provider.py for documentation on using Google Cloud
# Storage as a filesystem.
#
# Note that the path requirements for GCS are different for the docserver;
# GCS requires that paths start with a /, we require that they don't.


# Name of the file containing the Git hash of the latest commit sync'ed
# to Cloud Storage. This file is generated by the Github->GCS sync script
LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt'


def _ReadFile(filename):
  '''Returns the contents of the GCS object |filename| (a docserver-style
  path, i.e. no leading '/').

  Raises FileNotFoundError if the read fails for any reason.
  '''
  AssertIsFile(filename)
  try:
    # GCS paths must start with a '/'; docserver paths must not (see the
    # module comment above), hence the prefix here.
    with cloudstorage_api.open('/' + filename, 'r') as f:
      return f.read()
  except errors.Error:
    raise FileNotFoundError('Read failed for %s: %s' % (filename,
                                                        traceback.format_exc()))


def _ListDir(dir_name, recursive=False):
  '''Returns the paths of the objects under |dir_name|, relative to
  |dir_name|. If |recursive| is False only direct children are returned.

  Raises FileNotFoundError if the listing fails for any reason.
  '''
  AssertIsDirectory(dir_name)
  try:
    # The listbucket method uses a prefix approach to simulate hierarchy.
    # Calling it with the "delimiter" argument set to '/' gets only files
    # directly inside the directory, not all recursive content.
    delimiter = None if recursive else '/'
    files = cloudstorage_api.listbucket('/' + dir_name, delimiter=delimiter)
    # Strip the leading '/' that GCS requires, then the |dir_name| prefix,
    # leaving paths relative to |dir_name|.
    return [os_path.filename.lstrip('/')[len(dir_name):] for os_path in files]
  except errors.Error:
    raise FileNotFoundError('cloudstorage.listbucket failed for %s: %s' %
                            (dir_name, traceback.format_exc()))


def _CreateStatInfo(bucket, path):
  '''Returns a StatInfo for |path| within |bucket|.

  The version reported is the Git hash of the latest sync'ed commit, read
  from LAST_COMMIT_HASH_FILENAME; every child of a directory shares that
  same version. Raises FileNotFoundError if the hash file or listing is
  unavailable.
  '''
  full_path = Join(bucket, path)
  last_commit_file = Join(bucket, LAST_COMMIT_HASH_FILENAME)
  try:
    last_commit = _ReadFile(last_commit_file)
    if IsDirectory(full_path):
      child_versions = dict((filename, last_commit)
                            for filename in _ListDir(full_path))
    else:
      child_versions = None
    return StatInfo(last_commit, child_versions)
  except (TypeError, errors.Error):
    raise FileNotFoundError('cloudstorage.stat failed for %s: %s' % (path,
                                                        traceback.format_exc()))


class CloudStorageFileSystem(FileSystem):
  '''FileSystem implementation which fetches resources from Google Cloud
  Storage.
  '''
  def __init__(self, bucket, debug_access_token=None, debug_bucket_prefix=None):
    self._bucket = bucket
    if debug_access_token:
      logging.debug('gcs: using debug access token: %s' % debug_access_token)
      common.set_access_token(debug_access_token)
    if debug_bucket_prefix:
      logging.debug('gcs: prefixing all bucket names with %s' %
                    debug_bucket_prefix)
      self._bucket = debug_bucket_prefix + self._bucket
    AssertIsValid(self._bucket)

  def Read(self, paths, skip_not_found=False):
    '''Returns a Future of a dict mapping each path in |paths| to its
    contents (file) or its directory listing (directory path).
    '''
    def resolve():
      try:
        result = {}
        for path in paths:
          full_path = Join(self._bucket, path)
          logging.debug('gcs: requested path "%s", reading "%s"' %
                        (path, full_path))
          if IsDirectory(path):
            result[path] = _ListDir(full_path)
          else:
            result[path] = _ReadFile(full_path)
        return result
      except errors.AuthorizationError:
        self._warnAboutAuthError()
        raise

    return Future(callback=resolve)

  def Refresh(self):
    # Nothing to refresh; content is kept current by the Github->GCS sync.
    return Future(value=())

  def Stat(self, path):
    '''Returns the StatInfo for |path|; see _CreateStatInfo.'''
    AssertIsValid(path)
    try:
      return _CreateStatInfo(self._bucket, path)
    except errors.AuthorizationError:
      self._warnAboutAuthError()
      raise

  def GetIdentity(self):
    return '@'.join((self.__class__.__name__, StringIdentity(self._bucket)))

  def __repr__(self):
    return 'CloudStorageFileSystem(%s)' % self._bucket

  def _warnAboutAuthError(self):
    # logging.warning, not the deprecated logging.warn alias.
    logging.warning(('Authentication error on Cloud Storage. Check if your'
                     ' appengine project has permissions to Read the GCS'
                     ' buckets. If you are running a local appengine server,'
                     ' you need to set an access_token in'
                     ' local_debug/gcs_debug.conf.'
                     ' Remember that this token expires in less than 10'
                     ' minutes, so keep it updated. See'
                     ' gcs_file_system_provider.py for instructions.'))
    logging.debug(traceback.format_exc())