gcs_file_system.py revision e5d81f57cb97b3b6b7fccc9c5610d21eb81db09d
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import traceback

from docs_server_utils import StringIdentity
from file_system import FileSystem, FileNotFoundError, StatInfo
from future import Future
from path_util import (
    AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join)
from third_party.cloudstorage import cloudstorage_api
from third_party.cloudstorage import common
from third_party.cloudstorage import errors


# See gcs_file_system_provider.py for documentation on using Google Cloud
# Storage as a filesystem.
#
# Note that the path requirements for GCS differ from the docserver's:
# GCS requires that paths start with a '/', while we require that they don't.
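#
# For example (bucket name illustrative): the docserver path
# 'chrome-app-docs/extensions/manifest.json' corresponds to the GCS object
# '/chrome-app-docs/extensions/manifest.json'; the helpers below prepend the
# '/' before calling into cloudstorage_api.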


# Name of the file containing the Git hash of the latest commit synced to
# Cloud Storage. This file is generated by the GitHub -> GCS sync script.
LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt'

def _ReadFile(filename):
  '''Returns the contents of the GCS object at |filename| (no leading '/').
  Raises FileNotFoundError if the read fails for any reason.
  '''
  AssertIsFile(filename)
  try:
    with cloudstorage_api.open('/' + filename, 'r') as f:
      return f.read()
  except errors.Error:
    raise FileNotFoundError('Read failed for %s: %s' % (filename,
                                                        traceback.format_exc()))
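
# For example (hypothetical bucket and object):
#   _ReadFile('chrome-app-docs/a.txt')
# reads the GCS object '/chrome-app-docs/a.txt' and returns its contents.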

def _ListDir(dir_name, recursive=False):
  '''Lists the objects under |dir_name| (which must end with a '/'),
  returning paths relative to it. Raises FileNotFoundError if the
  listing fails.
  '''
  AssertIsDirectory(dir_name)
  try:
    # The listbucket method uses a prefix approach to simulate hierarchy.
    # Calling it with the 'delimiter' argument set to '/' returns only the
    # files directly inside |dir_name|, not all of its content recursively.
    delimiter = None if recursive else '/'
    files = cloudstorage_api.listbucket('/' + dir_name, delimiter=delimiter)
    return [os_path.filename.lstrip('/')[len(dir_name):] for os_path in files]
  except errors.Error:
    raise FileNotFoundError('cloudstorage.listbucket failed for %s: %s' %
                            (dir_name, traceback.format_exc()))
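
# Illustrative behavior, assuming a bucket 'docs' containing the objects
# 'docs/a.txt' and 'docs/sub/b.txt' (listbucket reports subdirectories as
# placeholder entries with a trailing '/' when a delimiter is given):
#   _ListDir('docs/')                 => ['a.txt', 'sub/']
#   _ListDir('docs/', recursive=True) => ['a.txt', 'sub/b.txt']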

def _CreateStatInfo(bucket, path):
  '''Builds a StatInfo for |path| within |bucket|. The version of every path
  is the Git hash stored in LAST_COMMIT_HASH_FILENAME; directories also get
  a child_versions mapping with that same version for each child.
  '''
  full_path = Join(bucket, path)
  last_commit_file = Join(bucket, LAST_COMMIT_HASH_FILENAME)
  try:
    last_commit = _ReadFile(last_commit_file)
    if IsDirectory(full_path):
      child_versions = dict((filename, last_commit)
                            for filename in _ListDir(full_path))
    else:
      child_versions = None
    return StatInfo(last_commit, child_versions)
  except (TypeError, errors.Error):
    raise FileNotFoundError('cloudstorage.stat failed for %s: %s' %
                            (path, traceback.format_exc()))
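
# Illustrative results, assuming '/docs/.__lastcommit.txt' contains 'c0ffee'
# and the same bucket layout as the _ListDir example above:
#   _CreateStatInfo('docs', 'sub/')  => StatInfo('c0ffee', {'b.txt': 'c0ffee'})
#   _CreateStatInfo('docs', 'a.txt') => StatInfo('c0ffee', None)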

class CloudStorageFileSystem(FileSystem):
  '''FileSystem implementation which fetches resources from Google Cloud
  Storage.
  '''
  def __init__(self, bucket, debug_access_token=None, debug_bucket_prefix=None):
    self._bucket = bucket
    if debug_access_token:
      logging.debug('gcs: using debug access token: %s' % debug_access_token)
      common.set_access_token(debug_access_token)
    if debug_bucket_prefix:
      logging.debug('gcs: prefixing all bucket names with %s' %
                    debug_bucket_prefix)
      self._bucket = debug_bucket_prefix + self._bucket
    AssertIsValid(self._bucket)
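
  # For example (names illustrative):
  #   CloudStorageFileSystem('chrome-app-docs', debug_bucket_prefix='debug-')
  # reads from the 'debug-chrome-app-docs' bucket instead.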

  def Read(self, paths, skip_not_found=False):
    def resolve():
      try:
        result = {}
        for path in paths:
          full_path = Join(self._bucket, path)
          logging.debug('gcs: requested path "%s", reading "%s"' %
                        (path, full_path))
          try:
            if IsDirectory(path):
              result[path] = _ListDir(full_path)
            else:
              result[path] = _ReadFile(full_path)
          except FileNotFoundError:
            # Honor |skip_not_found|: omit missing paths from the result
            # rather than failing the whole read.
            if not skip_not_found:
              raise
        return result
      except errors.AuthorizationError:
        self._warnAboutAuthError()
        raise

    return Future(callback=resolve)
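
  # Note: the read is lazy; resolve() runs when Get() is called on the
  # returned Future, e.g. fs.Read(['a.txt', 'sub/']).Get() (assuming the
  # docserver's callback-based Future defers work until Get()).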

  def Refresh(self):
    return Future(value=())

  def Stat(self, path):
    AssertIsValid(path)
    try:
      return _CreateStatInfo(self._bucket, path)
    except errors.AuthorizationError:
      self._warnAboutAuthError()
      raise

  def GetIdentity(self):
    return '@'.join((self.__class__.__name__, StringIdentity(self._bucket)))

  def __repr__(self):
    return 'CloudStorageFileSystem(%s)' % self._bucket

  def _warnAboutAuthError(self):
    logging.warn('Authentication error on Cloud Storage. Check that your'
                 ' App Engine project has permission to read the GCS'
                 ' buckets. If you are running a local App Engine server,'
                 ' you need to set an access_token in'
                 ' local_debug/gcs_debug.conf. Remember that this token'
                 ' expires in less than 10 minutes, so keep it updated. See'
                 ' gcs_file_system_provider.py for instructions.')
    logging.debug(traceback.format_exc())
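

if __name__ == '__main__':
  # Minimal manual smoke test; not used by the server. The bucket name and
  # path are hypothetical, and running this requires GCS read access (see
  # gcs_file_system_provider.py for credential setup).
  logging.basicConfig(level=logging.DEBUG)
  fs = CloudStorageFileSystem('chrome-app-docs')
  logging.info('version: %s', fs.Stat('extensions/').version)
  logging.info('listing: %s', fs.Read(['extensions/']).Get())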
128