# Copyright 2012 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Helpers shared by cloudstorage_stub and cloudstorage_api."""


# Names exported by a wildcard import of this module.
__all__ = [
    'CS_XML_NS',
    'CSFileStat',
    'dt_str_to_posix',
    'local_api_url',
    'LOCAL_GCS_ENDPOINT',
    'local_run',
    'get_access_token',
    'get_stored_content_length',
    'get_metadata',
    'GCSFileStat',
    'http_time_to_posix',
    'memory_usage',
    'posix_time_to_http',
    'posix_to_dt_str',
    'set_access_token',
    'validate_options',
    'validate_bucket_name',
    'validate_bucket_path',
    'validate_file_path',
]
import calendar
import datetime
from email import utils as email_utils
import logging
import os
import re

# NOTE(review): both branches import the same module, so the fallback cannot
# succeed where the first attempt failed; kept as-is to preserve behavior.
try:
  from google.appengine.api import runtime
except ImportError:
  from google.appengine.api import runtime


# A bucket name: 3-63 characters drawn from a-z, 0-9, dot, dash, underscore.
_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'

# The patterns below are used with .match(), which anchors at the start of the
# string; those ending in '$' therefore require the whole string to match.
# 'bucket'
_GCS_BUCKET_REGEX = re.compile('%s$' % _GCS_BUCKET_REGEX_BASE)
# '/bucket'
_GCS_BUCKET_PATH_REGEX = re.compile('/%s$' % _GCS_BUCKET_REGEX_BASE)
# '/bucket' followed by anything
_GCS_PATH_PREFIX_REGEX = re.compile('/%s.*' % _GCS_BUCKET_REGEX_BASE)
# '/bucket/' followed by anything
_GCS_FULLPATH_REGEX = re.compile('/%s/.*' % _GCS_BUCKET_REGEX_BASE)

# Header-name prefixes (lower case) that count as user metadata.
_GCS_METADATA = [
    'x-goog-meta-',
    'content-disposition',
    'cache-control',
    'content-encoding',
]
# Header-name prefixes accepted as options: the metadata ones plus the ACL.
_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
LOCAL_GCS_ENDPOINT = '/_ah/gcs'
# Module-wide access token; empty string means "not set".
_access_token = ''


_MAX_GET_BUCKET_RESULT = 1000

# Integer type used for file sizes. Python 2 keeps using ``long``; Python 3 has
# no ``long`` and its ``int`` is already arbitrary precision.
try:
  _LONG_TYPE = long
except NameError:
  _LONG_TYPE = int


def set_access_token(access_token):
  """Set the shared access token to authenticate with Google Cloud Storage.

  When set, the library will always attempt to communicate with the
  real Google Cloud Storage with this token even when running on dev appserver.
  Note the token could expire so it's up to you to renew it.

  When absent, the library will automatically request and refresh a token
  on appserver, or when on dev appserver, talk to a Google Cloud Storage
  stub.

  Args:
    access_token: you can get one by running 'gsutil -d ls' and copying the
      str after 'Bearer'.
  """
  global _access_token
  _access_token = access_token


def get_access_token():
  """Returns the shared access token."""
  return _access_token


class GCSFileStat(object):
  """Container for GCS file stat."""

  def __init__(self,
               filename,
               st_size,
               etag,
               st_ctime,
               content_type=None,
               metadata=None,
               is_dir=False):
    """Initialize.

    For files, the non optional arguments are always set.
    For directories, only filename and is_dir is set.

    Args:
      filename: a Google Cloud Storage filename of form '/bucket/filename'.
      st_size: file size in bytes. long compatible.
      etag: hex digest of the md5 hash of the file's content. str.
        One pair of surrounding double quotes is stripped if present.
      st_ctime: posix file creation time. float compatible.
      content_type: content type. str.
      metadata: a str->str dict of user specified options when creating
        the file. Possible keys are x-goog-meta-, content-disposition,
        content-encoding, and cache-control.
      is_dir: True if this represents a directory. False if this is a real file.
    """
    self.filename = filename
    self.is_dir = is_dir
    self.st_size = None
    self.st_ctime = None
    self.etag = None
    self.content_type = content_type
    self.metadata = metadata

    # Directories keep size, ctime and etag as None.
    if not is_dir:
      self.st_size = _LONG_TYPE(st_size)
      self.st_ctime = float(st_ctime)
      # startswith/endswith instead of indexing, so that an empty etag is
      # stored as-is rather than raising IndexError.
      if etag and etag.startswith('"') and etag.endswith('"'):
        etag = etag[1:-1]
      self.etag = etag

  def __repr__(self):
    if self.is_dir:
      return '(directory: %s)' % self.filename

    return (
        '(filename: %(filename)s, st_size: %(st_size)s, '
        'st_ctime: %(st_ctime)s, etag: %(etag)s, '
        'content_type: %(content_type)s, '
        'metadata: %(metadata)s)' %
        dict(filename=self.filename,
             st_size=self.st_size,
             st_ctime=self.st_ctime,
             etag=self.etag,
             content_type=self.content_type,
             metadata=self.metadata))

  def __cmp__(self, other):
    """Order two stats by filename.

    Args:
      other: another instance of this class.

    Returns:
      1, -1 or 0 when self.filename is greater than, less than or equal to
      other.filename.

    Raises:
      ValueError: if other is not an instance of this class.
    """
    if not isinstance(other, self.__class__):
      # Interpolate explicitly: exceptions do not apply logging-style
      # arguments to the message.
      raise ValueError('Argument to cmp must have the same type. '
                       'Expect %s, got %s' % (self.__class__.__name__,
                                              other.__class__.__name__))
    if self.filename > other.filename:
      return 1
    elif self.filename < other.filename:
      return -1
    return 0

  # Python 3 ignores __cmp__, so the rich comparisons delegate to it. They
  # inherit its contract, including ValueError for foreign types.
  def __eq__(self, other):
    return self.__cmp__(other) == 0

  def __ne__(self, other):
    return self.__cmp__(other) != 0

  def __lt__(self, other):
    return self.__cmp__(other) < 0

  def __le__(self, other):
    return self.__cmp__(other) <= 0

  def __gt__(self, other):
    return self.__cmp__(other) > 0

  def __ge__(self, other):
    return self.__cmp__(other) >= 0

  def __hash__(self):
    # NOTE(review): hashing prefers etag while comparison uses filename, so
    # two stats that compare equal may hash differently. Left unchanged.
    if self.etag:
      return hash(self.etag)
    return hash(self.filename)


# Alias of GCSFileStat under a second name.
CSFileStat = GCSFileStat
# String types accepted for paths and option values. Python 2 keeps using
# ``basestring`` (str and unicode); Python 3 has no ``basestring``.
try:
  _STRING_TYPES = basestring
except NameError:
  _STRING_TYPES = str


def get_stored_content_length(headers):
  """Return the content length (in bytes) of the object as stored in GCS.

  x-goog-stored-content-length should always be present except when called via
  the local dev_appserver. Therefore if it is not present we default to the
  standard content-length header.

  Args:
    headers: a dict of headers from the http response.

  Returns:
    the stored content length as found in the headers, or None when neither
    header is present.
  """
  length = headers.get('x-goog-stored-content-length')
  if length is None:
    length = headers.get('content-length')
  return length


def get_metadata(headers):
  """Get user defined options from HTTP response headers.

  Args:
    headers: a dict of headers from the http response.

  Returns:
    a new dict holding the headers whose lower-cased name starts with one of
    the _GCS_METADATA prefixes. Keys keep their original case.
  """
  return dict((k, v) for k, v in headers.items()
              if any(k.lower().startswith(valid) for valid in _GCS_METADATA))


def validate_bucket_name(name):
  """Validate a Google Storage bucket name.

  Args:
    name: a Google Storage bucket name with no prefix or suffix.

  Raises:
    ValueError: if name is invalid.
    TypeError: if name is not a string.
  """
  _validate_path(name)
  if not _GCS_BUCKET_REGEX.match(name):
    raise ValueError('Bucket should be 3-63 characters long using only a-z,'
                     '0-9, underscore, dash or dot but got %s' % name)


def validate_bucket_path(path):
  """Validate a Google Cloud Storage bucket path.

  Args:
    path: a Google Storage bucket path. It should have form '/bucket'.

  Raises:
    ValueError: if path is invalid.
    TypeError: if path is not a string.
  """
  _validate_path(path)
  if not _GCS_BUCKET_PATH_REGEX.match(path):
    raise ValueError('Bucket should have format /bucket '
                     'but got %s' % path)


def validate_file_path(path):
  """Validate a Google Cloud Storage file path.

  Args:
    path: a Google Storage file path. It should have form '/bucket/filename'.

  Raises:
    ValueError: if path is invalid.
    TypeError: if path is not a string.
  """
  _validate_path(path)
  if not _GCS_FULLPATH_REGEX.match(path):
    raise ValueError('Path should have format /bucket/filename '
                     'but got %s' % path)


def _process_path_prefix(path_prefix):
  """Validate and process a Google Cloud Storage path prefix.

  Args:
    path_prefix: a Google Cloud Storage path prefix of format '/bucket/prefix'
      or '/bucket/' or '/bucket'.

  Raises:
    ValueError: if path is invalid.
    TypeError: if path_prefix is not a string.

  Returns:
    a tuple of /bucket and prefix. prefix can be None.
  """
  _validate_path(path_prefix)
  if not _GCS_PATH_PREFIX_REGEX.match(path_prefix):
    raise ValueError('Path prefix should have format /bucket, /bucket/, '
                     'or /bucket/prefix but got %s.' % path_prefix)
  # Search from index 1 to skip the leading slash; the next slash, if any,
  # ends the bucket name.
  bucket_name_end = path_prefix.find('/', 1)
  bucket = path_prefix
  prefix = None
  if bucket_name_end != -1:
    bucket = path_prefix[:bucket_name_end]
    # '/bucket/' yields an empty remainder, which is reported as None.
    prefix = path_prefix[bucket_name_end + 1:] or None
  return bucket, prefix


def _validate_path(path):
  """Basic validation of Google Storage paths.

  Args:
    path: a Google Storage path. It should have form '/bucket/filename'
      or '/bucket'.

  Raises:
    ValueError: if path is empty (any falsy value, including None).
    TypeError: if path is not of type basestring.
  """
  # The emptiness check runs first, so falsy non-strings raise ValueError.
  if not path:
    raise ValueError('Path is empty')
  if not isinstance(path, _STRING_TYPES):
    raise TypeError('Path should be a string but is %s (%s).' %
                    (path.__class__, path))


def validate_options(options):
  """Validate Google Cloud Storage options.

  Args:
    options: a str->basestring dict of options to pass to Google Cloud Storage.
      A falsy value (None or an empty dict) is accepted without checks.

  Raises:
    ValueError: if option is not supported.
    TypeError: if option is not of type str or value of an option
      is not of type basestring.
  """
  if not options:
    return

  for k, v in options.items():
    if not isinstance(k, str):
      raise TypeError('option %r should be a str.' % k)
    if not any(k.lower().startswith(valid) for valid in _GCS_OPTIONS):
      raise ValueError('option %s is not supported.' % k)
    if not isinstance(v, _STRING_TYPES):
      raise TypeError('value %r for option %s should be of type basestring.' %
                      (v, k))


def http_time_to_posix(http_time):
  """Convert HTTP time format to posix time.

  See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
  for http time format.

  Args:
    http_time: time in RFC 2616 format. e.g.
      "Mon, 20 Nov 1995 19:12:08 GMT". May be None.

  Returns:
    Seconds from unix epoch, or None if http_time is None.
  """
  if http_time is None:
    return None
  return email_utils.mktime_tz(email_utils.parsedate_tz(http_time))


def posix_time_to_http(posix_time):
  """Convert posix time to HTTP header time format.

  Args:
    posix_time: unix time. May be None.

  Returns:
    A datetime str in RFC 2616 format, or None if posix_time is None.
  """
  # Test against None rather than truthiness so that 0 (the epoch itself) is
  # formatted instead of being treated as missing.
  if posix_time is None:
    return None
  return email_utils.formatdate(posix_time, usegmt=True)


# Whole-second part of the timestamp format; fractions and 'Z' are handled
# separately by the two converters below.
_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'


def dt_str_to_posix(dt_str):
  """format str to posix.

  datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ,
  e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
  between date and time when they are on the same line.
  Z indicates UTC (zero meridian).

  A str without fractional seconds, e.g. 2013-04-12T00:22:27Z, is also
  accepted. Fractional seconds are discarded.

  A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

  This is used to parse LastModified node from GCS's GET bucket XML response.

  Args:
    dt_str: A datetime str.

  Returns:
    Whole seconds from unix epoch. By posix definition, epoch is midnight
    1970/1/1 UTC.

  Raises:
    ValueError: if the part before the first '.' does not match _DT_FORMAT.
  """
  # Keep everything before the first '.'; when there is no fraction, the 'Z'
  # suffix is still attached and has to be dropped for strptime.
  parsable = dt_str.partition('.')[0].rstrip('Z')
  dt = datetime.datetime.strptime(parsable, _DT_FORMAT)
  return calendar.timegm(dt.utctimetuple())


def posix_to_dt_str(posix):
  """Reverse of dt_str_to_posix.

  This is used by GCS stub to generate GET bucket XML response.

  Args:
    posix: A float of secs from unix epoch.

  Returns:
    A datetime str. The fractional part is always written as '.000'.
  """
  dt = datetime.datetime.utcfromtimestamp(posix)
  dt_str = dt.strftime(_DT_FORMAT)
  return dt_str + '.000Z'


def local_run():
  """Whether we should hit GCS dev appserver stub.

  Returns:
    True if the SERVER_SOFTWARE environment variable is unset, or starts with
    'Development' or 'testutil' and does not contain 'remote_api'.
    False otherwise.
  """
  server_software = os.environ.get('SERVER_SOFTWARE')
  if server_software is None:
    return True
  # The remote_api check comes first, so it wins over the prefix match.
  if 'remote_api' in server_software:
    return False
  return server_software.startswith(('Development', 'testutil'))


def local_api_url():
  """Return URL for GCS emulation on dev appserver.

  The host comes from the HTTP_HOST environment variable.
  """
  # NOTE(review): if HTTP_HOST is unset this yields 'http://None/_ah/gcs'.
  return 'http://%s%s' % (os.environ.get('HTTP_HOST'), LOCAL_GCS_ENDPOINT)


def memory_usage(method):
  """Log memory usage before and after a method.

  Args:
    method: the callable to wrap.

  Returns:
    A wrapper that logs runtime.memory_usage().current() at info level before
    and after calling method, and returns method's result. The "after" line
    is not logged if method raises.
  """
  import functools  # Local import; only this decorator needs it.

  @functools.wraps(method)  # Keep the wrapped method's name and docstring.
  def wrapper(*args, **kwargs):
    logging.info('Memory before method %s is %s.',
                 method.__name__, runtime.memory_usage().current())
    result = method(*args, **kwargs)
    logging.info('Memory after method %s is %s',
                 method.__name__, runtime.memory_usage().current())
    return result
  return wrapper


def _add_ns(tagname):
  """Return tagname prefixed with the CS_XML_NS namespace as '{ns}tag'."""
  return '{%(ns)s}%(tag)s' % {'ns': CS_XML_NS,
                              'tag': tagname}


# Namespace-qualified XML tag names.
_T_CONTENTS = _add_ns('Contents')
_T_LAST_MODIFIED = _add_ns('LastModified')
_T_ETAG = _add_ns('ETag')
_T_KEY = _add_ns('Key')
_T_SIZE = _add_ns('Size')
_T_PREFIX = _add_ns('Prefix')
_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
_T_NEXT_MARKER = _add_ns('NextMarker')
_T_IS_TRUNCATED = _add_ns('IsTruncated')