cloud_storage.py revision 5ad62731e62b9eb8d13f6e66dd1b57deaebdee11
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""

import collections
import contextlib
import hashlib
import logging
import os
import shutil
import stat
import subprocess
import sys
import tempfile
import time

import py_utils
from py_utils import lock

# Do a no-op import here so that cloud_storage_global_lock dep is picked up
# by https://cs.chromium.org/chromium/src/build/android/test_runner.pydeps.
# TODO(nedn, jbudorick): figure out a way to get rid of this ugly hack.
from py_utils import cloud_storage_global_lock  # pylint: disable=unused-import


PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
TELEMETRY_OUTPUT = 'chrome-telemetry-output'

# Uses ordered dict to make sure that bucket's key-value items are ordered from
# the most open to the most restrictive.
BUCKET_ALIASES = collections.OrderedDict((
    ('public', PUBLIC_BUCKET),
    ('partner', PARTNER_BUCKET),
    ('internal', INTERNAL_BUCKET),
    ('output', TELEMETRY_OUTPUT),
))

BUCKET_ALIAS_NAMES = BUCKET_ALIASES.keys()


_GSUTIL_PATH = os.path.join(py_utils.GetCatapultDir(), 'third_party', 'gsutil',
                            'gsutil')

# TODO(tbarzic): A workaround for http://crbug.com/386416 and
# http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'


# If the environment variable DISABLE_CLOUD_STORAGE_IO is set to '1', any
# method call that would invoke cloud storage network I/O raises
# CloudStorageIODisabled instead.
DISABLE_CLOUD_STORAGE_IO = 'DISABLE_CLOUD_STORAGE_IO'


class CloudStorageError(Exception):
  """Base class for all errors raised by this module."""

  @staticmethod
  def _GetConfigInstructions():
    # On a CrOS device HOME must be overridden so gsutil's config is read
    # from a writable location; mirror that in the instructions we print.
    command = _GSUTIL_PATH
    if py_utils.IsRunningOnCrosDevice():
      command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, _GSUTIL_PATH)
    return ('To configure your credentials:\n'
            ' 1. Run "%s config" and follow its instructions.\n'
            ' 2. If you have a @google.com account, use that account.\n'
            ' 3. For the project-id, just enter 0.' % command)


# NOTE(review): this shadows the Python 3 builtin PermissionError; renaming
# would break existing callers that catch cloud_storage.PermissionError.
class PermissionError(CloudStorageError):
  """Raised when access to a Cloud Storage resource is denied (HTTP 403)."""

  def __init__(self):
    super(PermissionError, self).__init__(
        'Attempted to access a file from Cloud Storage but you don\'t '
        'have permission. ' + self._GetConfigInstructions())


class CredentialsError(CloudStorageError):
  """Raised when gsutil has no configured credentials."""

  def __init__(self):
    super(CredentialsError, self).__init__(
        'Attempted to access a file from Cloud Storage but you have no '
        'configured credentials. ' + self._GetConfigInstructions())


class CloudStorageIODisabled(CloudStorageError):
  """Raised when network I/O is attempted while DISABLE_CLOUD_STORAGE_IO=1."""
  pass


class NotFoundError(CloudStorageError):
  """Raised when the requested bucket/object does not exist."""
  pass


class ServerError(CloudStorageError):
  """Raised on a Cloud Storage 500 Internal Server Error."""
  pass


# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  """Returns the first executable match for |relative_executable_path| found
  in |extra_search_paths| (searched first) or $PATH, or None if not found."""
  search_paths = list(extra_search_paths) + os.environ['PATH'].split(os.pathsep)
  for search_path in search_paths:
    executable_path = os.path.join(search_path, relative_executable_path)
    if py_utils.IsExecutable(executable_path):
      return executable_path
  return None


def _EnsureExecutable(gsutil):
  """chmod +x if gsutil is not executable."""
  st = os.stat(gsutil)
  if not st.st_mode & stat.S_IEXEC:
    os.chmod(gsutil, st.st_mode | stat.S_IEXEC)


def _RunCommand(args):
  """Runs a gsutil command and returns its stdout.

  Args:
    args: gsutil sub-command and arguments, e.g. ['ls', 'gs://bucket/'].

  Returns:
    The command's stdout as a string.

  Raises:
    CloudStorageIODisabled: if network I/O is disabled via the
        DISABLE_CLOUD_STORAGE_IO environment variable and |args| is not a
        purely local command ('help', 'hash' or 'version').
    CredentialsError, PermissionError, NotFoundError, ServerError,
    CloudStorageError: mapped from gsutil's stderr on failure.
  """
  # On cros device, as telemetry is running as root, home will be set to
  # /root/, which is not writable. gsutil will attempt to create a download
  # tracker dir in home dir and fail. To avoid this, override HOME dir to
  # something writable when running on cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  # http://crbug.com/386416, http://crbug.com/359293.
  gsutil_env = None
  if py_utils.IsRunningOnCrosDevice():
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR

  # Check the sub-command BEFORE prepending the interpreter/gsutil path below.
  # Previously this check ran after the prepend, so args[0] was never 'help',
  # 'hash' or 'version' and those purely local commands were wrongly blocked
  # when DISABLE_CLOUD_STORAGE_IO was set.
  if args[0] not in ('help', 'hash', 'version') and not IsNetworkIOEnabled():
    raise CloudStorageIODisabled(
        "Environment variable DISABLE_CLOUD_STORAGE_IO is set to 1. "
        'Command %s is not allowed to run' % args)

  if os.name == 'nt':
    # If Windows, prepend python. Python scripts aren't directly executable.
    args = [sys.executable, _GSUTIL_PATH] + args
  else:
    # Don't do it on POSIX, in case someone is using a shell script to redirect.
    args = [_GSUTIL_PATH] + args
    _EnsureExecutable(_GSUTIL_PATH)

  gsutil = subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, env=gsutil_env)
  stdout, stderr = gsutil.communicate()

  if gsutil.returncode:
    # Map well-known gsutil error messages onto this module's exceptions.
    if stderr.startswith((
        'You are attempting to access protected data with no configured',
        'Failure: No handler was ready to authenticate.')):
      raise CredentialsError()
    if ('status=403' in stderr or 'status 403' in stderr or
        '403 Forbidden' in stderr):
      raise PermissionError()
    if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
        'No URLs matched' in stderr or 'One or more URLs matched no' in stderr):
      raise NotFoundError(stderr)
    if '500 Internal Server Error' in stderr:
      raise ServerError(stderr)
    raise CloudStorageError(stderr)

  return stdout


def IsNetworkIOEnabled():
  """Returns true if cloud storage is enabled."""
  disable_cloud_storage_env_val = os.getenv(DISABLE_CLOUD_STORAGE_IO)

  # Any value other than unset/'1' is unsupported; warn but treat it as
  # "enabled" so a typo doesn't silently disable cloud storage.
  if (disable_cloud_storage_env_val and
      disable_cloud_storage_env_val != '1'):
    logging.error(
        'Unsupported value of environment variable '
        'DISABLE_CLOUD_STORAGE_IO. Expected None or \'1\' but got %s.',
        disable_cloud_storage_env_val)

  return disable_cloud_storage_env_val != '1'


def List(bucket):
  """Returns the paths of all objects in |bucket|, relative to the bucket."""
  query = 'gs://%s/' % bucket
  stdout = _RunCommand(['ls', query])
  return [url[len(query):] for url in stdout.splitlines()]


def Exists(bucket, remote_path):
  """Returns True if gs://|bucket|/|remote_path| exists."""
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
    return True
  except NotFoundError:
    return False


def Move(bucket1, bucket2, remote_path):
  """Moves |remote_path| from |bucket1| to |bucket2|."""
  url1 = 'gs://%s/%s' % (bucket1, remote_path)
  url2 = 'gs://%s/%s' % (bucket2, remote_path)
  logging.info('Moving %s to %s', url1, url2)
  _RunCommand(['mv', url1, url2])


def Copy(bucket_from, bucket_to, remote_path_from, remote_path_to):
  """Copy a file from one location in CloudStorage to another.

  Args:
    bucket_from: The cloud storage bucket where the file is currently located.
    bucket_to: The cloud storage bucket it is being copied to.
    remote_path_from: The file path where the file is located in bucket_from.
    remote_path_to: The file path it is being copied to in bucket_to.

  It should: cause no changes locally or to the starting file, and will
  overwrite any existing files in the destination location.
  """
  url1 = 'gs://%s/%s' % (bucket_from, remote_path_from)
  url2 = 'gs://%s/%s' % (bucket_to, remote_path_to)
  logging.info('Copying %s to %s', url1, url2)
  _RunCommand(['cp', url1, url2])


def Delete(bucket, remote_path):
  """Deletes gs://|bucket|/|remote_path|."""
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s', url)
  _RunCommand(['rm', url])


def Get(bucket, remote_path, local_path):
  """Downloads gs://|bucket|/|remote_path| to |local_path|, holding a
  file lock on |local_path| so concurrent processes don't collide."""
  with _FileLock(local_path):
    _GetLocked(bucket, remote_path, local_path)


_CLOUD_STORAGE_GLOBAL_LOCK = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'cloud_storage_global_lock.py')


@contextlib.contextmanager
def _FileLock(base_path):
  """Context manager serializing access to |base_path| across processes via
  an advisory lock on a sibling '<base_path>.pseudo_lock' file."""
  pseudo_lock_path = '%s.pseudo_lock' % base_path
  _CreateDirectoryIfNecessary(os.path.dirname(pseudo_lock_path))

  # We need to make sure that there is no other process which is acquiring the
  # lock on |base_path| and has not finished before proceeding further to
  # create the |pseudo_lock_path|. Otherwise, |pseudo_lock_path| may be deleted
  # by that other process after we create it in this process.
  while os.path.exists(pseudo_lock_path):
    time.sleep(0.1)

  # Guard the creation & acquiring lock of |pseudo_lock_path| by the global
  # lock to make sure that there is no race condition on creating the file.
  with open(_CLOUD_STORAGE_GLOBAL_LOCK) as global_file:
    with lock.FileLock(global_file, lock.LOCK_EX):
      fd = open(pseudo_lock_path, 'w')
      lock.AcquireFileLock(fd, lock.LOCK_EX)
  try:
    yield
  finally:
    lock.ReleaseFileLock(fd)
    try:
      fd.close()
      os.remove(pseudo_lock_path)
    except OSError:
      # We don't care if the pseudo-lock gets removed elsewhere before we have
      # a chance to do so.
      pass


def _CreateDirectoryIfNecessary(directory):
  """Creates |directory| (and parents) if it does not already exist."""
  if not os.path.exists(directory):
    os.makedirs(directory)


def _GetLocked(bucket, remote_path, local_path):
  """Downloads gs://|bucket|/|remote_path| to |local_path| via a temporary
  file in the same directory, so a partial download never clobbers
  |local_path|. Caller must hold the _FileLock for |local_path|."""
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s', url, local_path)
  _CreateDirectoryIfNecessary(os.path.dirname(local_path))
  with tempfile.NamedTemporaryFile(
      dir=os.path.dirname(local_path),
      delete=False) as partial_download_path:
    try:
      # Windows won't download to an open file.
      partial_download_path.close()
      try:
        _RunCommand(['cp', url, partial_download_path.name])
      except ServerError:
        logging.info('Cloud Storage server error, retrying download')
        _RunCommand(['cp', url, partial_download_path.name])
      shutil.move(partial_download_path.name, local_path)
    finally:
      if os.path.exists(partial_download_path.name):
        os.remove(partial_download_path.name)


def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """ Upload file in |local_path| to cloud storage.
  Args:
    bucket: the google cloud storage bucket name.
    remote_path: the remote file path in |bucket|.
    local_path: path of the local file to be uploaded.
    publicly_readable: whether the uploaded file has publicly readable
    permission.

  Returns:
    The url where the file is uploaded to.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  command_and_args = ['cp']
  extra_info = ''
  if publicly_readable:
    command_and_args += ['-a', 'public-read']
    extra_info = ' (publicly readable)'
  command_and_args += [local_path, url]
  logging.info('Uploading %s to %s%s', local_path, url, extra_info)
  _RunCommand(command_and_args)
  return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % (
      bucket, remote_path)


def GetIfHashChanged(cs_path, download_path, bucket, file_hash):
  """Downloads |cs_path| from |bucket| to |download_path| if |download_path|
  doesn't exist or its hash doesn't match |file_hash|.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _FileLock(download_path):
    if (os.path.exists(download_path) and
        CalculateHash(download_path) == file_hash):
      return False
    _GetLocked(bucket, cs_path, download_path)
    return True


def GetIfChanged(file_path, bucket):
  """Gets the file at file_path if it has a hash file that doesn't match or
  if there is no local copy of file_path, but there is a hash file for it.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _FileLock(file_path):
    hash_path = file_path + '.sha1'
    if not os.path.exists(hash_path):
      logging.warning('Hash file not found: %s', hash_path)
      return False

    expected_hash = ReadHash(hash_path)
    if os.path.exists(file_path) and CalculateHash(file_path) == expected_hash:
      return False
    # Content-addressed storage: the object's remote path IS its sha1 hash.
    _GetLocked(bucket, expected_hash, file_path)
    return True


def GetFilesInDirectoryIfChanged(directory, bucket):
  """ Scan the directory for .sha1 files, and download them from the given
  bucket in cloud storage if the local and remote hash don't match or
  there is no local copy.
  """
  if not os.path.isdir(directory):
    raise ValueError(
        '%s does not exist. Must provide a valid directory path.' % directory)
  # Don't allow the root directory to be a serving_dir.
  if directory == os.path.abspath(os.sep):
    raise ValueError('Trying to serve root directory from HTTP server.')
  for dirpath, _, filenames in os.walk(directory):
    for filename in filenames:
      path_name, extension = os.path.splitext(
          os.path.join(dirpath, filename))
      if extension != '.sha1':
        continue
      GetIfChanged(path_name, bucket)


def CalculateHash(file_path):
  """Calculates and returns the hash of the file at file_path."""
  sha1 = hashlib.sha1()
  with open(file_path, 'rb') as f:
    while True:
      # Read in 1mb chunks, so it doesn't all have to be loaded into memory.
      chunk = f.read(1024 * 1024)
      if not chunk:
        break
      sha1.update(chunk)
  return sha1.hexdigest()


def ReadHash(hash_path):
  """Returns the hash stored in |hash_path| (first 1024 bytes, whitespace
  stripped from the right)."""
  with open(hash_path, 'rb') as f:
    return f.read(1024).rstrip()