# cloud_storage.py revision 1320f92c476a1ad9d19dba2a48c72b75566198e9
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""

import contextlib
import cStringIO
import hashlib
import logging
import os
import subprocess
import sys
import tarfile
import urllib2

from telemetry.core import platform
from telemetry.util import path


# Names of the Cloud Storage buckets this module knows about.
PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
# Short aliases mapped to the full bucket names.
BUCKET_ALIASES = {
    'public': PUBLIC_BUCKET,
    'partner': PARTNER_BUCKET,
    'internal': INTERNAL_BUCKET,
}


_GSUTIL_URL = 'http://storage.googleapis.com/pub/gsutil.tar.gz'
_DOWNLOAD_PATH = os.path.join(path.GetTelemetryDir(), 'third_party', 'gsutil')
# TODO(tbarzic): A workaround for http://crbug.com/386416 and
# http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'


class CloudStorageError(Exception):
  """Base class for the errors raised by this module."""

  @staticmethod
  def _GetConfigInstructions(gsutil_path):
    """Returns a human-readable hint on how to set up gsutil credentials.

    Args:
      gsutil_path: Path of the gsutil script the instructions should refer to.
    """
    if SupportsProdaccess(gsutil_path) and _FindExecutableInPath('prodaccess'):
      return 'Run prodaccess to authenticate.'

    # On chromeos gsutil is run with an overridden HOME (see |_RunCommand|),
    # so the suggested command line carries the same override.
    if platform.GetHostPlatform().GetOSName() == 'chromeos':
      gsutil_path = ('HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, gsutil_path))
    return ('To configure your credentials:\n'
            ' 1. Run "%s config" and follow its instructions.\n'
            ' 2. If you have a @google.com account, use that account.\n'
            ' 3. For the project-id, just enter 0.' % gsutil_path)


class PermissionError(CloudStorageError):
  """Raised when gsutil reports a 403 status."""

  def __init__(self, gsutil_path):
    super(PermissionError, self).__init__(
        'Attempted to access a file from Cloud Storage but you don\'t '
        'have permission. ' + self._GetConfigInstructions(gsutil_path))


class CredentialsError(CloudStorageError):
  """Raised when gsutil reports missing credentials or a 401 status."""

  def __init__(self, gsutil_path):
    super(CredentialsError, self).__init__(
        'Attempted to access a file from Cloud Storage but you have no '
        'configured credentials. ' + self._GetConfigInstructions(gsutil_path))


class NotFoundError(CloudStorageError):
  """Raised when gsutil reports that the requested object does not exist."""
  pass


# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  """Returns the first executable match for a relative path, or None.

  Args:
    relative_executable_path: Path joined onto each search directory.
    *extra_search_paths: Directories searched before the entries of PATH.

  Returns:
    The first joined path for which path.IsExecutable() is true, else None.
  """
  search_paths = list(extra_search_paths)
  # PATH may be unset entirely; in that case only the extra paths are searched
  # instead of failing with a KeyError.
  env_path = os.environ.get('PATH')
  if env_path is not None:
    search_paths.extend(env_path.split(os.pathsep))

  for search_path in search_paths:
    executable_path = os.path.join(search_path, relative_executable_path)
    if path.IsExecutable(executable_path):
      return executable_path
  return None


def _DownloadGsutil():
  """Downloads the gsutil tarball and unpacks it next to |_DOWNLOAD_PATH|.

  Returns:
    The path of the gsutil script inside |_DOWNLOAD_PATH|.
  """
  logging.info('Downloading gsutil')
  with contextlib.closing(urllib2.urlopen(_GSUTIL_URL, timeout=60)) as response:
    # The whole archive is buffered in memory so tarfile gets a seekable file.
    with tarfile.open(fileobj=cStringIO.StringIO(response.read())) as tar_file:
      tar_file.extractall(os.path.dirname(_DOWNLOAD_PATH))
  logging.info('Downloaded gsutil to %s', _DOWNLOAD_PATH)

  return os.path.join(_DOWNLOAD_PATH, 'gsutil')


def FindGsutil():
  """Return the gsutil executable path. If we can't find it, download it."""
  # Look for a depot_tools installation.
  gsutil_path = _FindExecutableInPath(
      os.path.join('third_party', 'gsutil', 'gsutil'), _DOWNLOAD_PATH)
  if gsutil_path:
    return gsutil_path

  # Look for a gsutil installation.
  gsutil_path = _FindExecutableInPath('gsutil', _DOWNLOAD_PATH)
  if gsutil_path:
    return gsutil_path

  # Failed to find it. Download it!
  return _DownloadGsutil()


def SupportsProdaccess(gsutil_path):
  """Returns True if the gsutil script's text mentions 'prodaccess'."""
  with open(gsutil_path, 'r') as gsutil:
    return 'prodaccess' in gsutil.read()


def _RunCommand(args):
  """Runs gsutil with the given arguments and returns its stdout.

  Args:
    args: List of command-line arguments passed to gsutil.

  Returns:
    The captured standard output of the gsutil process.

  Raises:
    CredentialsError: stderr indicates missing credentials or a 401 status.
    PermissionError: stderr indicates a 403 status.
    NotFoundError: stderr indicates the object or URL does not exist.
    CloudStorageError: gsutil failed for any other reason.
  """
  gsutil_path = FindGsutil()

  # On cros device, as telemetry is running as root, home will be set to /root/,
  # which is not writable. gsutil will attempt to create a download tracker dir
  # in home dir and fail. To avoid this, override HOME dir to something writable
  # when running on cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  #     http://crbug.com/386416, http://crbug.com/359293.
  gsutil_env = None
  if platform.GetHostPlatform().GetOSName() == 'chromeos':
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR

  gsutil = subprocess.Popen([sys.executable, gsutil_path] + args,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            env=gsutil_env)
  stdout, stderr = gsutil.communicate()

  if gsutil.returncode:
    # Map well-known gsutil failure messages onto specific exception types.
    if stderr.startswith((
        'You are attempting to access protected data with no configured',
        'Failure: No handler was ready to authenticate.')):
      raise CredentialsError(gsutil_path)
    if 'status=401' in stderr or 'status 401' in stderr:
      raise CredentialsError(gsutil_path)
    if 'status=403' in stderr or 'status 403' in stderr:
      raise PermissionError(gsutil_path)
    if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
        'No URLs matched' in stderr):
      raise NotFoundError(stderr)
    raise CloudStorageError(stderr)

  return stdout


def List(bucket):
  """Lists a bucket's top level, with the 'gs://<bucket>/' prefix removed."""
  query = 'gs://%s/' % bucket
  stdout = _RunCommand(['ls', query])
  return [url[len(query):] for url in stdout.splitlines()]


def Exists(bucket, remote_path):
  """Returns True if 'gsutil ls' succeeds, False if it reports not-found.

  Any other CloudStorageError raised by |_RunCommand| propagates.
  """
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
    return True
  except NotFoundError:
    return False


def Move(bucket1, bucket2, remote_path):
  """Moves remote_path from bucket1 to the same path in bucket2."""
  url1 = 'gs://%s/%s' % (bucket1, remote_path)
  url2 = 'gs://%s/%s' % (bucket2, remote_path)
  logging.info('Moving %s to %s', url1, url2)
  _RunCommand(['mv', url1, url2])
def Delete(bucket, remote_path):
  """Removes remote_path from the given bucket."""
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s', url)
  _RunCommand(['rm', url])


def Get(bucket, remote_path, local_path):
  """Copies remote_path from the given bucket to local_path."""
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s', url, local_path)
  _RunCommand(['cp', url, local_path])


def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """Copies local_path to remote_path in the given bucket.

  Args:
    bucket: Name of the destination bucket.
    remote_path: Object path inside the bucket.
    local_path: Path of the local file to upload.
    publicly_readable: If True, the copy is made with '-a public-read'.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  command_and_args = ['cp']
  extra_info = ''
  if publicly_readable:
    command_and_args += ['-a', 'public-read']
    extra_info = ' (publicly readable)'
  command_and_args += [local_path, url]
  logging.info('Uploading %s to %s%s', local_path, url, extra_info)
  _RunCommand(command_and_args)


def GetIfChanged(file_path, bucket=None):
  """Gets the file at file_path if it has a hash file that doesn't match.

  If the file is not in Cloud Storage, log a warning instead of raising an
  exception. We assume that the user just hasn't uploaded the file yet.

  Args:
    file_path: Local path of the file; its hash is read from
        file_path + '.sha1'.
    bucket: Bucket to fetch from. If falsy, the public, partner and internal
        buckets are tried in that order.

  Returns:
    True if the binary was changed.
  """
  hash_path = file_path + '.sha1'
  if not os.path.exists(hash_path):
    logging.warning('Hash file not found: %s', hash_path)
    return False

  expected_hash = ReadHash(hash_path)
  if os.path.exists(file_path) and CalculateHash(file_path) == expected_hash:
    return False

  if bucket:
    candidate_buckets = [bucket]
  else:
    candidate_buckets = [PUBLIC_BUCKET, PARTNER_BUCKET, INTERNAL_BUCKET]

  # Objects are stored under their hash, so the hash is the remote path.
  for candidate in candidate_buckets:
    url = 'gs://%s/%s' % (candidate, expected_hash)
    try:
      _RunCommand(['cp', url, file_path])
    except NotFoundError:
      continue
    logging.info('Downloaded %s to %s', url, file_path)
    return True

  logging.warning('Unable to find file in Cloud Storage: %s', file_path)
  return False


def CalculateHash(file_path):
  """Calculates and returns the hash of the file at file_path."""
  sha1 = hashlib.sha1()
  with open(file_path, 'rb') as f:
    # Read in 1mb chunks, so it doesn't all have to be loaded into memory.
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      sha1.update(chunk)
  return sha1.hexdigest()


def ReadHash(hash_path):
  """Returns the first 1024 bytes of hash_path, minus trailing whitespace."""
  with open(hash_path, 'rb') as f:
    return f.read(1024).rstrip()