# cloud_storage.py revision 1320f92c476a1ad9d19dba2a48c72b75566198e9
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
5e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""
6e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
7e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scottimport contextlib
8e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scottimport cStringIO
9e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scottimport hashlib
10e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scottimport logging
11e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scottimport os
12e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scottimport subprocess
13e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scottimport sys
14e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scottimport tarfile
15e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scottimport urllib2
16e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
17e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scottfrom telemetry.core import platform
18e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scottfrom telemetry.util import path
19e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
20e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
# Names of the Cloud Storage buckets used by this module. When no bucket is
# given, GetIfChanged() tries them in the order public, partner, internal.
PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'


# Short alias -> bucket name.
BUCKET_ALIASES = {
    'public': PUBLIC_BUCKET,
    'partner': PARTNER_BUCKET,
    'internal': INTERNAL_BUCKET,
}


# Archive fetched by _DownloadGsutil() when no gsutil executable can be found.
_GSUTIL_URL = 'http://storage.googleapis.com/pub/gsutil.tar.gz'
# Directory that holds the downloaded gsutil; the archive is unpacked into its
# parent directory, and FindGsutil() also searches this directory.
_DOWNLOAD_PATH = os.path.join(path.GetTelemetryDir(), 'third_party', 'gsutil')
# HOME override applied when the host OS is 'chromeos'.
# TODO(tbarzic): A workaround for http://crbug.com/386416 and
#     http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'
38e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
39e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
class CloudStorageError(Exception):
  """Base class for the errors raised by the gsutil wrappers in this file."""

  @staticmethod
  def _GetConfigInstructions(gsutil_path):
    """Builds the help text telling the user how to authenticate.

    Args:
      gsutil_path: Path to the gsutil script; inspected for prodaccess support
          and quoted in the manual configuration steps.

    Returns:
      A string with instructions for the user.
    """
    can_use_prodaccess = (SupportsProdaccess(gsutil_path) and
                          _FindExecutableInPath('prodaccess'))
    if can_use_prodaccess:
      return 'Run prodaccess to authenticate.'

    config_command = gsutil_path
    if platform.GetHostPlatform().GetOSName() == 'chromeos':
      # Show the command with the same HOME override used on chromeos hosts.
      config_command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, gsutil_path)
    return ('To configure your credentials:\n'
            '  1. Run "%s config" and follow its instructions.\n'
            '  2. If you have a @google.com account, use that account.\n'
            '  3. For the project-id, just enter 0.' % config_command)
52e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
53e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
class PermissionError(CloudStorageError):
  """Error for a Cloud Storage access that was denied (403 from gsutil)."""

  def __init__(self, gsutil_path):
    """Builds the message, including instructions for configuring gsutil.

    Args:
      gsutil_path: Path to the gsutil script that was run.
    """
    message = ('Attempted to access a file from Cloud Storage but you don\'t '
               'have permission. ' + self._GetConfigInstructions(gsutil_path))
    super(PermissionError, self).__init__(message)
59e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
60e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
class CredentialsError(CloudStorageError):
  """Error for a Cloud Storage access attempted without usable credentials."""

  def __init__(self, gsutil_path):
    """Builds the message, including instructions for configuring gsutil.

    Args:
      gsutil_path: Path to the gsutil script that was run.
    """
    message = ('Attempted to access a file from Cloud Storage but you have no '
               'configured credentials. ' +
               self._GetConfigInstructions(gsutil_path))
    super(CredentialsError, self).__init__(message)
66e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
67e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
class NotFoundError(CloudStorageError):
  """Error for a Cloud Storage URL that gsutil could not resolve."""
70e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
71e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  """Searches a list of directories for an executable.

  Args:
    relative_executable_path: Path of the executable relative to each search
        directory, e.g. 'gsutil'.
    *extra_search_paths: Directories tried before the entries of the PATH
        environment variable.

  Returns:
    The first candidate path accepted by path.IsExecutable(), or None if no
    search directory yields one.
  """
  search_paths = list(extra_search_paths)
  # PATH can be absent (e.g. a scrubbed environment). Treat that as "no
  # further directories" instead of failing with KeyError.
  path_variable = os.environ.get('PATH')
  if path_variable is not None:
    search_paths += path_variable.split(os.pathsep)

  for search_path in search_paths:
    executable_path = os.path.join(search_path, relative_executable_path)
    if path.IsExecutable(executable_path):
      return executable_path
  return None
80e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
81e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def _DownloadGsutil():
  """Downloads the gsutil archive and unpacks it beside _DOWNLOAD_PATH.

  Returns:
    The path of the 'gsutil' entry inside _DOWNLOAD_PATH.
  """
  logging.info('Downloading gsutil')
  extract_dir = os.path.dirname(_DOWNLOAD_PATH)
  response = urllib2.urlopen(_GSUTIL_URL, timeout=60)
  with contextlib.closing(response):
    # The complete archive is read into memory before it is opened.
    archive = cStringIO.StringIO(response.read())
    with tarfile.open(fileobj=archive) as tar_file:
      # NOTE(review): members are extracted without validating their paths;
      # confirm the archive source is trusted.
      tar_file.extractall(extract_dir)
  logging.info('Downloaded gsutil to %s', _DOWNLOAD_PATH)

  return os.path.join(_DOWNLOAD_PATH, 'gsutil')
90e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
91e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def FindGsutil():
  """Locates the gsutil executable, downloading it as a last resort.

  Returns:
    Path to the gsutil executable.
  """
  candidates = (
      # Location inside a depot_tools installation.
      os.path.join('third_party', 'gsutil', 'gsutil'),
      # A standalone gsutil installation.
      'gsutil',
  )
  for candidate in candidates:
    found = _FindExecutableInPath(candidate, _DOWNLOAD_PATH)
    if found:
      return found

  # Nothing was found on any search path, so fetch a copy.
  return _DownloadGsutil()
107e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
108e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def SupportsProdaccess(gsutil_path):
  """Reports whether the gsutil script mentions prodaccess.

  Args:
    gsutil_path: Path to the gsutil script to inspect.

  Returns:
    True if the text 'prodaccess' occurs anywhere in the file's contents.
  """
  with open(gsutil_path, 'r') as gsutil_file:
    script_text = gsutil_file.read()
  return 'prodaccess' in script_text
112e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
113e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def _RunCommand(args):
  """Runs gsutil with the given arguments and returns its stdout.

  Args:
    args: List of command-line arguments passed to gsutil.

  Returns:
    Everything gsutil wrote to stdout.

  Raises:
    CredentialsError: gsutil reported missing credentials or a 401 status.
    PermissionError: gsutil reported a 403 status.
    NotFoundError: gsutil reported an invalid URI or no matching object.
    CloudStorageError: gsutil failed for any other reason.
  """
  gsutil_path = FindGsutil()

  # On cros device, as telemetry is running as root, home will be set to /root/,
  # which is not writable. gsutil will attempt to create a download tracker dir
  # in home dir and fail. To avoid this, override HOME dir to something writable
  # when running on cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  #     http://crbug.com/386416, http://crbug.com/359293.
  running_on_cros = platform.GetHostPlatform().GetOSName() == 'chromeos'
  if running_on_cros:
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR
  else:
    gsutil_env = None

  command = [sys.executable, gsutil_path] + args
  process = subprocess.Popen(command, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, env=gsutil_env)
  stdout, stderr = process.communicate()

  if not process.returncode:
    return stdout

  # gsutil failed: map its stderr text onto the most specific error type.
  missing_credentials_prefixes = (
      'You are attempting to access protected data with no configured',
      'Failure: No handler was ready to authenticate.')
  if (stderr.startswith(missing_credentials_prefixes) or
      'status=401' in stderr or 'status 401' in stderr):
    raise CredentialsError(gsutil_path)
  if 'status=403' in stderr or 'status 403' in stderr:
    raise PermissionError(gsutil_path)
  if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
      'No URLs matched' in stderr):
    raise NotFoundError(stderr)
  raise CloudStorageError(stderr)
149e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
150e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def List(bucket):
  """Lists the contents of a bucket.

  Args:
    bucket: Name of the Cloud Storage bucket.

  Returns:
    A list with one entry per line of the `gsutil ls` output, each with its
    first len('gs://<bucket>/') characters removed.
  """
  bucket_url = 'gs://%s/' % bucket
  prefix_length = len(bucket_url)
  listing = _RunCommand(['ls', bucket_url])
  names = []
  for line in listing.splitlines():
    names.append(line[prefix_length:])
  return names
155e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
156e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def Exists(bucket, remote_path):
  """Checks whether remote_path can be listed in the given bucket.

  Args:
    bucket: Name of the Cloud Storage bucket.
    remote_path: Path of the object inside the bucket.

  Returns:
    False if listing the object raises NotFoundError, True otherwise. Other
    errors from gsutil propagate to the caller.
  """
  object_url = 'gs://%s/%s' % (bucket, remote_path)
  try:
    _RunCommand(['ls', object_url])
  except NotFoundError:
    return False
  return True
163e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
164e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def Move(bucket1, bucket2, remote_path):
  """Moves remote_path from bucket1 to the same path in bucket2.

  Args:
    bucket1: Name of the source bucket.
    bucket2: Name of the destination bucket.
    remote_path: Path of the object, identical in both buckets.
  """
  url_format = 'gs://%s/%s'
  source_url = url_format % (bucket1, remote_path)
  destination_url = url_format % (bucket2, remote_path)
  logging.info('Moving %s to %s', source_url, destination_url)
  _RunCommand(['mv', source_url, destination_url])
170e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
171e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def Delete(bucket, remote_path):
  """Removes remote_path from the given bucket.

  Args:
    bucket: Name of the Cloud Storage bucket.
    remote_path: Path of the object inside the bucket.
  """
  target_url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s', target_url)
  _RunCommand(['rm', target_url])
176e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
177e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def Get(bucket, remote_path, local_path):
  """Copies remote_path from the given bucket to local_path.

  Args:
    bucket: Name of the Cloud Storage bucket.
    remote_path: Path of the object inside the bucket.
    local_path: Destination path handed to `gsutil cp`.
  """
  source_url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s', source_url, local_path)
  _RunCommand(['cp', source_url, local_path])
182e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
183e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """Uploads local_path to remote_path in the given bucket.

  Args:
    bucket: Name of the Cloud Storage bucket.
    remote_path: Destination path inside the bucket.
    local_path: Path of the file to upload.
    publicly_readable: If true, the copy is made with '-a public-read'.
  """
  destination_url = 'gs://%s/%s' % (bucket, remote_path)
  acl_args = ['-a', 'public-read'] if publicly_readable else []
  extra_info = ' (publicly readable)' if publicly_readable else ''
  logging.info('Uploading %s to %s%s', local_path, destination_url, extra_info)
  _RunCommand(['cp'] + acl_args + [local_path, destination_url])
194e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
195e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def GetIfChanged(file_path, bucket=None):
  """Downloads file_path when its local contents don't match its hash file.

  The expected hash is read from file_path + '.sha1', and the object named by
  that hash is copied from Cloud Storage to file_path. A missing hash file or
  a file that is in none of the buckets only produces a warning; we assume the
  user just hasn't uploaded the file yet.

  Args:
    file_path: Local path of the file to refresh.
    bucket: Bucket to fetch from. If not given, the public, partner and
        internal buckets are tried in that order.

  Returns:
    True if the binary was changed.
  """
  hash_path = file_path + '.sha1'
  if not os.path.exists(hash_path):
    logging.warning('Hash file not found: %s', hash_path)
    return False

  expected_hash = ReadHash(hash_path)
  is_up_to_date = (os.path.exists(file_path) and
                   CalculateHash(file_path) == expected_hash)
  if is_up_to_date:
    return False

  if bucket:
    candidate_buckets = [bucket]
  else:
    candidate_buckets = [PUBLIC_BUCKET, PARTNER_BUCKET, INTERNAL_BUCKET]

  for candidate_bucket in candidate_buckets:
    url = 'gs://%s/%s' % (candidate_bucket, expected_hash)
    try:
      _RunCommand(['cp', url, file_path])
    except NotFoundError:
      # Not in this bucket; try the next one.
      continue
    logging.info('Downloaded %s to %s', url, file_path)
    return True

  logging.warning('Unable to find file in Cloud Storage: %s', file_path)
  return False
230e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
231e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def CalculateHash(file_path):
  """Returns the hexadecimal SHA-1 digest of the file at file_path."""
  chunk_size = 1024 * 1024
  digest = hashlib.sha1()
  with open(file_path, 'rb') as input_file:
    # Feed the file in 1mb chunks, so it doesn't all have to be loaded into
    # memory. An empty read marks the end of the file.
    for chunk in iter(lambda: input_file.read(chunk_size), b''):
      digest.update(chunk)
  return digest.hexdigest()
243e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
244e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott
def ReadHash(hash_path):
  """Returns the hash stored in the file at hash_path.

  At most the first 1024 bytes are read, and trailing whitespace is stripped.
  """
  with open(hash_path, 'rb') as hash_file:
    raw_hash = hash_file.read(1024)
  return raw_hash.rstrip()
248e46c9386c4f79aa40185f79a19fc5b2a7ef528b3Patrick Scott