# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""

import contextlib
import cStringIO
import hashlib
import logging
import os
import subprocess
import sys
import tarfile
import urllib2

from telemetry.core import platform
from telemetry.util import path


# Names of the Cloud Storage buckets used by this module. When no bucket is
# given, GetIfChanged() tries them in the order public, partner, internal.
PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'


# Short, human-friendly names mapped to the real bucket names above.
BUCKET_ALIASES = {
    'public': PUBLIC_BUCKET,
    'partner': PARTNER_BUCKET,
    'internal': INTERNAL_BUCKET,
}


# Fetched over HTTPS because the downloaded tarball contains code that is
# subsequently executed (see _DownloadGsutil and _RunCommand).
_GSUTIL_URL = 'https://storage.googleapis.com/pub/gsutil.tar.gz'
# Directory the downloaded gsutil ends up in; also searched by FindGsutil.
_DOWNLOAD_PATH = os.path.join(path.GetTelemetryDir(), 'third_party', 'gsutil')
# TODO(tbarzic): A workaround for http://crbug.com/386416 and
#     http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'


class CloudStorageError(Exception):
  """Base class for the errors raised by the gsutil wrappers in this module."""

  @staticmethod
  def _GetConfigInstructions(gsutil_path):
    """Returns user-facing instructions for setting up gsutil credentials.

    Args:
      gsutil_path: Path of the gsutil script that was being run.

    Returns:
      A string telling the user either to run prodaccess or how to run
      "gsutil config".
    """
    if SupportsProdaccess(gsutil_path) and _FindExecutableInPath('prodaccess'):
      return 'Run prodaccess to authenticate.'

    # On Chrome OS gsutil is run with an overridden HOME (see _RunCommand), so
    # the suggested command line has to carry the same override.
    if platform.GetHostPlatform().GetOSName() == 'chromeos':
      gsutil_path = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, gsutil_path)
    return ('To configure your credentials:\n'
            '  1. Run "%s config" and follow its instructions.\n'
            '  2. If you have a @google.com account, use that account.\n'
            '  3. For the project-id, just enter 0.' % gsutil_path)


class PermissionError(CloudStorageError):
  """Raised when gsutil reports a 403 status for the requested object.

  Note: under Python 3 this name shadows the builtin PermissionError inside
  this module.
  """

  def __init__(self, gsutil_path):
    """Builds the error message, including credential setup instructions.

    Args:
      gsutil_path: Path of the gsutil script that reported the failure.
    """
    super(PermissionError, self).__init__(
        'Attempted to access a file from Cloud Storage but you don\'t '
        'have permission. ' + self._GetConfigInstructions(gsutil_path))


class CredentialsError(CloudStorageError):
  """Raised when gsutil reports missing credentials or a 401 status."""

  def __init__(self, gsutil_path):
    """Builds the error message, including credential setup instructions.

    Args:
      gsutil_path: Path of the gsutil script that reported the failure.
    """
    super(CredentialsError, self).__init__(
        'Attempted to access a file from Cloud Storage but you have no '
        'configured credentials. ' + self._GetConfigInstructions(gsutil_path))


class NotFoundError(CloudStorageError):
  """Raised when gsutil reports that the requested URL or object is missing."""


# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  """Searches for an executable in the extra paths and then in PATH.

  Args:
    relative_executable_path: Path of the executable relative to each search
        directory.
    *extra_search_paths: Directories to look in before the PATH entries.

  Returns:
    The first joined path that path.IsExecutable() accepts, or None if there
    is no such path.
  """
  search_paths = list(extra_search_paths)
  # PATH can be absent from the environment altogether; treat that as "no
  # additional directories" instead of failing with a KeyError.
  env_path = os.environ.get('PATH')
  if env_path is not None:
    search_paths.extend(env_path.split(os.pathsep))

  for search_path in search_paths:
    executable_path = os.path.join(search_path, relative_executable_path)
    if path.IsExecutable(executable_path):
      return executable_path
  return None


def _IsInsideDirectory(directory, candidate):
  """Returns True if |candidate| resolves to |directory| or a path below it."""
  directory = os.path.realpath(directory)
  candidate = os.path.realpath(candidate)
  # os.path.join(directory, '') appends exactly one trailing separator, so that
  # '/a/bc' is not mistaken for a child of '/a/b'.
  return (candidate == directory or
          candidate.startswith(os.path.join(directory, '')))


def _DownloadGsutil():
  """Downloads the gsutil tarball and unpacks it next to _DOWNLOAD_PATH.

  Returns:
    The path of the gsutil script inside _DOWNLOAD_PATH.

  Raises:
    CloudStorageError: If the archive contains an entry (or a link target)
        that would end up outside of the extraction directory.
  """
  logging.info('Downloading gsutil')
  extract_dir = os.path.dirname(_DOWNLOAD_PATH)

  with contextlib.closing(urllib2.urlopen(_GSUTIL_URL, timeout=60)) as response:
    archive = cStringIO.StringIO(response.read())

  with tarfile.open(fileobj=archive) as tar_file:
    # extractall() trusts member names, so check every entry before anything
    # is written to disk.
    for member in tar_file.getmembers():
      destination = os.path.join(extract_dir, member.name)
      targets = [destination]
      if member.issym():
        # Symlink targets are interpreted relative to the link's directory.
        targets.append(
            os.path.join(os.path.dirname(destination), member.linkname))
      elif member.islnk():
        # Hard link targets are interpreted relative to the archive root.
        targets.append(os.path.join(extract_dir, member.linkname))
      for target in targets:
        if not _IsInsideDirectory(extract_dir, target):
          raise CloudStorageError(
              'Refusing to extract %r from %s: it would be written outside '
              'of %s' % (member.name, _GSUTIL_URL, extract_dir))
    tar_file.extractall(extract_dir)
  logging.info('Downloaded gsutil to %s', _DOWNLOAD_PATH)

  return os.path.join(_DOWNLOAD_PATH, 'gsutil')


def FindGsutil():
  """Return the gsutil executable path. If we can't find it, download it."""
  candidates = (
      # Where gsutil lives inside a depot_tools installation.
      os.path.join('third_party', 'gsutil', 'gsutil'),
      # A plain gsutil installation, or the copy in _DOWNLOAD_PATH.
      'gsutil',
  )
  for relative_path in candidates:
    gsutil_path = _FindExecutableInPath(relative_path, _DOWNLOAD_PATH)
    if gsutil_path:
      return gsutil_path

  # Failed to find it. Download it!
  return _DownloadGsutil()


def SupportsProdaccess(gsutil_path):
  """Returns True if the gsutil script at gsutil_path mentions prodaccess.

  Args:
    gsutil_path: Path of the gsutil script to inspect.

  Returns:
    True if the string 'prodaccess' occurs anywhere in the file.
  """
  # Read raw bytes: only an ASCII substring is searched for, and text-mode
  # decoding could fail on a gsutil that is not a plain text script.
  with open(gsutil_path, 'rb') as gsutil:
    return b'prodaccess' in gsutil.read()


def _RunCommand(args):
  """Runs gsutil with the given arguments and returns what it printed.

  Args:
    args: Sequence of command-line arguments to pass to gsutil.

  Returns:
    The stdout of the gsutil process.

  Raises:
    CredentialsError: If gsutil failed and its stderr reports missing
        credentials or a 401 status.
    PermissionError: If gsutil failed and its stderr reports a 403 status.
    NotFoundError: If gsutil failed and its stderr reports an invalid URI or
        a missing object.
    CloudStorageError: For any other non-zero exit of gsutil.
  """
  gsutil_path = FindGsutil()

  # On cros device, as telemetry is running as root, home will be set to /root/,
  # which is not writable. gsutil will attempt to create a download tracker dir
  # in home dir and fail. To avoid this, override HOME dir to something writable
  # when running on cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  #     http://crbug.com/386416, http://crbug.com/359293.
  gsutil_env = None
  if platform.GetHostPlatform().GetOSName() == 'chromeos':
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR

  # gsutil is run through the current interpreter rather than executed
  # directly.
  gsutil = subprocess.Popen([sys.executable, gsutil_path] + list(args),
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            env=gsutil_env)
  stdout, stderr = gsutil.communicate()

  if not gsutil.returncode:
    return stdout

  # gsutil failed: map its stderr to the most specific error type. The order
  # of the checks matters, credentials problems win over everything else.
  no_credentials_prefixes = (
      'You are attempting to access protected data with no configured',
      'Failure: No handler was ready to authenticate.')
  if (stderr.startswith(no_credentials_prefixes) or
      'status=401' in stderr or 'status 401' in stderr):
    raise CredentialsError(gsutil_path)
  if 'status=403' in stderr or 'status 403' in stderr:
    raise PermissionError(gsutil_path)
  if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
      'No URLs matched' in stderr):
    raise NotFoundError(stderr)
  raise CloudStorageError(stderr)


def List(bucket):
  """Lists the contents of a bucket.

  Args:
    bucket: Name of the Cloud Storage bucket.

  Returns:
    A list with one entry per line of "gsutil ls" output, each with the
    leading 'gs://<bucket>/' prefix removed.
  """
  query = 'gs://%s/' % bucket
  stdout = _RunCommand(['ls', query])
  prefix_length = len(query)
  return [url[prefix_length:] for url in stdout.splitlines()]


def Exists(bucket, remote_path):
  """Returns True if gs://bucket/remote_path can be listed, False if not.

  Args:
    bucket: Name of the Cloud Storage bucket.
    remote_path: Path of the object inside the bucket.

  Errors other than NotFoundError raised by gsutil are propagated.
  """
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
  except NotFoundError:
    return False
  return True


def Move(bucket1, bucket2, remote_path):
  """Moves remote_path from bucket1 to bucket2, keeping the same path.

  Args:
    bucket1: Name of the source bucket.
    bucket2: Name of the destination bucket.
    remote_path: Path of the object, used in both buckets.
  """
  url1 = 'gs://%s/%s' % (bucket1, remote_path)
  url2 = 'gs://%s/%s' % (bucket2, remote_path)
  logging.info('Moving %s to %s', url1, url2)
  _RunCommand(['mv', url1, url2])


def Delete(bucket, remote_path):
  """Removes gs://bucket/remote_path.

  Args:
    bucket: Name of the Cloud Storage bucket.
    remote_path: Path of the object inside the bucket.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s', url)
  _RunCommand(['rm', url])


def Get(bucket, remote_path, local_path):
  """Copies gs://bucket/remote_path to local_path.

  Args:
    bucket: Name of the Cloud Storage bucket.
    remote_path: Path of the object inside the bucket.
    local_path: Local destination handed to "gsutil cp".
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s', url, local_path)
  _RunCommand(['cp', url, local_path])


def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """Copies local_path to gs://bucket/remote_path.

  Args:
    bucket: Name of the Cloud Storage bucket.
    remote_path: Path of the object inside the bucket.
    local_path: Local file handed to "gsutil cp".
    publicly_readable: If True, "-a public-read" is passed to "gsutil cp".
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  command_and_args = ['cp']
  extra_info = ''
  if publicly_readable:
    command_and_args += ['-a', 'public-read']
    extra_info = ' (publicly readable)'
  command_and_args += [local_path, url]
  logging.info('Uploading %s to %s%s', local_path, url, extra_info)
  _RunCommand(command_and_args)


def GetIfChanged(file_path, bucket=None):
  """Gets the file at file_path if it has a hash file that doesn't match.

  If the file is not in Cloud Storage, log a warning instead of raising an
  exception. We assume that the user just hasn't uploaded the file yet.

  Args:
    file_path: Local path of the file. The expected hash is read from
        file_path + '.sha1' and is also the object name in the bucket.
    bucket: Bucket to download from. If not given, the public, partner and
        internal buckets are tried in that order.

  Returns:
    True if the file was downloaded. False if the hash file is missing, the
    local file already matches the hash, or no bucket contains the file.
  """
  hash_path = file_path + '.sha1'
  if not os.path.exists(hash_path):
    logging.warning('Hash file not found: %s', hash_path)
    return False

  expected_hash = ReadHash(hash_path)
  if os.path.exists(file_path) and CalculateHash(file_path) == expected_hash:
    return False

  if bucket:
    buckets = [bucket]
  else:
    buckets = [PUBLIC_BUCKET, PARTNER_BUCKET, INTERNAL_BUCKET]

  for candidate_bucket in buckets:
    url = 'gs://%s/%s' % (candidate_bucket, expected_hash)
    try:
      _RunCommand(['cp', url, file_path])
    except NotFoundError:
      # Not in this bucket; try the next one.
      continue
    logging.info('Downloaded %s to %s', url, file_path)
    return True

  logging.warning('Unable to find file in Cloud Storage: %s', file_path)
  return False


def CalculateHash(file_path):
  """Calculates and returns the hash of the file at file_path.

  Args:
    file_path: Path of the file to hash.

  Returns:
    The SHA-1 digest of the file contents as a hexadecimal string.
  """
  sha1 = hashlib.sha1()
  with open(file_path, 'rb') as f:
    # Read in 1mb chunks, so it doesn't all have to be loaded into memory.
    # iter() stops at the first empty read, i.e. at end of file.
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      sha1.update(chunk)
  return sha1.hexdigest()


def ReadHash(hash_path):
  """Reads the hash stored in the file at hash_path.

  Args:
    hash_path: Path of the hash file.

  Returns:
    At most the first 1024 bytes of the file, with trailing whitespace
    removed.
  """
  with open(hash_path, 'rb') as f:
    return f.read(1024).rstrip()
248