cloud_storage.py revision 6e8cce623b6e4fe0c9e4af605d675dd9d0338c38
1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""
6
7import contextlib
8import cStringIO
9import hashlib
10import logging
11import os
12import subprocess
13import sys
14import tarfile
15import urllib2
16
17from telemetry.core.backends.chrome import cros_interface
18from telemetry.util import path
19
# Cloud Storage bucket names. When GetIfChanged() is called without an explicit
# bucket it tries these three in the order PUBLIC, PARTNER, INTERNAL.
PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'


# Tarball fetched by _DownloadGsutil() when no gsutil executable can be found.
# NOTE(review): this is fetched over plain http and then executed; consider
# whether an https URL can be used in every environment this runs in.
_GSUTIL_URL = 'http://storage.googleapis.com/pub/gsutil.tar.gz'
# Directory expected to hold the downloaded gsutil. The tarball is extracted
# into its parent directory and the script is looked up as <this dir>/gsutil.
_DOWNLOAD_PATH = os.path.join(path.GetTelemetryDir(), 'third_party', 'gsutil')
# Value used for HOME when gsutil is run on a cros device.
# TODO(tbarzic): A workaround for http://crbug.com/386416 and
#     http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'
30
class CloudStorageError(Exception):
  """Base class for every error raised by this module."""

  @staticmethod
  def _GetConfigInstructions(gsutil_path):
    """Builds the "how to authenticate" hint appended to error messages.

    Args:
      gsutil_path: Path of the gsutil script the failed command was run with.

    Returns:
      A one-line prodaccess hint when the gsutil script mentions prodaccess
      and a prodaccess executable is on PATH; otherwise step-by-step
      instructions for running "gsutil config".
    """
    can_use_prodaccess = (SupportsProdaccess(gsutil_path) and
                          _FindExecutableInPath('prodaccess'))
    if can_use_prodaccess:
      return 'Run prodaccess to authenticate.'

    config_command = gsutil_path
    if cros_interface.IsRunningOnCrosDevice():
      # Mirror the HOME override that is applied when gsutil is actually run
      # on a cros device, so the suggested command uses the same HOME.
      config_command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, gsutil_path)

    instructions_template = (
        'To configure your credentials:\n'
        '  1. Run "%s config" and follow its instructions.\n'
        '  2. If you have a @google.com account, use that account.\n'
        '  3. For the project-id, just enter 0.')
    return instructions_template % config_command
43
44
class PermissionError(CloudStorageError):
  """Raised by _RunCommand when gsutil fails and its stderr reports a 403."""

  def __init__(self, gsutil_path):
    instructions = self._GetConfigInstructions(gsutil_path)
    message = ('Attempted to access a file from Cloud Storage but you don\'t '
               'have permission. ' + instructions)
    super(PermissionError, self).__init__(message)
50
51
class CredentialsError(CloudStorageError):
  """Raised by _RunCommand when gsutil reports missing credentials."""

  def __init__(self, gsutil_path):
    instructions = self._GetConfigInstructions(gsutil_path)
    message = ('Attempted to access a file from Cloud Storage but you have no '
               'configured credentials. ' + instructions)
    super(CredentialsError, self).__init__(message)
57
58
class NotFoundError(CloudStorageError):
  """Raised by _RunCommand when gsutil reports a missing or invalid URL."""
61
62
63# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  """Finds the first executable matching a relative path in a set of dirs.

  Args:
    relative_executable_path: Path that is joined onto each search directory.
    *extra_search_paths: Directories searched before the entries of the PATH
        environment variable.

  Returns:
    The first joined path for which path.IsExecutable() is true, or None when
    no candidate matches.
  """
  search_paths = list(extra_search_paths)
  # PATH may be missing from the environment altogether. In that case search
  # only the extra paths rather than failing with a KeyError.
  env_path = os.environ.get('PATH')
  if env_path is not None:
    search_paths.extend(env_path.split(os.pathsep))

  for search_path in search_paths:
    executable_path = os.path.join(search_path, relative_executable_path)
    if path.IsExecutable(executable_path):
      return executable_path
  return None
71
72
def _DownloadGsutil():
  """Downloads the gsutil tarball and unpacks it beside _DOWNLOAD_PATH.

  The archive at _GSUTIL_URL is read fully into memory and extracted into the
  parent directory of _DOWNLOAD_PATH.

  Returns:
    The path <_DOWNLOAD_PATH>/gsutil. This presumes the archive unpacks into a
    directory named like the last component of _DOWNLOAD_PATH; the function
    does not verify that.
  """
  logging.info('Downloading gsutil')
  # The timeout is an argument of urlopen(). contextlib.closing() accepts only
  # the object to close, so passing timeout= to it raises TypeError.
  connection = urllib2.urlopen(_GSUTIL_URL, timeout=60)
  with contextlib.closing(connection) as response:
    archive = cStringIO.StringIO(response.read())
    with tarfile.open(fileobj=archive) as tar_file:
      # NOTE(review): extractall() trusts the member paths stored in the
      # archive; the download source must therefore be trusted.
      tar_file.extractall(os.path.dirname(_DOWNLOAD_PATH))
  logging.info('Downloaded gsutil to %s' % _DOWNLOAD_PATH)

  return os.path.join(_DOWNLOAD_PATH, 'gsutil')
81
82
def FindGsutil():
  """Locates a gsutil executable, downloading one as a last resort.

  Returns:
    The path of the first executable found, or the path returned by
    _DownloadGsutil() when nothing is found.
  """
  candidate_relative_paths = (
      # Layout of a depot_tools installation.
      os.path.join('third_party', 'gsutil', 'gsutil'),
      # A plain gsutil installation.
      'gsutil',
  )
  for relative_path in candidate_relative_paths:
    found_path = _FindExecutableInPath(relative_path, _DOWNLOAD_PATH)
    if found_path:
      return found_path

  # Nothing usable was found. Download it!
  return _DownloadGsutil()
98
99
def SupportsProdaccess(gsutil_path):
  """Returns True if the file at gsutil_path contains the text 'prodaccess'.

  Args:
    gsutil_path: Path of the gsutil script to inspect.
  """
  with open(gsutil_path, 'r') as gsutil_script:
    script_text = gsutil_script.read()
  return 'prodaccess' in script_text
103
104
def _RunCommand(args):
  """Runs gsutil with the given arguments and returns what it printed.

  Args:
    args: List of command-line arguments passed to gsutil, for example
        ['ls', 'gs://bucket/'].

  Returns:
    The stdout of the gsutil process when it exits with status zero.

  Raises:
    CredentialsError: gsutil failed and stderr starts with one of the known
        "no credentials" messages.
    PermissionError: gsutil failed and stderr mentions a 403 status.
    NotFoundError: gsutil failed and stderr indicates an invalid URI or a
        missing object.
    CloudStorageError: gsutil failed for any other reason; carries stderr.
  """
  gsutil_path = FindGsutil()

  # Telemetry runs as root on a cros device, so HOME is /root/, which is not
  # writable. gsutil tries to create its download tracker directory under
  # HOME and fails, so HOME is pointed at a writable location there.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  #     http://crbug.com/386416, http://crbug.com/359293.
  child_env = None
  if cros_interface.IsRunningOnCrosDevice():
    child_env = os.environ.copy()
    child_env['HOME'] = _CROS_GSUTIL_HOME_WAR

  command_line = [sys.executable, gsutil_path] + args
  process = subprocess.Popen(command_line,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             env=child_env)
  output, error_output = process.communicate()

  if not process.returncode:
    return output

  # gsutil failed: map its stderr onto the most specific error type. The
  # order of these checks matters.
  missing_credentials_prefixes = (
      'You are attempting to access protected data with no configured',
      'Failure: No handler was ready to authenticate.')
  if error_output.startswith(missing_credentials_prefixes):
    raise CredentialsError(gsutil_path)

  forbidden_markers = ('status=403', 'status 403')
  if any(marker in error_output for marker in forbidden_markers):
    raise PermissionError(gsutil_path)

  not_found_markers = ('No such object', 'No URLs matched')
  if (error_output.startswith('InvalidUriError') or
      any(marker in error_output for marker in not_found_markers)):
    raise NotFoundError(error_output)

  raise CloudStorageError(error_output)
138
139
def List(bucket):
  """Lists a bucket via "gsutil ls".

  Args:
    bucket: Name of the Cloud Storage bucket.

  Returns:
    One entry per line of gsutil output, with as many leading characters
    removed as the 'gs://<bucket>/' query is long.
  """
  query = 'gs://%s/' % bucket
  prefix_length = len(query)
  listing = _RunCommand(['ls', query])

  names = []
  for line in listing.splitlines():
    names.append(line[prefix_length:])
  return names
144
145
def Exists(bucket, remote_path):
  """Returns whether "gsutil ls" finds remote_path in the bucket.

  Only NotFoundError is translated into False; any other error raised while
  running gsutil propagates to the caller.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  try:
    _RunCommand(['ls', url])
  except NotFoundError:
    return False
  return True
152
153
def Move(bucket1, bucket2, remote_path):
  """Moves remote_path from bucket1 to bucket2 via "gsutil mv".

  The object keeps the same remote path in the destination bucket.
  """
  source_url, destination_url = [
      'gs://%s/%s' % (bucket_name, remote_path)
      for bucket_name in (bucket1, bucket2)]
  logging.info('Moving %s to %s' % (source_url, destination_url))
  _RunCommand(['mv', source_url, destination_url])
159
160
def Delete(bucket, remote_path):
  """Removes remote_path from the bucket via "gsutil rm"."""
  target_url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s' % target_url)
  removal_command = ['rm', target_url]
  _RunCommand(removal_command)
165
166
def Get(bucket, remote_path, local_path):
  """Copies remote_path from the bucket to local_path via "gsutil cp"."""
  source_url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s' % (source_url, local_path))
  copy_command = ['cp', source_url, local_path]
  _RunCommand(copy_command)
171
172
def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """Uploads local_path to remote_path in the bucket via "gsutil cp".

  Args:
    bucket: Name of the destination bucket.
    remote_path: Object path inside the bucket.
    local_path: Path of the local file to upload.
    publicly_readable: When true, "-a public-read" is added to the cp command.
  """
  destination_url = 'gs://%s/%s' % (bucket, remote_path)
  acl_args = ['-a', 'public-read'] if publicly_readable else []
  visibility_note = ' (publicly readable)' if publicly_readable else ''

  logging.info(
      'Uploading %s to %s%s' % (local_path, destination_url, visibility_note))
  _RunCommand(['cp'] + acl_args + [local_path, destination_url])
183
184
def GetIfChanged(file_path, bucket=None):
  """Downloads file_path when its local copy does not match its .sha1 file.

  The expected hash is read from file_path + '.sha1' and doubles as the object
  name in Cloud Storage. A file missing from every bucket tried only produces
  a warning, on the assumption that it simply has not been uploaded yet.

  Args:
    file_path: Local path of the file to refresh.
    bucket: Bucket to fetch from. When not given, the public, partner and
        internal buckets are tried in that order.

  Returns:
    True if the binary was changed.
  """
  hash_path = file_path + '.sha1'
  if not os.path.exists(hash_path):
    logging.warning('Hash file not found: %s' % hash_path)
    return False

  expected_hash = ReadHash(hash_path)
  already_up_to_date = (os.path.exists(file_path) and
                        CalculateHash(file_path) == expected_hash)
  if already_up_to_date:
    return False

  if bucket:
    candidate_buckets = [bucket]
  else:
    candidate_buckets = [PUBLIC_BUCKET, PARTNER_BUCKET, INTERNAL_BUCKET]

  for candidate in candidate_buckets:
    url = 'gs://%s/%s' % (candidate, expected_hash)
    try:
      _RunCommand(['cp', url, file_path])
    except NotFoundError:
      # Not in this bucket; move on to the next candidate.
      continue
    logging.info('Downloaded %s to %s' % (url, file_path))
    return True

  logging.warning('Unable to find file in Cloud Storage: %s', file_path)
  return False
219
220
def CalculateHash(file_path):
  """Returns the hexadecimal SHA-1 digest of the file at file_path."""
  # Hash in 1mb pieces so the whole file never has to sit in memory at once.
  chunk_size = 1024 * 1024
  digest = hashlib.sha1()
  with open(file_path, 'rb') as stream:
    for piece in iter(lambda: stream.read(chunk_size), b''):
      digest.update(piece)
  return digest.hexdigest()
231  return sha1.hexdigest()
232
233
def ReadHash(hash_path):
  """Returns the hash stored in hash_path.

  Reads at most the first 1024 bytes of the file and strips trailing
  whitespace from them.
  """
  with open(hash_path, 'rb') as hash_file:
    raw_hash = hash_file.read(1024)
  return raw_hash.rstrip()
237