1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""
6
7import contextlib
8import cStringIO
9import hashlib
10import logging
11import os
12import subprocess
13import sys
14import tarfile
15import urllib2
16
17from telemetry.core import platform
18from telemetry.util import path
19
20
# Cloud Storage bucket names used by telemetry.
# NOTE(review): names suggest public/partner/internal access tiers — confirm
# against the actual bucket ACLs.
PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'


# Maps short alias names to the full bucket names defined above.
BUCKET_ALIASES = {
    'public': PUBLIC_BUCKET,
    'partner': PARTNER_BUCKET,
    'internal': INTERNAL_BUCKET,
}


# Location of the standalone gsutil tarball fetched by |_DownloadGsutil|.
_GSUTIL_URL = 'http://storage.googleapis.com/pub/gsutil.tar.gz'
# Directory the gsutil tarball is extracted into (see |_DownloadGsutil|).
_DOWNLOAD_PATH = os.path.join(path.GetTelemetryDir(), 'third_party', 'gsutil')
# TODO(tbarzic): A workaround for http://crbug.com/386416 and
#     http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'
38
39
class CloudStorageError(Exception):
  """Base exception for failures while talking to Cloud Storage via gsutil."""

  @staticmethod
  def _GetConfigInstructions(gsutil_path):
    """Return a human-readable hint on how to set up gsutil credentials."""
    # Googlers with prodaccess available just need to run it.
    if SupportsProdaccess(gsutil_path) and _FindExecutableInPath('prodaccess'):
      return 'Run prodaccess to authenticate.'
    # On cros, gsutil must be invoked with an overridden HOME; show the user
    # the exact command prefix to use (see |_RunCommand|).
    if platform.GetHostPlatform().GetOSName() == 'chromeos':
      gsutil_path = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, gsutil_path)
    return ('To configure your credentials:\n'
            '  1. Run "%s config" and follow its instructions.\n'
            '  2. If you have a @google.com account, use that account.\n'
            '  3. For the project-id, just enter 0.' % gsutil_path)
52
53
class PermissionError(CloudStorageError):
  """Raised when credentials are valid but lack access to the object."""

  def __init__(self, gsutil_path):
    message = ('Attempted to access a file from Cloud Storage but you don\'t '
               'have permission. ' + self._GetConfigInstructions(gsutil_path))
    super(PermissionError, self).__init__(message)
59
60
class CredentialsError(CloudStorageError):
  """Raised when gsutil has no credentials configured at all."""

  def __init__(self, gsutil_path):
    message = ('Attempted to access a file from Cloud Storage but you have no '
               'configured credentials. ' +
               self._GetConfigInstructions(gsutil_path))
    super(CredentialsError, self).__init__(message)
66
67
class NotFoundError(CloudStorageError):
  """Raised when a requested Cloud Storage URL or object does not exist."""
  pass
70
71
72# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  """Locate an executable, checking extra_search_paths before $PATH.

  Returns:
    The full path to the first executable found, or None if absent.
  """
  candidate_dirs = list(extra_search_paths)
  candidate_dirs.extend(os.environ['PATH'].split(os.pathsep))
  for candidate_dir in candidate_dirs:
    candidate = os.path.join(candidate_dir, relative_executable_path)
    if path.IsExecutable(candidate):
      return candidate
  return None
80
81
def _DownloadGsutil():
  """Fetch the standalone gsutil tarball and unpack it locally.

  Returns:
    The path to the extracted gsutil executable.
  """
  logging.info('Downloading gsutil')
  response = urllib2.urlopen(_GSUTIL_URL, timeout=60)
  with contextlib.closing(response):
    tarball = cStringIO.StringIO(response.read())
    # NOTE(review): extractall trusts the archive's member paths; acceptable
    # here since the tarball comes from Google's own distribution URL.
    with tarfile.open(fileobj=tarball) as tar_file:
      tar_file.extractall(os.path.dirname(_DOWNLOAD_PATH))
  logging.info('Downloaded gsutil to %s' % _DOWNLOAD_PATH)

  return os.path.join(_DOWNLOAD_PATH, 'gsutil')
90
91
def FindGsutil():
  """Return the gsutil executable path. If we can't find it, download it."""
  # Prefer a depot_tools checkout, then any plain gsutil installation.
  candidates = (
      os.path.join('third_party', 'gsutil', 'gsutil'),
      'gsutil',
  )
  for candidate in candidates:
    found = _FindExecutableInPath(candidate, _DOWNLOAD_PATH)
    if found:
      return found

  # Nothing installed locally. Download it!
  return _DownloadGsutil()
107
108
def SupportsProdaccess(gsutil_path):
  """Check whether the gsutil script at gsutil_path mentions prodaccess."""
  with open(gsutil_path, 'r') as gsutil_file:
    contents = gsutil_file.read()
  return 'prodaccess' in contents
112
113
def _RunCommand(args):
  """Run gsutil with the given argument list and return its stdout.

  Args:
    args: List of gsutil subcommand arguments, e.g. ['ls', 'gs://bucket/'].

  Returns:
    gsutil's stdout output on success (exit code 0).

  Raises:
    CredentialsError: gsutil reported missing/invalid credentials (or 401).
    PermissionError: gsutil reported an authorization failure (403).
    NotFoundError: the requested URL/object does not exist.
    CloudStorageError: any other non-zero gsutil exit.
  """
  gsutil_path = FindGsutil()

  # On cros device, as telemetry is running as root, home will be set to /root/,
  # which is not writable. gsutil will attempt to create a download tracker dir
  # in home dir and fail. To avoid this, override HOME dir to something writable
  # when running on cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  #     http://crbug.com/386416, http://crbug.com/359293.
  gsutil_env = None
  if platform.GetHostPlatform().GetOSName() == 'chromeos':
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR

  # gsutil is a Python script; run it with the same interpreter as telemetry.
  gsutil = subprocess.Popen([sys.executable, gsutil_path] + args,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            env=gsutil_env)
  stdout, stderr = gsutil.communicate()

  if gsutil.returncode:
    # Classify the failure by pattern-matching gsutil's stderr text.
    if stderr.startswith((
        'You are attempting to access protected data with no configured',
        'Failure: No handler was ready to authenticate.')):
      raise CredentialsError(gsutil_path)
    # HTTP 401 (unauthenticated) maps to a credentials problem...
    if 'status=401' in stderr or 'status 401' in stderr:
      raise CredentialsError(gsutil_path)
    # ...while HTTP 403 (unauthorized) maps to a permissions problem.
    if 'status=403' in stderr or 'status 403' in stderr:
      raise PermissionError(gsutil_path)
    if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
        'No URLs matched' in stderr):
      raise NotFoundError(stderr)
    # Anything else is surfaced verbatim as a generic Cloud Storage failure.
    raise CloudStorageError(stderr)

  return stdout
149
150
def List(bucket):
  """Return the object names in the given bucket, without the gs:// prefix."""
  base_url = 'gs://%s/' % bucket
  listing = _RunCommand(['ls', base_url])
  names = []
  for line in listing.splitlines():
    names.append(line[len(base_url):])
  return names
155
156
def Exists(bucket, remote_path):
  """Return True iff gs://bucket/remote_path exists in Cloud Storage."""
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
  except NotFoundError:
    return False
  return True
163
164
def Move(bucket1, bucket2, remote_path):
  """Move remote_path from bucket1 to bucket2."""
  source = 'gs://%s/%s' % (bucket1, remote_path)
  destination = 'gs://%s/%s' % (bucket2, remote_path)
  logging.info('Moving %s to %s' % (source, destination))
  _RunCommand(['mv', source, destination])
170
171
def Delete(bucket, remote_path):
  """Remove gs://bucket/remote_path from Cloud Storage."""
  target = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s' % target)
  _RunCommand(['rm', target])
176
177
def Get(bucket, remote_path, local_path):
  """Download gs://bucket/remote_path to local_path."""
  source_url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s' % (source_url, local_path))
  _RunCommand(['cp', source_url, local_path])
182
183
def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """Upload local_path to gs://bucket/remote_path.

  Args:
    publicly_readable: If True, upload with the public-read canned ACL.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  if publicly_readable:
    command_and_args = ['cp', '-a', 'public-read']
    extra_info = ' (publicly readable)'
  else:
    command_and_args = ['cp']
    extra_info = ''
  command_and_args += [local_path, url]
  logging.info('Uploading %s to %s%s' % (local_path, url, extra_info))
  _RunCommand(command_and_args)
194
195
def GetIfChanged(file_path, bucket=None):
  """Fetch file_path from Cloud Storage when its .sha1 file disagrees.

  If the file is not in Cloud Storage, log a warning instead of raising an
  exception. We assume that the user just hasn't uploaded the file yet.

  Args:
    bucket: Search only this bucket; if None, try all known buckets.

  Returns:
    True if the binary was changed.
  """
  hash_path = file_path + '.sha1'
  if not os.path.exists(hash_path):
    logging.warning('Hash file not found: %s' % hash_path)
    return False

  expected_hash = ReadHash(hash_path)
  if os.path.exists(file_path) and CalculateHash(file_path) == expected_hash:
    # Local copy already matches the recorded hash; nothing to do.
    return False

  candidate_buckets = (
      [bucket] if bucket else [PUBLIC_BUCKET, PARTNER_BUCKET, INTERNAL_BUCKET])
  for candidate in candidate_buckets:
    url = 'gs://%s/%s' % (candidate, expected_hash)
    try:
      _RunCommand(['cp', url, file_path])
    except NotFoundError:
      continue
    logging.info('Downloaded %s to %s' % (url, file_path))
    return True

  logging.warning('Unable to find file in Cloud Storage: %s', file_path)
  return False
230
231
def CalculateHash(file_path):
  """Calculates and returns the hash of the file at file_path."""
  digest = hashlib.sha1()
  with open(file_path, 'rb') as f:
    # Consume the file in 1 MiB chunks so large binaries never sit fully
    # in memory; iter() stops at the empty-read sentinel (EOF).
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      digest.update(chunk)
  return digest.hexdigest()
243
244
def ReadHash(hash_path):
  """Return the hash recorded in hash_path, minus trailing whitespace."""
  with open(hash_path, 'rb') as hash_file:
    # A sha1 hex digest is 40 characters; 1024 bytes is ample headroom.
    contents = hash_file.read(1024)
  return contents.rstrip()
248