# cloud_storage.py revision 5ad62731e62b9eb8d13f6e66dd1b57deaebdee11
1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""
6
7import collections
8import contextlib
9import hashlib
10import logging
11import os
12import shutil
13import stat
14import subprocess
15import sys
16import tempfile
17import time
18
19import py_utils
20from py_utils import lock
21
22# Do a no-op import here so that cloud_storage_global_lock dep is picked up
23# by https://cs.chromium.org/chromium/src/build/android/test_runner.pydeps.
24# TODO(nedn, jbudorick): figure out a way to get rid of this ugly hack.
25from py_utils import cloud_storage_global_lock  # pylint: disable=unused-import
26
27
# Well-known Cloud Storage bucket names, ordered from least to most
# restricted access.
PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
TELEMETRY_OUTPUT = 'chrome-telemetry-output'

# Uses ordered dict to make sure that bucket's key-value items are ordered from
# the most open to the most restrictive.
BUCKET_ALIASES = collections.OrderedDict((
    ('public', PUBLIC_BUCKET),
    ('partner', PARTNER_BUCKET),
    ('internal', INTERNAL_BUCKET),
    ('output', TELEMETRY_OUTPUT),
))

# Short alias names, preserving the most-open-first order of BUCKET_ALIASES.
BUCKET_ALIAS_NAMES = BUCKET_ALIASES.keys()
43
44
# Path of the gsutil script bundled in catapult's third_party directory.
_GSUTIL_PATH = os.path.join(py_utils.GetCatapultDir(), 'third_party', 'gsutil',
                            'gsutil')

# TODO(tbarzic): A workaround for http://crbug.com/386416 and
#     http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'


# If the environment variable DISABLE_CLOUD_STORAGE_IO is set to '1', any
# method calls that invoke cloud storage network io will throw exceptions.
DISABLE_CLOUD_STORAGE_IO = 'DISABLE_CLOUD_STORAGE_IO'
56
57
58
class CloudStorageError(Exception):
  """Base class for errors raised while interacting with Cloud Storage."""

  @staticmethod
  def _GetConfigInstructions():
    """Returns human-readable steps for configuring gsutil credentials."""
    if py_utils.IsRunningOnCrosDevice():
      # On a cros device HOME must be overridden; see |_RunCommand|.
      command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, _GSUTIL_PATH)
    else:
      command = _GSUTIL_PATH
    return ('To configure your credentials:\n'
            '  1. Run "%s config" and follow its instructions.\n'
            '  2. If you have a @google.com account, use that account.\n'
            '  3. For the project-id, just enter 0.' % command)
70
71
class PermissionError(CloudStorageError):
  """Raised when the user lacks permission for the requested object."""

  def __init__(self):
    message = ('Attempted to access a file from Cloud Storage but you don\'t '
               'have permission. ' + self._GetConfigInstructions())
    super(PermissionError, self).__init__(message)
78
79
class CredentialsError(CloudStorageError):
  """Raised when no gsutil credentials have been configured."""

  def __init__(self):
    message = ('Attempted to access a file from Cloud Storage but you have no '
               'configured credentials. ' + self._GetConfigInstructions())
    super(CredentialsError, self).__init__(message)
86
87
class CloudStorageIODisabled(CloudStorageError):
  """Raised when network io is attempted while DISABLE_CLOUD_STORAGE_IO is '1'."""
  pass
90
91
class NotFoundError(CloudStorageError):
  """Raised when the requested Cloud Storage object does not exist."""
  pass
94
95
class ServerError(CloudStorageError):
  """Raised when gsutil reports a 500 Internal Server Error."""
  pass
98
99
100# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  """Searches |extra_search_paths| and then $PATH for an executable.

  Args:
    relative_executable_path: Executable name (or path relative to each
        search directory).
    *extra_search_paths: Directories checked before the $PATH entries.

  Returns:
    The first path that exists and is executable, or None if none is found.
  """
  # Use .get() so an unset PATH environment variable yields an empty search
  # list instead of raising KeyError.
  path_entries = os.environ.get('PATH', '').split(os.pathsep)
  search_paths = list(extra_search_paths) + path_entries
  for search_path in search_paths:
    executable_path = os.path.join(search_path, relative_executable_path)
    if py_utils.IsExecutable(executable_path):
      return executable_path
  return None
108
109
110def _EnsureExecutable(gsutil):
111  """chmod +x if gsutil is not executable."""
112  st = os.stat(gsutil)
113  if not st.st_mode & stat.S_IEXEC:
114    os.chmod(gsutil, st.st_mode | stat.S_IEXEC)
115
116
def _RunCommand(args):
  """Runs a gsutil subcommand and returns its stdout.

  Args:
    args: The gsutil subcommand and its arguments, e.g. ['ls', 'gs://foo/'].

  Returns:
    The stdout produced by gsutil.

  Raises:
    CloudStorageIODisabled: if DISABLE_CLOUD_STORAGE_IO is '1' and the
        subcommand is not one of the offline commands help/hash/version.
    CredentialsError, PermissionError, NotFoundError, ServerError,
    CloudStorageError: mapped from gsutil's stderr on a nonzero exit.
  """
  # On cros device, as telemetry is running as root, home will be set to /root/,
  # which is not writable. gsutil will attempt to create a download tracker dir
  # in home dir and fail. To avoid this, override HOME dir to something writable
  # when running on cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  #     http://crbug.com/386416, http://crbug.com/359293.
  gsutil_env = None
  if py_utils.IsRunningOnCrosDevice():
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR

  # Check the subcommand *before* prepending the interpreter/gsutil path.
  # Previously this check ran after prepending, so args[0] was never the
  # subcommand and the help/hash/version whitelist was dead code: those
  # offline commands were wrongly blocked when DISABLE_CLOUD_STORAGE_IO=1.
  if args[0] not in ('help', 'hash', 'version') and not IsNetworkIOEnabled():
    raise CloudStorageIODisabled(
        "Environment variable DISABLE_CLOUD_STORAGE_IO is set to 1. "
        'Command %s is not allowed to run' % args)

  if os.name == 'nt':
    # If Windows, prepend python. Python scripts aren't directly executable.
    args = [sys.executable, _GSUTIL_PATH] + args
  else:
    # Don't do it on POSIX, in case someone is using a shell script to redirect.
    args = [_GSUTIL_PATH] + args
    _EnsureExecutable(_GSUTIL_PATH)

  gsutil = subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, env=gsutil_env)
  stdout, stderr = gsutil.communicate()

  if gsutil.returncode:
    if stderr.startswith((
        'You are attempting to access protected data with no configured',
        'Failure: No handler was ready to authenticate.')):
      raise CredentialsError()
    if ('status=403' in stderr or 'status 403' in stderr or
        '403 Forbidden' in stderr):
      raise PermissionError()
    if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
        'No URLs matched' in stderr or 'One or more URLs matched no' in stderr):
      raise NotFoundError(stderr)
    if '500 Internal Server Error' in stderr:
      raise ServerError(stderr)
    raise CloudStorageError(stderr)

  return stdout
163
164
def IsNetworkIOEnabled():
  """Returns true if cloud storage is enabled."""
  env_val = os.getenv(DISABLE_CLOUD_STORAGE_IO)
  # Any non-empty value other than '1' is unsupported; warn loudly but treat
  # it the same as unset (io stays enabled).
  if env_val and env_val != '1':
    logging.error(
        'Unsupported value of environment variable '
        'DISABLE_CLOUD_STORAGE_IO. Expected None or \'1\' but got %s.',
        env_val)
  return env_val != '1'
176
177
def List(bucket):
  """Returns the object names under gs://bucket/ (prefix stripped)."""
  prefix = 'gs://%s/' % bucket
  listing = _RunCommand(['ls', prefix])
  prefix_len = len(prefix)
  return [line[prefix_len:] for line in listing.splitlines()]
182
183
def Exists(bucket, remote_path):
  """Returns True iff gs://bucket/remote_path exists."""
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
  except NotFoundError:
    return False
  return True
190
191
def Move(bucket1, bucket2, remote_path):
  """Moves |remote_path| from |bucket1| to the same path in |bucket2|."""
  source = 'gs://%s/%s' % (bucket1, remote_path)
  destination = 'gs://%s/%s' % (bucket2, remote_path)
  logging.info('Moving %s to %s', source, destination)
  _RunCommand(['mv', source, destination])
197
198
def Copy(bucket_from, bucket_to, remote_path_from, remote_path_to):
  """Copies a file from one Cloud Storage location to another.

  The source object and local files are left untouched; any existing object
  at the destination is overwritten.

  Args:
      bucket_from: Bucket the file currently lives in.
      bucket_to: Bucket the file is being copied to.
      remote_path_from: Path of the file within |bucket_from|.
      remote_path_to: Destination path within |bucket_to|.
  """
  source = 'gs://%s/%s' % (bucket_from, remote_path_from)
  destination = 'gs://%s/%s' % (bucket_to, remote_path_to)
  logging.info('Copying %s to %s', source, destination)
  _RunCommand(['cp', source, destination])
215
216
def Delete(bucket, remote_path):
  """Deletes gs://bucket/remote_path."""
  target = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s', target)
  _RunCommand(['rm', target])
221
222
def Get(bucket, remote_path, local_path):
  """Downloads gs://bucket/remote_path to |local_path| under the file lock."""
  with _FileLock(local_path):
    _GetLocked(bucket, remote_path, local_path)
226
227
# File flock'd by _FileLock to serialize pseudo-lock creation across
# processes; deliberately a real checked-in file (see the no-op import of
# cloud_storage_global_lock at the top of this module).
_CLOUD_STORAGE_GLOBAL_LOCK = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'cloud_storage_global_lock.py')
230
231
@contextlib.contextmanager
def _FileLock(base_path):
  """Context manager serializing access to |base_path| across processes.

  A sibling '<base_path>.pseudo_lock' file is created and flock'd for the
  duration of the context, then removed on exit so waiters polling
  os.path.exists() below can proceed.
  """
  pseudo_lock_path = '%s.pseudo_lock' % base_path
  _CreateDirectoryIfNecessary(os.path.dirname(pseudo_lock_path))

  # We need to make sure that there is no other process which is acquiring the
  # lock on |base_path| and has not finished before proceeding further to create
  # the |pseudo_lock_path|. Otherwise, |pseudo_lock_path| may be deleted by
  # that other process after we create it in this process.
  while os.path.exists(pseudo_lock_path):
    time.sleep(0.1)

  # Guard the creation & acquiring lock of |pseudo_lock_path| by the global lock
  # to make sure that there is no race condition on creating the file.
  with open(_CLOUD_STORAGE_GLOBAL_LOCK) as global_file:
    with lock.FileLock(global_file, lock.LOCK_EX):
      # |fd| intentionally outlives this 'with': the flock on it is held
      # until the finally clause below releases it.
      fd = open(pseudo_lock_path, 'w')
      lock.AcquireFileLock(fd, lock.LOCK_EX)
  try:
    yield
  finally:
    lock.ReleaseFileLock(fd)
    try:
      fd.close()
      os.remove(pseudo_lock_path)
    except OSError:
      # We don't care if the pseudo-lock gets removed elsewhere before we have
      # a chance to do so.
      pass
261
262
263def _CreateDirectoryIfNecessary(directory):
264  if not os.path.exists(directory):
265    os.makedirs(directory)
266
267
def _GetLocked(bucket, remote_path, local_path):
  """Downloads gs://bucket/remote_path to |local_path|; caller holds the lock.

  The download goes to a temporary file in the destination directory first
  and is moved into place only on success, with one retry on ServerError.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s', url, local_path)
  destination_dir = os.path.dirname(local_path)
  _CreateDirectoryIfNecessary(destination_dir)
  with tempfile.NamedTemporaryFile(
      dir=destination_dir, delete=False) as partial_file:
    try:
      # Windows won't download to an open file.
      partial_file.close()
      try:
        _RunCommand(['cp', url, partial_file.name])
      except ServerError:
        logging.info('Cloud Storage server error, retrying download')
        _RunCommand(['cp', url, partial_file.name])
      shutil.move(partial_file.name, local_path)
    finally:
      # Clean up the partial file if the download or move failed.
      if os.path.exists(partial_file.name):
        os.remove(partial_file.name)
287
288
def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """Uploads the file at |local_path| to cloud storage.

  Args:
    bucket: the google cloud storage bucket name.
    remote_path: the remote file path in |bucket|.
    local_path: path of the local file to be uploaded.
    publicly_readable: whether the uploaded file has publicly readable
        permission.

  Returns:
    The url where the file is uploaded to.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  if publicly_readable:
    command_and_args = ['cp', '-a', 'public-read']
    extra_info = ' (publicly readable)'
  else:
    command_and_args = ['cp']
    extra_info = ''
  command_and_args += [local_path, url]
  logging.info('Uploading %s to %s%s', local_path, url, extra_info)
  _RunCommand(command_and_args)
  return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % (
      bucket, remote_path)
312
313
def GetIfHashChanged(cs_path, download_path, bucket, file_hash):
  """Downloads gs://bucket/cs_path to |download_path| unless the local copy
  already exists and its hash matches |file_hash|.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _FileLock(download_path):
    local_copy_is_current = (
        os.path.exists(download_path) and
        CalculateHash(download_path) == file_hash)
    if local_copy_is_current:
      return False
    _GetLocked(bucket, cs_path, download_path)
    return True
331
332
def GetIfChanged(file_path, bucket):
  """Downloads |file_path| from |bucket| when its .sha1 sidecar file exists
  and the local copy is missing or does not match the recorded hash.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _FileLock(file_path):
    hash_path = file_path + '.sha1'
    if not os.path.exists(hash_path):
      logging.warning('Hash file not found: %s', hash_path)
      return False

    expected_hash = ReadHash(hash_path)
    if os.path.exists(file_path):
      if CalculateHash(file_path) == expected_hash:
        # Local copy is already up to date.
        return False
    # Objects are stored in the bucket keyed by their content hash.
    _GetLocked(bucket, expected_hash, file_path)
    return True
355
356
def GetFilesInDirectoryIfChanged(directory, bucket):
  """Walks |directory| for .sha1 files and fetches each referenced file from
  |bucket| when the local copy is missing or its hash does not match.
  """
  if not os.path.isdir(directory):
    raise ValueError(
        '%s does not exist. Must provide a valid directory path.' % directory)
  # Don't allow the root directory to be a serving_dir.
  if directory == os.path.abspath(os.sep):
    raise ValueError('Trying to serve root directory from HTTP server.')
  for dirpath, _, filenames in os.walk(directory):
    for filename in filenames:
      stem, extension = os.path.splitext(os.path.join(dirpath, filename))
      if extension == '.sha1':
        GetIfChanged(stem, bucket)
375
376
def CalculateHash(file_path):
  """Returns the SHA-1 hex digest of the file at |file_path|."""
  digest = hashlib.sha1()
  with open(file_path, 'rb') as f:
    # Read in 1mb chunks, so it doesn't all have to be loaded into memory.
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      digest.update(chunk)
  return digest.hexdigest()
388
389
def ReadHash(hash_path):
  """Returns the hash recorded in |hash_path| (first 1KB, trailing
  whitespace stripped)."""
  with open(hash_path, 'rb') as hash_file:
    return hash_file.read(1024).rstrip()
393