# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""

import collections
import contextlib
import hashlib
import logging
import os
import shutil
import stat
import subprocess
import sys
import tempfile
import time

try:
  import fcntl
except ImportError:
  fcntl = None

from catapult_base import util


PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
TELEMETRY_OUTPUT = 'chrome-telemetry-output'

# Use an ordered dict to make sure that the bucket aliases are ordered from
# the most open to the most restrictive.
BUCKET_ALIASES = collections.OrderedDict((
    ('public', PUBLIC_BUCKET),
    ('partner', PARTNER_BUCKET),
    ('internal', INTERNAL_BUCKET),
    ('output', TELEMETRY_OUTPUT),
))

BUCKET_ALIAS_NAMES = BUCKET_ALIASES.keys()
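
# Example (illustrative): callers typically accept an alias on a command line
# and resolve it to a bucket name before invoking gsutil. This sketch is not
# part of this module's API.
#   bucket = BUCKET_ALIASES['public']  # -> 'chromium-telemetry'
#   for alias, bucket in BUCKET_ALIASES.iteritems():
#     print '%s -> gs://%s/' % (alias, bucket)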


_GSUTIL_PATH = os.path.join(util.GetCatapultDir(), 'third_party', 'gsutil',
                            'gsutil')

# TODO(tbarzic): A workaround for http://crbug.com/386416 and
#     http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'


# If the environment variable DISABLE_CLOUD_STORAGE_IO is set to '1', any
# method call that invokes cloud storage network I/O will raise an exception.
DISABLE_CLOUD_STORAGE_IO = 'DISABLE_CLOUD_STORAGE_IO'
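
# Example (illustrative): a hermetic test run can fail fast on accidental
# network I/O. The variable is checked per command in |_RunCommand|.
#   os.environ[DISABLE_CLOUD_STORAGE_IO] = '1'
#   Get(PUBLIC_BUCKET, 'foo', '/tmp/foo')  # raises CloudStorageIODisabled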


class CloudStorageError(Exception):

  @staticmethod
  def _GetConfigInstructions():
    command = _GSUTIL_PATH
    if util.IsRunningOnCrosDevice():
      command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, _GSUTIL_PATH)
    return ('To configure your credentials:\n'
            '  1. Run "%s config" and follow its instructions.\n'
            '  2. If you have a @google.com account, use that account.\n'
            '  3. For the project-id, just enter 0.' % command)


class PermissionError(CloudStorageError):

  def __init__(self):
    super(PermissionError, self).__init__(
        'Attempted to access a file from Cloud Storage but you don\'t '
        'have permission. ' + self._GetConfigInstructions())


class CredentialsError(CloudStorageError):

  def __init__(self):
    super(CredentialsError, self).__init__(
        'Attempted to access a file from Cloud Storage but you have no '
        'configured credentials. ' + self._GetConfigInstructions())


class CloudStorageIODisabled(CloudStorageError):
  pass


class NotFoundError(CloudStorageError):
  pass


class ServerError(CloudStorageError):
  pass


# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  search_paths = list(extra_search_paths) + os.environ['PATH'].split(os.pathsep)
  for search_path in search_paths:
    executable_path = os.path.join(search_path, relative_executable_path)
    if util.IsExecutable(executable_path):
      return executable_path
  return None


def _EnsureExecutable(gsutil):
  """chmod +x if gsutil is not executable."""
  st = os.stat(gsutil)
  if not st.st_mode & stat.S_IEXEC:
    os.chmod(gsutil, st.st_mode | stat.S_IEXEC)


def _RunCommand(args):
  # On a CrOS device, telemetry runs as root, so HOME is set to /root/, which
  # is not writable. gsutil would then fail trying to create its download
  # tracker directory under HOME. To avoid this, override HOME with a writable
  # directory when running on a CrOS device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  #     http://crbug.com/386416, http://crbug.com/359293.
  gsutil_env = None
  if util.IsRunningOnCrosDevice():
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR

  # Check the subcommand before the gsutil executable path is prepended below;
  # otherwise args[0] would be the path rather than the subcommand, and the
  # check would reject even commands that do no network I/O.
  if (os.getenv(DISABLE_CLOUD_STORAGE_IO) == '1' and
      args[0] not in ('help', 'hash', 'version')):
    raise CloudStorageIODisabled(
        'Environment variable DISABLE_CLOUD_STORAGE_IO is set to 1. '
        'Command %s is not allowed to run.' % args)

  if os.name == 'nt':
    # If Windows, prepend python. Python scripts aren't directly executable.
    args = [sys.executable, _GSUTIL_PATH] + args
  else:
    # Don't do it on POSIX, in case someone is using a shell script to redirect.
    args = [_GSUTIL_PATH] + args
    _EnsureExecutable(_GSUTIL_PATH)

  gsutil = subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, env=gsutil_env)
  stdout, stderr = gsutil.communicate()

  if gsutil.returncode:
    if stderr.startswith((
        'You are attempting to access protected data with no configured',
        'Failure: No handler was ready to authenticate.')):
      raise CredentialsError()
    if ('status=403' in stderr or 'status 403' in stderr or
        '403 Forbidden' in stderr):
      raise PermissionError()
    if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
        'No URLs matched' in stderr or 'One or more URLs matched no' in stderr):
      raise NotFoundError(stderr)
    if '500 Internal Server Error' in stderr:
      raise ServerError(stderr)
    raise CloudStorageError(stderr)

  return stdout
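

# Example (illustrative): |_RunCommand| maps gsutil failures onto the exception
# hierarchy above, so callers can handle specific cases. The handling shown
# here is a sketch, not part of this module.
#   try:
#     _RunCommand(['ls', 'gs://chromium-telemetry/'])
#   except CredentialsError:
#     pass  # e.g. prompt the user to run "gsutil config".
#   except NotFoundError:
#     pass  # e.g. treat a missing object as a cache miss.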


def List(bucket):
  query = 'gs://%s/' % bucket
  stdout = _RunCommand(['ls', query])
  return [url[len(query):] for url in stdout.splitlines()]


def Exists(bucket, remote_path):
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
    return True
  except NotFoundError:
    return False


def Move(bucket1, bucket2, remote_path):
  url1 = 'gs://%s/%s' % (bucket1, remote_path)
  url2 = 'gs://%s/%s' % (bucket2, remote_path)
  logging.info('Moving %s to %s', url1, url2)
  _RunCommand(['mv', url1, url2])


def Copy(bucket_from, bucket_to, remote_path_from, remote_path_to):
  """Copies a file from one location in Cloud Storage to another.

  Args:
      bucket_from: The cloud storage bucket where the file is currently located.
      bucket_to: The cloud storage bucket it is being copied to.
      remote_path_from: The file path where the file is located in bucket_from.
      remote_path_to: The file path it is being copied to in bucket_to.

  This causes no changes locally or to the source file, and will overwrite any
  existing file at the destination.
  """
  url1 = 'gs://%s/%s' % (bucket_from, remote_path_from)
  url2 = 'gs://%s/%s' % (bucket_to, remote_path_to)
  logging.info('Copying %s to %s', url1, url2)
  _RunCommand(['cp', url1, url2])


def Delete(bucket, remote_path):
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s', url)
  _RunCommand(['rm', url])
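

# Example (illustrative): the wrappers above compose into simple maintenance
# tasks. The 'archive/' prefix is a placeholder, not a real layout.
#   for name in List(TELEMETRY_OUTPUT):
#     Copy(TELEMETRY_OUTPUT, INTERNAL_BUCKET, name, 'archive/' + name)
#     Delete(TELEMETRY_OUTPUT, name)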


def Get(bucket, remote_path, local_path):
  with _PseudoFileLock(local_path):
    _GetLocked(bucket, remote_path, local_path)


@contextlib.contextmanager
def _PseudoFileLock(base_path):
  pseudo_lock_path = '%s.pseudo_lock' % base_path
  _CreateDirectoryIfNecessary(os.path.dirname(pseudo_lock_path))
  # This is somewhat of a racy hack because we don't have a good
  # cross-platform file lock. If we get one, this should be refactored
  # to use it.
  while os.path.exists(pseudo_lock_path):
    time.sleep(0.1)
  fd = os.open(pseudo_lock_path, os.O_RDONLY | os.O_CREAT)
  if fcntl:
    fcntl.flock(fd, fcntl.LOCK_EX)
  try:
    yield
  finally:
    if fcntl:
      fcntl.flock(fd, fcntl.LOCK_UN)
    try:
      os.close(fd)
      os.remove(pseudo_lock_path)
    except OSError:
      # We don't care if the pseudo-lock gets removed elsewhere before we have
      # a chance to do so.
      pass


def _CreateDirectoryIfNecessary(directory):
  if not os.path.exists(directory):
    os.makedirs(directory)


def _GetLocked(bucket, remote_path, local_path):
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s', url, local_path)
  _CreateDirectoryIfNecessary(os.path.dirname(local_path))
  with tempfile.NamedTemporaryFile(
      dir=os.path.dirname(local_path),
      delete=False) as partial_download_path:
    try:
      # Windows won't download to an open file.
      partial_download_path.close()
      try:
        _RunCommand(['cp', url, partial_download_path.name])
      except ServerError:
        logging.info('Cloud Storage server error, retrying download')
        _RunCommand(['cp', url, partial_download_path.name])
      shutil.move(partial_download_path.name, local_path)
    finally:
      if os.path.exists(partial_download_path.name):
        os.remove(partial_download_path.name)


def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """Uploads the file at |local_path| to cloud storage.

  Args:
    bucket: the google cloud storage bucket name.
    remote_path: the remote file path in |bucket|.
    local_path: path of the local file to be uploaded.
    publicly_readable: whether the uploaded file has publicly readable
        permission.

  Returns:
    A URL at which the uploaded file can be viewed in the Cloud Storage
    console.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  command_and_args = ['cp']
  extra_info = ''
  if publicly_readable:
    command_and_args += ['-a', 'public-read']
    extra_info = ' (publicly readable)'
  command_and_args += [local_path, url]
  logging.info('Uploading %s to %s%s', local_path, url, extra_info)
  _RunCommand(command_and_args)
  return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % (
      bucket, remote_path)
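

# Example (illustrative): uploading a log publicly and keeping the returned
# console URL. The file paths are placeholders.
#   view_url = Insert(PUBLIC_BUCKET, 'logs/run_1.txt', '/tmp/run_1.txt',
#                     publicly_readable=True)
#   logging.info('Uploaded; view at %s', view_url)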


def GetIfHashChanged(cs_path, download_path, bucket, file_hash):
  """Downloads |cs_path| to |download_path| if |download_path| doesn't exist
     or its hash doesn't match |file_hash|.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(download_path):
    if (os.path.exists(download_path) and
        CalculateHash(download_path) == file_hash):
      return False
    _GetLocked(bucket, cs_path, download_path)
    return True


def GetIfChanged(file_path, bucket):
  """Gets the file at file_path if its hash file doesn't match the local copy,
  or if there is no local copy of file_path but there is a hash file for it.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(file_path):
    hash_path = file_path + '.sha1'
    if not os.path.exists(hash_path):
      logging.warning('Hash file not found: %s', hash_path)
      return False

    expected_hash = ReadHash(hash_path)
    if os.path.exists(file_path) and CalculateHash(file_path) == expected_hash:
      return False
    _GetLocked(bucket, expected_hash, file_path)
    return True
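

# Example (illustrative): |GetIfChanged| relies on the convention that a blob
# is stored in the bucket under its own SHA-1, with a sibling <file>.sha1 file
# kept locally. The path below is a placeholder.
#   # bin/tool.sha1 holds the hash; the blob lives at gs://<bucket>/<hash>.
#   GetIfChanged('bin/tool', PUBLIC_BUCKET)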


def GetFilesInDirectoryIfChanged(directory, bucket):
  """Scans the directory for .sha1 files, and downloads the corresponding
  files from the given bucket in cloud storage if the local and remote hashes
  don't match or there is no local copy.
  """
  if not os.path.isdir(directory):
    raise ValueError(
        '%s does not exist. Must provide a valid directory path.' % directory)
  # Don't allow the root directory to be a serving_dir.
  if directory == os.path.abspath(os.sep):
    raise ValueError('Trying to serve root directory from HTTP server.')
  for dirpath, _, filenames in os.walk(directory):
    for filename in filenames:
      path_name, extension = os.path.splitext(
          os.path.join(dirpath, filename))
      if extension != '.sha1':
        continue
      GetIfChanged(path_name, bucket)


def CalculateHash(file_path):
  """Calculates and returns the hash of the file at file_path."""
  sha1 = hashlib.sha1()
  with open(file_path, 'rb') as f:
    while True:
      # Read in 1MB chunks to avoid loading the whole file into memory.
      chunk = f.read(1024 * 1024)
      if not chunk:
        break
      sha1.update(chunk)
  return sha1.hexdigest()
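

# Example (illustrative): refreshing a .sha1 file after uploading a new
# binary, matching the hash-addressed layout |GetIfChanged| expects. The
# paths are placeholders.
#   file_hash = CalculateHash('bin/tool')
#   with open('bin/tool.sha1', 'wb') as f:
#     f.write(file_hash)
#   Insert(PUBLIC_BUCKET, file_hash, 'bin/tool')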


def ReadHash(hash_path):
  with open(hash_path, 'rb') as f:
    return f.read(1024).rstrip()