1# Copyright (c) 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Wrappers for gsutil, for basic interaction with Google Cloud Storage."""
6
7import cStringIO
8import hashlib
9import logging
10import os
11import subprocess
12import sys
13import tarfile
14import urllib2
15
16from telemetry.core import util
17
18
# Bucket used when callers do not specify one (web-page-replay archives).
DEFAULT_BUCKET = 'chromium-wpr'


# Public tarball of the gsutil tool, and the local directory it is
# extracted into when no pre-existing installation is found.
_GSUTIL_URL = 'http://storage.googleapis.com/pub/gsutil.tar.gz'
_DOWNLOAD_PATH = os.path.join(util.GetTelemetryDir(), 'third_party', 'gsutil')
24
25
class CloudStorageError(Exception):
  """Raised when a gsutil invocation exits with a non-zero return code."""
  pass
28
29
def _DownloadGsutil():
  """Downloads and extracts gsutil into _DOWNLOAD_PATH.

  Returns:
    The path of the gsutil executable inside the extracted tree.

  Raises:
    urllib2.URLError: If the download fails.
  """
  logging.info('Downloading gsutil')
  response = urllib2.urlopen(_GSUTIL_URL)
  try:
    # Buffer the whole tarball in memory; it is small (a few MB).
    tarball = cStringIO.StringIO(response.read())
  finally:
    # Close the HTTP response explicitly instead of leaking the socket.
    response.close()
  # The tarball comes from Google's own bucket, so extractall is safe here.
  with tarfile.open(fileobj=tarball) as tar_file:
    tar_file.extractall(os.path.dirname(_DOWNLOAD_PATH))
  # Use lazy %-args so the message is only formatted when emitted.
  logging.info('Downloaded gsutil to %s', _DOWNLOAD_PATH)

  return os.path.join(_DOWNLOAD_PATH, 'gsutil')
38
39
def _FindGsutil():
  """Return the gsutil executable path. If we can't find it, download it."""
  candidate_dirs = [_DOWNLOAD_PATH] + os.environ['PATH'].split(os.pathsep)

  # Check every candidate directory for a depot_tools-style installation
  # first, then fall back to a standalone gsutil next to the directory.
  relative_locations = (
      os.path.join('third_party', 'gsutil', 'gsutil'),  # depot_tools layout
      'gsutil',                                         # plain installation
  )
  for location in relative_locations:
    for directory in candidate_dirs:
      candidate = os.path.join(directory, location)
      if os.path.isfile(candidate):
        return candidate

  # No local copy anywhere; fetch our own.
  return _DownloadGsutil()
58
59
def _RunCommand(args):
  """Runs gsutil with the given arguments.

  Args:
    args: List of command-line arguments to pass to gsutil
        (e.g. ['ls', 'gs://bucket']).

  Returns:
    The standard output of the gsutil invocation.

  Raises:
    CloudStorageError: If gsutil exits with a non-zero return code. The
        message is the last line of gsutil's stderr when available.
  """
  gsutil_path = _FindGsutil()
  # gsutil is itself a Python script, so run it through our interpreter.
  gsutil = subprocess.Popen([sys.executable, gsutil_path] + args,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout, stderr = gsutil.communicate()

  if gsutil.returncode:
    # Guard against empty stderr: splitlines()[-1] would raise IndexError
    # and mask the real failure with the wrong exception type.
    error_lines = stderr.splitlines()
    if error_lines:
      raise CloudStorageError(error_lines[-1])
    raise CloudStorageError(
        'gsutil failed with return code %d' % gsutil.returncode)

  return stdout
70
71
def List(bucket):
  """Return the object names in the given Cloud Storage bucket."""
  listing = _RunCommand(['ls', 'gs://%s' % bucket])
  names = []
  for object_url in listing.splitlines():
    # Each line is a full gs:// URL; keep only the trailing object name.
    names.append(object_url.split('/')[-1])
  return names
75
76
def Delete(bucket, remote_path):
  """Remove the object at remote_path from the given bucket."""
  target = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s' % target)
  _RunCommand(['rm', target])
81
82
def Get(bucket, remote_path, local_path):
  """Copy the object at remote_path in the bucket down to local_path."""
  source = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s' % (source, local_path))
  _RunCommand(['cp', source, local_path])
87
88
def Insert(bucket, remote_path, local_path):
  """Upload the file at local_path to remote_path in the bucket."""
  destination = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Uploading %s to %s' % (local_path, destination))
  _RunCommand(['cp', local_path, destination])
93
94
def GetIfChanged(bucket, file_path):
  """Gets the file at file_path if it has a hash file that doesn't match."""
  hash_path = file_path + '.sha1'
  # No sibling .sha1 file means this file is not managed by Cloud Storage.
  if not os.path.exists(hash_path):
    return

  with open(hash_path, 'rb') as hash_file:
    # The hash file holds a single hex digest; 1024 bytes is plenty.
    expected_hash = hash_file.read(1024).rstrip()

  up_to_date = (os.path.exists(file_path) and
                GetHash(file_path) == expected_hash)
  if not up_to_date:
    # Objects are stored under their content hash as the remote name.
    Get(bucket, expected_hash, file_path)
105
106
def GetHash(file_path):
  """Calculates and returns the hash of the file at file_path."""
  digest = hashlib.sha1()
  with open(file_path, 'rb') as stream:
    # Stream the file through in 1 MB chunks so large files never have to
    # be resident in memory all at once; iteration stops at EOF (b'').
    for chunk in iter(lambda: stream.read(1024 * 1024), b''):
      digest.update(chunk)
  return digest.hexdigest()
118