1#!/usr/bin/python
2# Copyright (c) 2012 The Native Client Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A library to assist automatically downloading files.
7
8This library is used by scripts that download tarballs, zipfiles, etc. as part
9of the build process.
10"""
11
12import hashlib
13import http_download
14import os.path
15import re
16import shutil
17import sys
18import time
19import urllib2
20
# Names of the stamp files dropped next to downloaded content.  They record,
# respectively, the URL the content was fetched from and its SHA1 hash, and
# are compared against on later runs to decide whether to re-download.
SOURCE_STAMP = 'SOURCE_URL'
HASH_STAMP = 'SOURCE_SHA1'


# Designed to handle more general inputs than sys.platform because the platform
# name may come from the command line.
PLATFORM_COLLAPSE = {
    'windows': 'windows',
    'win32': 'windows',
    'cygwin': 'windows',
    'linux': 'linux',
    'linux2': 'linux',
    'linux3': 'linux',
    'darwin': 'mac',
    'mac': 'mac',
}

# Collapses machine names (as returned by platform.machine() or given on the
# command line) into canonical architecture family names.
ARCH_COLLAPSE = {
    'i386'  : 'x86',
    'i686'  : 'x86',
    'x86_64': 'x86',
    'armv7l': 'arm',
}
44
45
class HashError(Exception):
  """Raised when a download's SHA1 hash differs from the expected hash."""

  def __init__(self, download_url, expected_hash, actual_hash):
    # Keep the details around as attributes so callers can inspect them.
    self.download_url = download_url
    self.expected_hash = expected_hash
    self.actual_hash = actual_hash

  def __str__(self):
    details = (self.actual_hash, self.expected_hash, self.download_url)
    return 'Got hash "%s" but expected hash "%s" for "%s"' % details
55
56
def PlatformName(name=None):
  """Collapse |name| (default: sys.platform) to a canonical platform name.

  Raises KeyError for platform names not present in PLATFORM_COLLAPSE.
  """
  key = sys.platform if name is None else name
  return PLATFORM_COLLAPSE[key]
61
def ArchName(name=None):
  """Collapse a machine name to a canonical architecture family.

  When |name| is None, it is auto-detected from the running machine.
  Raises KeyError for machine names not present in ARCH_COLLAPSE.
  """
  if name is not None:
    return ARCH_COLLAPSE[name]
  if PlatformName() == 'windows':
    # TODO(pdox): Figure out how to auto-detect 32-bit vs 64-bit Windows.
    detected = 'i386'
  else:
    import platform
    detected = platform.machine()
  return ARCH_COLLAPSE[detected]
71
def EnsureFileCanBeWritten(filename):
  """Create the directory that will contain |filename|, if needed.

  A filename with no directory component (a plain name in the current
  directory) needs no work; previously os.path.dirname() returned '' for
  such names and os.makedirs('') raised OSError.
  """
  directory = os.path.dirname(filename)
  if directory and not os.path.exists(directory):
    os.makedirs(directory)
76
77
def WriteData(filename, data):
  """Write |data| to |filename| in binary mode, creating parent dirs.

  The file handle is closed even if the write raises, so a failed write
  does not leak the descriptor.
  """
  EnsureFileCanBeWritten(filename)
  f = open(filename, 'wb')
  try:
    f.write(data)
  finally:
    f.close()
83
84
def WriteDataFromStream(filename, stream, chunk_size, verbose=True):
  """Copy |stream| into |filename| in chunks of |chunk_size| bytes.

  When |verbose| is set, one dot is printed per chunk as a progress
  indicator, followed by a final newline.
  """
  EnsureFileCanBeWritten(filename)
  out = open(filename, 'wb')
  try:
    chunk = stream.read(chunk_size)
    while chunk:
      out.write(chunk)
      if verbose:
        # Indicate that we're still writing.
        sys.stdout.write('.')
        sys.stdout.flush()
      chunk = stream.read(chunk_size)
  finally:
    if verbose:
      sys.stdout.write('\n')
    out.close()
102
103
def DoesStampMatch(stampfile, expected, index):
  """Check whether line |index| of |stampfile| equals |expected|.

  Returns a truthy reason string when the stamp matches (or begins with
  'manual', indicating a manual override), and False when the file is
  missing/unreadable or does not match.  Previously an |index| beyond the
  number of lines in the stamp raised IndexError and also skipped the
  manual-override check; now both are handled.
  """
  try:
    f = open(stampfile, 'r')
    try:
      stamp = f.read()
    finally:
      # Close even if the read fails.
      f.close()
  except IOError:
    return False
  lines = stamp.split('\n')
  if index < len(lines) and lines[index] == expected:
    return "already up-to-date."
  if stamp.startswith('manual'):
    return "manual override."
  return False
116
117
def WriteStamp(stampfile, data):
  """Write |data| to |stampfile| in text mode, creating parent dirs.

  The handle is closed even if the write raises, so a failed write does
  not leak the descriptor.
  """
  EnsureFileCanBeWritten(stampfile)
  f = open(stampfile, 'w')
  try:
    f.write(data)
  finally:
    f.close()
123
124
def StampIsCurrent(path, stamp_name, stamp_contents, min_time=None, index=0):
  """Check the stamp file |stamp_name| under directory |path|.

  Returns a truthy reason string when line |index| of the stamp equals
  |stamp_contents| (and, when |min_time| is given, the stamp's mtime is
  newer than it); False otherwise.
  """
  stampfile = os.path.join(path, stamp_name)

  if min_time:
    # A stamp older than the minimum last-modified time is stale; so is a
    # stamp we cannot stat at all.
    try:
      if os.stat(stampfile).st_mtime <= min_time:
        return False
    except OSError:
      return False

  return DoesStampMatch(stampfile, stamp_contents, index)
138
139
def WriteSourceStamp(path, url):
  """Record |url| as the source-URL stamp inside directory |path|."""
  WriteStamp(os.path.join(path, SOURCE_STAMP), url)
143
def WriteHashStamp(path, hash_val):
  """Record |hash_val| as the SHA1 stamp inside directory |path|."""
  WriteStamp(os.path.join(path, HASH_STAMP), hash_val)
147
148
def Retry(op, *args):
  """Invoke op(*args), retrying with backoff on Windows.

  Windows seems to be prone to having commands that delete files or
  directories fail.  We currently do not have a complete understanding why,
  and as a workaround we simply retry the command a few times.
  It appears that file locks are hanging around longer than they should.  This
  may be a secondary effect of processes hanging around longer than they
  should.  This may be because when we kill a browser sel_ldr does not exit
  immediately, etc.
  Virus checkers can also accidentally prevent files from being deleted, but
  that shouldn't be a problem on the bots.
  """
  if sys.platform not in ('win32', 'cygwin'):
    # Non-Windows platforms do not need the retry workaround.
    op(*args)
    return
  failures = 0
  while True:
    try:
      op(*args)
      return
    except Exception:
      sys.stdout.write("FAILED: %s %s\n" % (op.__name__, repr(args)))
      failures += 1
      if failures >= 5:
        # Don't mask the exception.
        raise
      sys.stdout.write("RETRY: %s %s\n" % (op.__name__, repr(args)))
      # Exponential backoff: 2, 4, 8, 16 seconds.
      time.sleep(pow(2, failures))
176
177
def MoveDirCleanly(src, dst):
  """Move directory |src| to |dst|, deleting any pre-existing |dst| first."""
  RemoveDir(dst)
  MoveDir(src, dst)
181
182
def MoveDir(src, dst):
  """Move directory |src| to |dst|, retrying on Windows flakiness."""
  Retry(shutil.move, src, dst)
185
186
def RemoveDir(path):
  """Recursively delete |path| if it exists, retrying on Windows."""
  if not os.path.exists(path):
    return
  Retry(shutil.rmtree, path)
190
191
def RemoveFile(path):
  """Delete the file |path| if it exists, retrying on Windows."""
  if not os.path.exists(path):
    return
  Retry(os.unlink, path)
195
196
def _HashFileHandle(fh):
  """sha1 of a file like object.

  Closes |fh| when done, even if reading fails.

  Arguments:
    fh: file handle like object to hash.
  Returns:
    sha1 as a hex string.
  """
  digest = hashlib.sha1()
  try:
    chunk = fh.read(4096)
    while chunk:
      digest.update(chunk)
      chunk = fh.read(4096)
  finally:
    fh.close()
  return digest.hexdigest()
215
216
def HashFile(filename):
  """sha1 a file on disk.

  Arguments:
    filename: filename to hash.
  Returns:
    sha1 as a hex string.
  """
  # _HashFileHandle takes ownership of the handle and closes it.
  return _HashFileHandle(open(filename, 'rb'))
227
228
def HashUrlByDownloading(url):
  """sha1 the data at an url.

  Arguments:
    url: url to download from.
  Returns:
    sha1 of the data at the url.
  """
  try:
    stream = urllib2.urlopen(url)
  except:
    # Deliberately broad: report which URL failed, then re-raise unchanged.
    sys.stderr.write("Failed fetching URL: %s\n" % url)
    raise
  return _HashFileHandle(stream)
243
244
# Attempts to get the SHA1 hash of a file given a URL by looking for
# an adjacent file with a ".sha1hash" suffix.  This saves having to
# download a large tarball just to get its hash.  Otherwise, we fall
# back to downloading the main file.
def HashUrl(url):
  """Return the SHA1 hex digest of the data at |url|.

  Raises AssertionError if an adjacent .sha1hash file exists but is
  malformed; re-raises any non-404 HTTP error.
  """
  hash_url = '%s.sha1hash' % url
  try:
    fh = urllib2.urlopen(hash_url)
    try:
      # A SHA1 hex digest plus an optional newline is well under 100 bytes.
      data = fh.read(100)
    finally:
      # Close the connection even if the read fails.
      fh.close()
  except urllib2.HTTPError as exn:
    if exn.code == 404:
      # No adjacent hash file; fall back to hashing the full download.
      return HashUrlByDownloading(url)
    raise
  else:
    if not re.match('[0-9a-f]{40}\n?$', data):
      raise AssertionError('Bad SHA1 hash file: %r' % data)
    return data.strip()
263
264
def SyncURL(url, filename=None, stamp_dir=None, min_time=None,
            hash_val=None, keep=False, verbose=False, stamp_index=0):
  """Synchronize a destination file with a URL.

  If the URL does not match the URL stamp, then we must re-download it.

  Arguments:
    url: the url to compare against and download from
    filename: the local file to create on download
    stamp_dir: directory holding the URL/hash stamp files to check against
    min_time: minimum acceptable mtime for the stamp files; older stamps
        are treated as stale
    hash_val: if set, the expected hash which must be matched
    keep: if True, the downloaded file must already exist locally for the
        stamps to count as up to date
    verbose: prints out status as it runs
    stamp_index: index within the stamp file to check.
  Returns:
    True if the file is replaced
    False if the file is not replaced
  Exception:
    HashError: if the hash does not match
  """

  assert url and filename

  # If we are not keeping the tarball, or we already have it, we can
  # skip downloading it for this reason. If we are keeping it,
  # it must exist.
  if keep:
    tarball_ok = os.path.isfile(filename)
  else:
    tarball_ok = True

  # If we don't need the tarball and the stamp_file matches the url, then
  # we must be up to date.  If the URL differs but the recorded hash matches
  # the one we'll insist the tarball has, then that's good enough too.
  # TODO(mcgrathr): Download the .sha1sum file first to compare with
  # the cached hash, in case --file-hash options weren't used.
  if tarball_ok and stamp_dir is not None:
    if StampIsCurrent(stamp_dir, SOURCE_STAMP, url, min_time):
      if verbose:
        print '%s is already up to date.' % filename
      return False
    if (hash_val is not None and
        StampIsCurrent(stamp_dir, HASH_STAMP, hash_val, min_time, stamp_index)):
      if verbose:
        print '%s is identical to the up to date file.' % filename
      return False

  if verbose:
    print 'Updating %s\n\tfrom %s.' % (filename, url)
  EnsureFileCanBeWritten(filename)
  http_download.HttpDownload(url, filename)

  if hash_val:
    tar_hash = HashFile(filename)
    if hash_val != tar_hash:
      raise HashError(actual_hash=tar_hash, expected_hash=hash_val,
                      download_url=url)

  return True
324