#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A utility script for downloading versioned Syzygy binaries."""

import cStringIO
import errno
import hashlib
import json
import logging
import optparse
import os
import re
import shutil
import stat
import subprocess
import sys
import urllib2
import zipfile


_LOGGER = logging.getLogger(os.path.basename(__file__))

# The URL where official builds are archived.
_SYZYGY_ARCHIVE_URL = ('http://syzygy-archive.commondatastorage.googleapis.com/'
    'builds/official/%(revision)s')

# A JSON file containing the state of the download directory. If this file and
# directory state do not agree, then the binaries will be downloaded and
# installed again.
_STATE = '.state'
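#
# For illustration, a state file as written by _SaveState (sort_keys=True,
# indent=2) looks roughly like the following; the path and digest values
# below are made-up examples:
#
#   {
#     "contents": {
#       "exe/some_binary.exe": "<md5 digest of the installed file>"
#     },
#     "revision": "<revision or git hash>"
#   }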

# This matches an integer (an SVN revision number) or a SHA1 value (a GIT hash).
# The archive exclusively uses lowercase GIT hashes.
_REVISION_RE = re.compile(r'^(?:\d+|[a-f0-9]{40})$')

# This matches an MD5 hash.
_MD5_RE = re.compile(r'^[a-f0-9]{32}$')

# List of resources to be downloaded and installed; see the example following
# the list. These are tuples with the following format:
# (basename, logging name, relative installation path, extraction filter)
_RESOURCES = [
  ('benchmark.zip', 'benchmark', '', None),
  ('binaries.zip', 'binaries', 'exe', None),
  ('symbols.zip', 'symbols', 'exe',
      lambda x: x.filename.endswith('.dll.pdb')),
  ('include.zip', 'include', 'include', None),
  ('lib.zip', 'library', 'lib', None)]
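#
# For example, with --revision=<rev> the 'binaries' resource above is fetched
# from _SYZYGY_ARCHIVE_URL (with <rev> substituted for %(revision)s) plus
# '/binaries.zip', and is unpacked into the 'exe' subdirectory of
# --output-dir.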


def _Shell(*cmd, **kw):
  """Runs |cmd| and returns the (stdout, stderr) pair from communicate().

  Note that stdout and stderr are only captured if the caller passes the
  corresponding subprocess.PIPE arguments via |kw|; otherwise both are None.
  """
  _LOGGER.debug('Executing %s.', cmd)
  prog = subprocess.Popen(cmd, shell=True, **kw)

  stdout, stderr = prog.communicate()
  if prog.returncode != 0:
    raise RuntimeError('Command "%s" returned %d.' % (cmd, prog.returncode))
  return (stdout, stderr)

def _LoadState(output_dir):
  """Loads the contents of the state file for a given |output_dir|, returning
  None if it doesn't exist.
  """
  path = os.path.join(output_dir, _STATE)
  if not os.path.exists(path):
    _LOGGER.debug('No state file found.')
    return None
  with open(path, 'rb') as f:
    _LOGGER.debug('Reading state file: %s', path)
    try:
      return json.load(f)
    except ValueError:
      _LOGGER.debug('Invalid state file.')
      return None


def _SaveState(output_dir, state, dry_run=False):
  """Saves the |state| dictionary to the given |output_dir| as a JSON file."""
  path = os.path.join(output_dir, _STATE)
  _LOGGER.debug('Writing state file: %s', path)
  if dry_run:
    return
  with open(path, 'wb') as f:
    f.write(json.dumps(state, sort_keys=True, indent=2))


def _Md5(path):
  """Returns the MD5 hash of the file at |path|, which must exist."""
  with open(path, 'rb') as f:
    return hashlib.md5(f.read()).hexdigest()


def _StateIsValid(state):
  """Returns true if the given state structure is valid."""
  if not isinstance(state, dict):
    _LOGGER.debug('State must be a dict.')
    return False
  r = state.get('revision', None)
  if not isinstance(r, basestring) or not _REVISION_RE.match(r):
    _LOGGER.debug('State contains an invalid revision.')
    return False
  c = state.get('contents', None)
  if not isinstance(c, dict):
    _LOGGER.debug('State must contain a contents dict.')
    return False
  for (relpath, md5) in c.iteritems():
    if not isinstance(relpath, basestring) or len(relpath) == 0:
      _LOGGER.debug('State contents dict contains an invalid path.')
      return False
    if not isinstance(md5, basestring) or not _MD5_RE.match(md5):
      _LOGGER.debug('State contents dict contains an invalid MD5 digest.')
      return False
  return True


def _BuildActualState(stored, revision, output_dir):
  """Builds the actual state using the provided |stored| state as a template.
  Only examines files listed in the stored state, causing the script to ignore
  files that have been added to the directories locally. |stored| must be a
  valid state dictionary.
  """
  contents = {}
  state = { 'revision': revision, 'contents': contents }
  for relpath, md5 in stored['contents'].iteritems():
    abspath = os.path.abspath(os.path.join(output_dir, relpath))
    if os.path.isfile(abspath):
      m = _Md5(abspath)
      contents[relpath] = m

  return state


def _StatesAreConsistent(stored, actual):
  """Validates whether two state dictionaries are consistent. Both must be
  valid state dictionaries. Additional entries in |actual| are ignored.
  """
  if stored['revision'] != actual['revision']:
    _LOGGER.debug('Mismatched revision number.')
    return False
  cont_stored = stored['contents']
  cont_actual = actual['contents']
  for relpath, md5 in cont_stored.iteritems():
    if relpath not in cont_actual:
      _LOGGER.debug('Missing content: %s', relpath)
      return False
    if md5 != cont_actual[relpath]:
      _LOGGER.debug('Modified content: %s', relpath)
      return False
  return True


def _GetCurrentState(revision, output_dir):
  """Loads the current state and checks to see if it is consistent. Returns
  a tuple (state, bool). The returned state will always be valid, even if an
  invalid state is present on disk.
  """
  stored = _LoadState(output_dir)
  if not _StateIsValid(stored):
    _LOGGER.debug('State is invalid.')
    # Return a valid but empty state.
    return ({'revision': '0', 'contents': {}}, False)
  actual = _BuildActualState(stored, revision, output_dir)
  # If the script has been modified, consider the state invalid.
  path = os.path.join(output_dir, _STATE)
  if os.path.getmtime(__file__) > os.path.getmtime(path):
    return (stored, False)
  # Otherwise, explicitly validate the state.
  if not _StatesAreConsistent(stored, actual):
    return (stored, False)
  return (stored, True)


def _DirIsEmpty(path):
  """Returns true if the given directory is empty, false otherwise."""
  for root, dirs, files in os.walk(path):
    return not dirs and not files


def _RmTreeHandleReadOnly(func, path, exc):
  """An error handling function for use with shutil.rmtree. This detects
  failures to remove read-only files and changes their permissions prior to
  removing them. This is necessary on Windows, where os.remove raises an
  access error for read-only files, and git repos contain read-only
  pack/index files.
  """
  excvalue = exc[1]
  if func in (os.rmdir, os.remove) and excvalue.errno == errno.EACCES:
    _LOGGER.debug('Removing read-only path: %s', path)
    os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
    func(path)
  else:
    raise


def _RmTree(path):
  """A wrapper of shutil.rmtree that handles read-only files."""
  shutil.rmtree(path, ignore_errors=False, onerror=_RmTreeHandleReadOnly)


def _CleanState(output_dir, state, dry_run=False):
  """Cleans up files/directories in |output_dir| that are referenced by
  the given |state|. Raises an error if there are local changes. Returns a
  dictionary of files that were deleted.
  """
  _LOGGER.debug('Deleting files from previous installation.')
  deleted = {}

  # Generate a list of files to delete, relative to |output_dir|.
  contents = state['contents']
  files = sorted(contents.keys())

  # Try to delete the files. Keep track of directories to delete as well.
  dirs = {}
  for relpath in files:
    fullpath = os.path.join(output_dir, relpath)
    fulldir = os.path.dirname(fullpath)
    dirs[fulldir] = True
    if os.path.exists(fullpath):
      # If somehow the file has become a directory, complain about it.
      if os.path.isdir(fullpath):
        raise Exception('Directory exists where file expected: %s' % fullpath)

      # Double check that the file doesn't have local changes. If it does
      # then refuse to delete it.
      if relpath in contents:
        stored_md5 = contents[relpath]
        actual_md5 = _Md5(fullpath)
        if actual_md5 != stored_md5:
          raise Exception('File has local changes: %s' % fullpath)

      # The file is unchanged, so it can safely be deleted.
      _LOGGER.debug('Deleting file "%s".', fullpath)
      deleted[relpath] = True
      if not dry_run:
        os.unlink(fullpath)

  # Sort directories from longest name to shortest. This lets us remove empty
  # directories from the most nested paths first.
  dirs = sorted(dirs.keys(), key=lambda x: len(x), reverse=True)
  for p in dirs:
    if os.path.exists(p) and _DirIsEmpty(p):
      _LOGGER.debug('Deleting empty directory "%s".', p)
      if not dry_run:
        _RmTree(p)

  return deleted


def _Download(url):
  """Downloads the given URL and returns the contents as a string."""
  response = urllib2.urlopen(url)
  if response.code != 200:
    raise RuntimeError('Failed to download "%s".' % url)
  return response.read()


def _InstallBinaries(options, deleted=None):
  """Installs Syzygy binaries. This assumes that the output directory has
  already been cleaned, as it will refuse to overwrite existing files."""
  deleted = deleted or {}
  contents = {}
  state = { 'revision': options.revision, 'contents': contents }
  archive_url = _SYZYGY_ARCHIVE_URL % { 'revision': options.revision }
  for (base, name, subdir, filt) in _RESOURCES:
    # Create the output directory if it doesn't exist.
    fulldir = os.path.join(options.output_dir, subdir)
    if os.path.isfile(fulldir):
      raise Exception('File exists where a directory needs to be created: %s' %
                      fulldir)
    if not os.path.exists(fulldir):
      _LOGGER.debug('Creating directory: %s', fulldir)
      if not options.dry_run:
        os.makedirs(fulldir)

    # Download the archive.
    url = archive_url + '/' + base
    _LOGGER.debug('Retrieving %s archive at "%s".', name, url)
    data = _Download(url)

    _LOGGER.debug('Unzipping %s archive.', name)
    archive = zipfile.ZipFile(cStringIO.StringIO(data))
    for entry in archive.infolist():
      if not filt or filt(entry):
        fullpath = os.path.normpath(os.path.join(fulldir, entry.filename))
        relpath = os.path.relpath(fullpath, options.output_dir)
        if os.path.exists(fullpath):
          # If in a dry-run, take into account the fact that the file *would*
          # have been deleted.
          if options.dry_run and relpath in deleted:
            pass
          else:
            raise Exception('Path already exists: %s' % fullpath)

        # Extract the file and update the state dictionary.
        _LOGGER.debug('Extracting "%s".', fullpath)
        if not options.dry_run:
          archive.extract(entry.filename, fulldir)
          md5 = _Md5(fullpath)
          contents[relpath] = md5
          if sys.platform == 'cygwin':
            os.chmod(fullpath, os.stat(fullpath).st_mode | stat.S_IXUSR)

  return state


def _ParseCommandLine():
  """Parses the command-line and returns an options structure."""
  option_parser = optparse.OptionParser()
  option_parser.add_option('--dry-run', action='store_true', default=False,
      help='If specified, just list the actions that would be performed.')
  option_parser.add_option('--force', action='store_true', default=False,
      help='Force an installation even if the binaries are up to date.')
  option_parser.add_option('--output-dir', type='string',
      help='The path where the binaries will be replaced. Existing binaries '
           'will only be overwritten if not up to date.')
  option_parser.add_option('--overwrite', action='store_true', default=False,
      help='If specified then the installation will happily delete and rewrite '
           'the entire output directory, blasting any local changes.')
  option_parser.add_option('--revision', type='string',
      help='The SVN revision or GIT hash associated with the required version.')
  option_parser.add_option('--revision-file', type='string',
      help='A text file containing an SVN revision or GIT hash.')
  option_parser.add_option('--verbose', dest='log_level', action='store_const',
      default=logging.INFO, const=logging.DEBUG,
      help='Enables verbose logging.')
  option_parser.add_option('--quiet', dest='log_level', action='store_const',
      default=logging.INFO, const=logging.ERROR,
      help='Disables all output except for errors.')
  options, args = option_parser.parse_args()
  if args:
    option_parser.error('Unexpected arguments: %s' % args)
  if not options.output_dir:
    option_parser.error('Must specify --output-dir.')
  if not options.revision and not options.revision_file:
    option_parser.error('Must specify one of --revision or --revision-file.')
  if options.revision and options.revision_file:
    option_parser.error('Must not specify both --revision and --revision-file.')

  # Configure logging.
  logging.basicConfig(level=options.log_level)

  # If a revision file has been specified, read it.
  if options.revision_file:
    with open(options.revision_file, 'rb') as f:
      options.revision = f.read().strip()
    _LOGGER.debug('Parsed revision "%s" from file "%s".',
                  options.revision, options.revision_file)

  # Ensure that the specified SVN revision or GIT hash is valid.
  if not _REVISION_RE.match(options.revision):
    option_parser.error('Must specify a valid SVN or GIT revision.')

  # This just makes output prettier to read.
  options.output_dir = os.path.normpath(options.output_dir)

  return options


def _RemoveOrphanedFiles(options):
  """This is run on non-Windows systems to remove orphaned files that may have
  been downloaded by a previous version of this script.
  """
  # Reconfigure logging to output info messages. This will allow inspection of
  # cleanup status on non-Windows buildbots.
  _LOGGER.setLevel(logging.INFO)

  output_dir = os.path.abspath(options.output_dir)

  # We only want to clean up the folder in 'src/third_party/syzygy', and we
  # expect to be called with that as an output directory. This is an attempt to
  # not start deleting random things if the script is run from an alternate
  # location, or not called from the gclient hooks.
  expected_syzygy_dir = os.path.abspath(os.path.join(
      os.path.dirname(__file__), '..', 'third_party', 'syzygy'))
  expected_output_dir = os.path.join(expected_syzygy_dir, 'binaries')
  if expected_output_dir != output_dir:
    _LOGGER.info('Unexpected output directory, skipping cleanup.')
    return

  if not os.path.isdir(expected_syzygy_dir):
    _LOGGER.info('Output directory does not exist, skipping cleanup.')
    return

  def OnError(function, path, excinfo):
    """Logs errors encountered by shutil.rmtree."""
    _LOGGER.error('Error when running %s(%s)', function, path, exc_info=excinfo)

  _LOGGER.info('Removing orphaned files from %s', expected_syzygy_dir)
  if not options.dry_run:
    shutil.rmtree(expected_syzygy_dir, onerror=OnError)


def main():
  options = _ParseCommandLine()

  if options.dry_run:
    _LOGGER.debug('Performing a dry-run.')

  # We only care about Windows platforms, as the Syzygy binaries aren't used
  # elsewhere. However, there was a short period of time where this script
  # wasn't gated on OS types, and those OSes downloaded and installed binaries.
  # This will clean up orphaned files on those operating systems.
  if sys.platform not in ('win32', 'cygwin'):
    return _RemoveOrphanedFiles(options)

  # Load the current installation state, and validate it against the
  # requested installation.
  state, is_consistent = _GetCurrentState(options.revision, options.output_dir)

  # Decide whether or not an install is necessary.
  if options.force:
    _LOGGER.debug('Forcing reinstall of binaries.')
  elif is_consistent:
    # Avoid doing any work if the contents of the directory are consistent.
    _LOGGER.debug('State unchanged, no reinstall necessary.')
    return

  # Under normal logging this is the only message that will be reported.
  _LOGGER.info('Installing revision %s Syzygy binaries.',
               options.revision[0:12])

  # Clean up the old state to begin with.
  deleted = {}
  if options.overwrite:
    if os.path.exists(options.output_dir):
      # If overwrite was specified then take a heavy-handed approach.
      _LOGGER.debug('Deleting entire installation directory.')
      if not options.dry_run:
        _RmTree(options.output_dir)
  else:
    # Otherwise only delete things that the previous installation put in place,
    # and take care to preserve any local changes.
    deleted = _CleanState(options.output_dir, state, options.dry_run)

  # Install the new binaries. In a dry-run this will actually download the
  # archives, but it won't write anything to disk.
  state = _InstallBinaries(options, deleted)

  # Build and save the state for the directory.
  _SaveState(options.output_dir, state, options.dry_run)


if __name__ == '__main__':
  main()