1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Snapshot Build Bisect Tool
7
8This script bisects a snapshot archive using binary search. It starts at
9a bad revision (it will try to guess HEAD) and asks for a last known-good
10revision. It will then binary search across this revision range by downloading,
11unzipping, and opening Chromium for you. After testing the specific revision,
12it will ask you whether it is good or bad before continuing the search.
13"""
14
# Host name of the Google Storage endpoint used by all archive URLs below.
GOOGLE_APIS_URL = 'commondatastorage.googleapis.com'

# Base URLs of the stored snapshot build archives.
CHROMIUM_BASE_URL = 'http://%s/chromium-browser-snapshots' % GOOGLE_APIS_URL
WEBKIT_BASE_URL = 'http://%s/chromium-webkit-snapshots' % GOOGLE_APIS_URL
ASAN_BASE_URL = 'http://%s/chromium-browser-asan' % GOOGLE_APIS_URL

# GS bucket name of the official builds and the base URL derived from it.
GS_BUCKET_NAME = 'chrome-unsigned/desktop-W15K3Y'
OFFICIAL_BASE_URL = 'http://%s/%s' % (GOOGLE_APIS_URL, GS_BUCKET_NAME)

# URL templates for viewing changelogs between two revisions and between two
# official versions.
CHANGELOG_URL = 'https://chromium.googlesource.com/chromium/src/+log/%s..%s'
OFFICIAL_CHANGELOG_URL = (
    'https://chromium.googlesource.com/chromium/src/+log/%s..%s?pretty=full')

# URL to convert SVN revision to git hash.
CRREV_URL = 'https://cr-rev.appspot.com/_ah/api/crrev/v1/redirect/'

# DEPS file URL templates: the old one takes a revision number (%d), the new
# one takes a string (%s).
DEPS_FILE_OLD = (
    'http://src.chromium.org/viewvc/chrome/trunk/src/DEPS?revision=%d')
DEPS_FILE_NEW = 'https://chromium.googlesource.com/chromium/src/+/%s/DEPS'

# Blink changelogs URL template; takes the two revision numbers of the range.
BLINK_CHANGELOG_URL = (
    'http://build.chromium.org/f/chromium/perf/dashboard/ui/'
    'changelog_blink.html?url=/trunk&range=%d%%3A%d')

# Final messages; which one applies depends on whether the good revision is
# the lower or the upper end of the bisected range.
DONE_MESSAGE_GOOD_MIN = (
    'You are probably looking for a change made after %s (known good), '
    'but no later than %s (first known bad).')
DONE_MESSAGE_GOOD_MAX = (
    'You are probably looking for a change made after %s (known bad), '
    'but no later than %s (first known good).')

# Per-depot URL templates returning the JSON description of a git commit.
CHROMIUM_GITHASH_TO_SVN_URL = (
    'https://chromium.googlesource.com/chromium/src/+/%s?format=json')
BLINK_GITHASH_TO_SVN_URL = (
    'https://chromium.googlesource.com/chromium/blink/+/%s?format=json')
GITHASH_TO_SVN_URL = dict(chromium=CHROMIUM_GITHASH_TO_SVN_URL,
                          blink=BLINK_GITHASH_TO_SVN_URL)

# Search patterns to be matched in the JSON output from
# CHROMIUM_GITHASH_TO_SVN_URL to get the chromium revision (svn revision).
CHROMIUM_SEARCH_PATTERN_OLD = (
    r'.*git-svn-id: svn://svn.chromium.org/chrome/trunk/src@(\d+) ')
CHROMIUM_SEARCH_PATTERN = (
    r'Cr-Commit-Position: refs/heads/master@{#(\d+)}')

# Search pattern to be matched in the JSON output from
# BLINK_GITHASH_TO_SVN_URL to get the blink revision (svn revision).
BLINK_SEARCH_PATTERN = (
    r'.*git-svn-id: svn://svn.chromium.org/blink/trunk@(\d+) ')

SEARCH_PATTERN = dict(chromium=CHROMIUM_SEARCH_PATTERN,
                      blink=BLINK_SEARCH_PATTERN)

# Start of the gsutil error output that indicates missing credentials.
CREDENTIAL_ERROR_MESSAGE = (
    'You are attempting to access protected data with no configured '
    'credentials')
87
88###############################################################################
89
90import httplib
91import json
92import optparse
93import os
94import re
95import shlex
96import shutil
97import subprocess
98import sys
99import tempfile
100import threading
101import urllib
102from distutils.version import LooseVersion
103from xml.etree import ElementTree
104import zipfile
105
106
107class PathContext(object):
108  """A PathContext is used to carry the information used to construct URLs and
109  paths when dealing with the storage server and archives."""
110  def __init__(self, base_url, platform, good_revision, bad_revision,
111               is_official, is_asan, use_local_repo, flash_path = None,
112               pdf_path = None):
113    super(PathContext, self).__init__()
114    # Store off the input parameters.
115    self.base_url = base_url
116    self.platform = platform  # What's passed in to the '-a/--archive' option.
117    self.good_revision = good_revision
118    self.bad_revision = bad_revision
119    self.is_official = is_official
120    self.is_asan = is_asan
121    self.build_type = 'release'
122    self.flash_path = flash_path
123    # Dictionary which stores svn revision number as key and it's
124    # corresponding git hash as value. This data is populated in
125    # _FetchAndParse and used later in GetDownloadURL while downloading
126    # the build.
127    self.githash_svn_dict = {}
128    self.pdf_path = pdf_path
129
130    # The name of the ZIP file in a revision directory on the server.
131    self.archive_name = None
132
133    # If the script is run from a local Chromium checkout,
134    # "--use-local-repo" option can be used to make the script run faster.
135    # It uses "git svn find-rev <SHA1>" command to convert git hash to svn
136    # revision number.
137    self.use_local_repo = use_local_repo
138
139    # Set some internal members:
140    #   _listing_platform_dir = Directory that holds revisions. Ends with a '/'.
141    #   _archive_extract_dir = Uncompressed directory in the archive_name file.
142    #   _binary_name = The name of the executable to run.
143    if self.platform in ('linux', 'linux64', 'linux-arm'):
144      self._binary_name = 'chrome'
145    elif self.platform in ('mac', 'mac64'):
146      self.archive_name = 'chrome-mac.zip'
147      self._archive_extract_dir = 'chrome-mac'
148    elif self.platform in ('win', 'win64'):
149      self.archive_name = 'chrome-win32.zip'
150      self._archive_extract_dir = 'chrome-win32'
151      self._binary_name = 'chrome.exe'
152    else:
153      raise Exception('Invalid platform: %s' % self.platform)
154
155    if is_official:
156      if self.platform == 'linux':
157        self._listing_platform_dir = 'precise32/'
158        self.archive_name = 'chrome-precise32.zip'
159        self._archive_extract_dir = 'chrome-precise32'
160      elif self.platform == 'linux64':
161        self._listing_platform_dir = 'precise64/'
162        self.archive_name = 'chrome-precise64.zip'
163        self._archive_extract_dir = 'chrome-precise64'
164      elif self.platform == 'mac':
165        self._listing_platform_dir = 'mac/'
166        self._binary_name = 'Google Chrome.app/Contents/MacOS/Google Chrome'
167      elif self.platform == 'mac64':
168        self._listing_platform_dir = 'mac64/'
169        self._binary_name = 'Google Chrome.app/Contents/MacOS/Google Chrome'
170      elif self.platform == 'win':
171        self._listing_platform_dir = 'win/'
172        self.archive_name = 'chrome-win.zip'
173        self._archive_extract_dir = 'chrome-win'
174      elif self.platform == 'win64':
175        self._listing_platform_dir = 'win64/'
176        self.archive_name = 'chrome-win64.zip'
177        self._archive_extract_dir = 'chrome-win64'
178    else:
179      if self.platform in ('linux', 'linux64', 'linux-arm'):
180        self.archive_name = 'chrome-linux.zip'
181        self._archive_extract_dir = 'chrome-linux'
182        if self.platform == 'linux':
183          self._listing_platform_dir = 'Linux/'
184        elif self.platform == 'linux64':
185          self._listing_platform_dir = 'Linux_x64/'
186        elif self.platform == 'linux-arm':
187          self._listing_platform_dir = 'Linux_ARM_Cross-Compile/'
188      elif self.platform == 'mac':
189        self._listing_platform_dir = 'Mac/'
190        self._binary_name = 'Chromium.app/Contents/MacOS/Chromium'
191      elif self.platform == 'win':
192        self._listing_platform_dir = 'Win/'
193
194  def GetASANPlatformDir(self):
195    """ASAN builds are in directories like "linux-release", or have filenames
196    like "asan-win32-release-277079.zip". This aligns to our platform names
197    except in the case of Windows where they use "win32" instead of "win"."""
198    if self.platform == 'win':
199      return 'win32'
200    else:
201      return self.platform
202
203  def GetListingURL(self, marker=None):
204    """Returns the URL for a directory listing, with an optional marker."""
205    marker_param = ''
206    if marker:
207      marker_param = '&marker=' + str(marker)
208    if self.is_asan:
209      prefix = '%s-%s' % (self.GetASANPlatformDir(), self.build_type)
210      return self.base_url + '/?delimiter=&prefix=' + prefix + marker_param
211    else:
212      return (self.base_url + '/?delimiter=/&prefix=' +
213              self._listing_platform_dir + marker_param)
214
215  def GetDownloadURL(self, revision):
216    """Gets the download URL for a build archive of a specific revision."""
217    if self.is_asan:
218      return '%s/%s-%s/%s-%d.zip' % (
219          ASAN_BASE_URL, self.GetASANPlatformDir(), self.build_type,
220          self.GetASANBaseName(), revision)
221    if self.is_official:
222      return '%s/%s/%s%s' % (
223          OFFICIAL_BASE_URL, revision, self._listing_platform_dir,
224          self.archive_name)
225    else:
226      if str(revision) in self.githash_svn_dict:
227        revision = self.githash_svn_dict[str(revision)]
228      return '%s/%s%s/%s' % (self.base_url, self._listing_platform_dir,
229                             revision, self.archive_name)
230
231  def GetLastChangeURL(self):
232    """Returns a URL to the LAST_CHANGE file."""
233    return self.base_url + '/' + self._listing_platform_dir + 'LAST_CHANGE'
234
235  def GetASANBaseName(self):
236    """Returns the base name of the ASAN zip file."""
237    if 'linux' in self.platform:
238      return 'asan-symbolized-%s-%s' % (self.GetASANPlatformDir(),
239                                        self.build_type)
240    else:
241      return 'asan-%s-%s' % (self.GetASANPlatformDir(), self.build_type)
242
243  def GetLaunchPath(self, revision):
244    """Returns a relative path (presumably from the archive extraction location)
245    that is used to run the executable."""
246    if self.is_asan:
247      extract_dir = '%s-%d' % (self.GetASANBaseName(), revision)
248    else:
249      extract_dir = self._archive_extract_dir
250    return os.path.join(extract_dir, self._binary_name)
251
252  def ParseDirectoryIndex(self):
253    """Parses the Google Storage directory listing into a list of revision
254    numbers."""
255
256    def _FetchAndParse(url):
257      """Fetches a URL and returns a 2-Tuple of ([revisions], next-marker). If
258      next-marker is not None, then the listing is a partial listing and another
259      fetch should be performed with next-marker being the marker= GET
260      parameter."""
261      handle = urllib.urlopen(url)
262      document = ElementTree.parse(handle)
263
264      # All nodes in the tree are namespaced. Get the root's tag name to extract
265      # the namespace. Etree does namespaces as |{namespace}tag|.
266      root_tag = document.getroot().tag
267      end_ns_pos = root_tag.find('}')
268      if end_ns_pos == -1:
269        raise Exception('Could not locate end namespace for directory index')
270      namespace = root_tag[:end_ns_pos + 1]
271
272      # Find the prefix (_listing_platform_dir) and whether or not the list is
273      # truncated.
274      prefix_len = len(document.find(namespace + 'Prefix').text)
275      next_marker = None
276      is_truncated = document.find(namespace + 'IsTruncated')
277      if is_truncated is not None and is_truncated.text.lower() == 'true':
278        next_marker = document.find(namespace + 'NextMarker').text
279      # Get a list of all the revisions.
280      revisions = []
281      githash_svn_dict = {}
282      if self.is_asan:
283        asan_regex = re.compile(r'.*%s-(\d+)\.zip$' % (self.GetASANBaseName()))
284        # Non ASAN builds are in a <revision> directory. The ASAN builds are
285        # flat
286        all_prefixes = document.findall(namespace + 'Contents/' +
287                                        namespace + 'Key')
288        for prefix in all_prefixes:
289          m = asan_regex.match(prefix.text)
290          if m:
291            try:
292              revisions.append(int(m.group(1)))
293            except ValueError:
294              pass
295      else:
296        all_prefixes = document.findall(namespace + 'CommonPrefixes/' +
297                                        namespace + 'Prefix')
298        # The <Prefix> nodes have content of the form of
299        # |_listing_platform_dir/revision/|. Strip off the platform dir and the
300        # trailing slash to just have a number.
301        for prefix in all_prefixes:
302          revnum = prefix.text[prefix_len:-1]
303          try:
304            if not revnum.isdigit():
305              git_hash = revnum
306              revnum = self.GetSVNRevisionFromGitHash(git_hash)
307              githash_svn_dict[revnum] = git_hash
308            if revnum is not None:
309              revnum = int(revnum)
310              revisions.append(revnum)
311          except ValueError:
312            pass
313      return (revisions, next_marker, githash_svn_dict)
314
315    # Fetch the first list of revisions.
316    (revisions, next_marker, self.githash_svn_dict) = _FetchAndParse(
317        self.GetListingURL())
318    # If the result list was truncated, refetch with the next marker. Do this
319    # until an entire directory listing is done.
320    while next_marker:
321      next_url = self.GetListingURL(next_marker)
322      (new_revisions, next_marker, new_dict) = _FetchAndParse(next_url)
323      revisions.extend(new_revisions)
324      self.githash_svn_dict.update(new_dict)
325    return revisions
326
327  def _GetSVNRevisionFromGitHashWithoutGitCheckout(self, git_sha1, depot):
328    json_url = GITHASH_TO_SVN_URL[depot] % git_sha1
329    response = urllib.urlopen(json_url)
330    if response.getcode() == 200:
331      try:
332        data = json.loads(response.read()[4:])
333      except ValueError:
334        print 'ValueError for JSON URL: %s' % json_url
335        raise ValueError
336    else:
337      raise ValueError
338    if 'message' in data:
339      message = data['message'].split('\n')
340      message = [line for line in message if line.strip()]
341      search_pattern = re.compile(SEARCH_PATTERN[depot])
342      result = search_pattern.search(message[len(message)-1])
343      if result:
344        return result.group(1)
345      else:
346        if depot == 'chromium':
347          result = re.search(CHROMIUM_SEARCH_PATTERN_OLD,
348                             message[len(message)-1])
349          if result:
350            return result.group(1)
351    print 'Failed to get svn revision number for %s' % git_sha1
352    raise ValueError
353
354  def _GetSVNRevisionFromGitHashFromGitCheckout(self, git_sha1, depot):
355    def _RunGit(command, path):
356      command = ['git'] + command
357      if path:
358        original_path = os.getcwd()
359        os.chdir(path)
360      shell = sys.platform.startswith('win')
361      proc = subprocess.Popen(command, shell=shell, stdout=subprocess.PIPE,
362                              stderr=subprocess.PIPE)
363      (output, _) = proc.communicate()
364
365      if path:
366        os.chdir(original_path)
367      return (output, proc.returncode)
368
369    path = None
370    if depot == 'blink':
371      path = os.path.join(os.getcwd(), 'third_party', 'WebKit')
372    if os.path.basename(os.getcwd()) == 'src':
373      command = ['svn', 'find-rev', git_sha1]
374      (git_output, return_code) = _RunGit(command, path)
375      if not return_code:
376        return git_output.strip('\n')
377      raise ValueError
378    else:
379      print ('Script should be run from src folder. ' +
380             'Eg: python tools/bisect-builds.py -g 280588 -b 280590' +
381             '--archive linux64 --use-local-repo')
382      sys.exit(1)
383
384  def GetSVNRevisionFromGitHash(self, git_sha1, depot='chromium'):
385    if not self.use_local_repo:
386      return self._GetSVNRevisionFromGitHashWithoutGitCheckout(git_sha1, depot)
387    else:
388      return self._GetSVNRevisionFromGitHashFromGitCheckout(git_sha1, depot)
389
390  def GetRevList(self):
391    """Gets the list of revision numbers between self.good_revision and
392    self.bad_revision."""
393    # Download the revlist and filter for just the range between good and bad.
394    minrev = min(self.good_revision, self.bad_revision)
395    maxrev = max(self.good_revision, self.bad_revision)
396    revlist_all = map(int, self.ParseDirectoryIndex())
397
398    revlist = [x for x in revlist_all if x >= int(minrev) and x <= int(maxrev)]
399    revlist.sort()
400
401    # Set good and bad revisions to be legit revisions.
402    if revlist:
403      if self.good_revision < self.bad_revision:
404        self.good_revision = revlist[0]
405        self.bad_revision = revlist[-1]
406      else:
407        self.bad_revision = revlist[0]
408        self.good_revision = revlist[-1]
409
410      # Fix chromium rev so that the deps blink revision matches REVISIONS file.
411      if self.base_url == WEBKIT_BASE_URL:
412        revlist_all.sort()
413        self.good_revision = FixChromiumRevForBlink(revlist,
414                                                    revlist_all,
415                                                    self,
416                                                    self.good_revision)
417        self.bad_revision = FixChromiumRevForBlink(revlist,
418                                                   revlist_all,
419                                                   self,
420                                                   self.bad_revision)
421    return revlist
422
423  def GetOfficialBuildsList(self):
424    """Gets the list of official build numbers between self.good_revision and
425    self.bad_revision."""
426
427    def CheckDepotToolsInPath():
428      delimiter = ';' if sys.platform.startswith('win') else ':'
429      path_list = os.environ['PATH'].split(delimiter)
430      for path in path_list:
431        if path.find('depot_tools') != -1:
432          return path
433      return None
434
435    def RunGsutilCommand(args):
436      gsutil_path = CheckDepotToolsInPath()
437      if gsutil_path is None:
438        print ('Follow the instructions in this document '
439               'http://dev.chromium.org/developers/how-tos/install-depot-tools'
440               ' to install depot_tools and then try again.')
441        sys.exit(1)
442      gsutil_path = os.path.join(gsutil_path, 'third_party', 'gsutil', 'gsutil')
443      gsutil = subprocess.Popen([sys.executable, gsutil_path] + args,
444                                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
445                                env=None)
446      stdout, stderr = gsutil.communicate()
447      if gsutil.returncode:
448        if (re.findall(r'status[ |=]40[1|3]', stderr) or
449            stderr.startswith(CREDENTIAL_ERROR_MESSAGE)):
450          print ('Follow these steps to configure your credentials and try'
451                 ' running the bisect-builds.py again.:\n'
452                 '  1. Run "python %s config" and follow its instructions.\n'
453                 '  2. If you have a @google.com account, use that account.\n'
454                 '  3. For the project-id, just enter 0.' % gsutil_path)
455          sys.exit(1)
456        else:
457          raise Exception('Error running the gsutil command: %s' % stderr)
458      return stdout
459
460    def GsutilList(bucket):
461      query = 'gs://%s/' % bucket
462      stdout = RunGsutilCommand(['ls', query])
463      return [url[len(query):].strip('/') for url in stdout.splitlines()]
464
465    # Download the revlist and filter for just the range between good and bad.
466    minrev = min(self.good_revision, self.bad_revision)
467    maxrev = max(self.good_revision, self.bad_revision)
468    build_numbers = GsutilList(GS_BUCKET_NAME)
469    revision_re = re.compile(r'(\d\d\.\d\.\d{4}\.\d+)')
470    build_numbers = filter(lambda b: revision_re.search(b), build_numbers)
471    final_list = []
472    parsed_build_numbers = [LooseVersion(x) for x in build_numbers]
473    connection = httplib.HTTPConnection(GOOGLE_APIS_URL)
474    for build_number in sorted(parsed_build_numbers):
475      if build_number > maxrev:
476        break
477      if build_number < minrev:
478        continue
479      path = ('/' + GS_BUCKET_NAME + '/' + str(build_number) + '/' +
480              self._listing_platform_dir + self.archive_name)
481      connection.request('HEAD', path)
482      response = connection.getresponse()
483      if response.status == 200:
484        final_list.append(str(build_number))
485      response.read()
486    connection.close()
487    return final_list
488
def UnzipFilenameToDir(filename, directory):
  """Unzip |filename| to |directory|.

  |directory| is created if it does not exist yet (its parent must exist).
  Every archive member is written below |directory|, and the Unix mode
  stored in the archive is applied when the archive carries one. The
  current working directory of the process is not changed.
  """
  if not os.path.isabs(filename):
    filename = os.path.join(os.getcwd(), filename)
  zf = zipfile.ZipFile(filename)
  try:
    # Make base.
    if not os.path.isdir(directory):
      os.mkdir(directory)
    # Extract files.
    for info in zf.infolist():
      name = info.filename
      target = os.path.join(directory, name)
      if name.endswith('/'):  # dir
        if not os.path.isdir(target):
          os.makedirs(target)
      else:  # file
        parent = os.path.dirname(target)
        # |parent| is empty for a member at the top level of an archive that
        # is extracted into a relative, single-component |directory|.
        if parent and not os.path.isdir(parent):
          os.makedirs(parent)
        with open(target, 'wb') as out:
          out.write(zf.read(name))
      # Set permissions. Permission info in external_attr is shifted 16 bits.
      # A mode of 0 means the archive stores no Unix permissions; applying it
      # would make the extracted entry inaccessible.
      mode = info.external_attr >> 16
      if mode:
        os.chmod(target, mode)
  finally:
    zf.close()
515
516
def FetchRevision(context, rev, filename, quit_event=None, progress_event=None):
  """Downloads revision |rev| to |filename|.

  The archive is only downloaded here, not unpacked. A download aborted
  through |quit_event| returns normally; whatever was written to |filename|
  up to that point is left in place.

  @param context A PathContext instance.
  @param rev The Chromium revision number/tag to download.
  @param filename The destination for the downloaded file.
  @param quit_event A threading.Event which will be set by the master thread to
                    indicate that the download should be aborted.
  @param progress_event A threading.Event which will be set by the master thread
                    to indicate that the progress of the download should be
                    displayed.
  """
  def ReportHook(blocknum, blocksize, totalsize):
    """Progress callback for urlretrieve; also the abort point of the
    download."""
    if quit_event and quit_event.isSet():
      raise RuntimeError('Aborting download of revision %s' % str(rev))
    if progress_event and progress_event.isSet():
      size = blocknum * blocksize
      # -1 means the total size is not known. A reported size of 0 is treated
      # the same way so that the percentage below never divides by zero.
      if totalsize <= 0:
        progress = 'Received %d bytes' % size
      else:
        size = min(totalsize, size)
        progress = 'Received %d of %d bytes, %.2f%%' % (
            size, totalsize, 100.0 * size / totalsize)
      # Send a \r to let all progress messages use just one line of output.
      sys.stdout.write('\r' + progress)
      sys.stdout.flush()

  download_url = context.GetDownloadURL(rev)
  try:
    urllib.urlretrieve(download_url, filename, ReportHook)
    if progress_event and progress_event.isSet():
      # Terminate the single progress line.
      sys.stdout.write('\n')
  except RuntimeError:
    # Raised by ReportHook when the download was cancelled on purpose.
    pass
550
551
def RunRevision(context, revision, zip_file, profile, num_runs, command, args):
  """Given a zipped revision, unzip it and run the test.

  The archive is unpacked into a fresh temporary directory, which is the
  working directory while the build runs. The previous working directory is
  restored and the temporary directory removed afterwards, also when
  unpacking or launching fails.

  @param context A PathContext instance.
  @param revision The revision contained in |zip_file|.
  @param zip_file Path of the downloaded archive.
  @param profile Value for the --user-data-dir argument.
  @param num_runs Number of times to run the command; must be at least 1.
  @param command Command template. A '%a' token expands to the test
                 arguments as separate tokens; inside any token '%p' is
                 replaced with the absolute launch path and '%s' with the
                 space-joined test arguments.
  @param args Sequence (list or tuple) of additional arguments.
  @return (returncode, stdout, stderr) of the first run with a non-zero
          return code, or of the first run if all runs returned zero.
  """
  print('Trying revision %s...' % str(revision))

  # Create a temp directory and unzip the revision into it.
  cwd = os.getcwd()
  tempdir = tempfile.mkdtemp(prefix='bisect_tmp')
  try:
    UnzipFilenameToDir(zip_file, tempdir)
    os.chdir(tempdir)

    # |args| may be a tuple (Bisect defaults to one); list + tuple would raise.
    testargs = ['--user-data-dir=%s' % profile] + list(args)
    # The sandbox must be run as root on Official Chrome, so bypass it.
    if ((context.is_official or context.flash_path or context.pdf_path) and
        context.platform.startswith('linux')):
      testargs.append('--no-sandbox')
    if context.flash_path:
      testargs.append('--ppapi-flash-path=%s' % context.flash_path)
      # We have to pass a large enough Flash version, which currently needs not
      # be correct. Instead of requiring the user of the script to figure out
      # and pass the correct version we just spoof it.
      testargs.append('--ppapi-flash-version=99.9.999.999')

    # TODO(vitalybuka): Remove in the future. See crbug.com/395687.
    if context.pdf_path:
      shutil.copy(context.pdf_path,
                  os.path.dirname(context.GetLaunchPath(revision)))
      testargs.append('--enable-print-preview')

    launch_path = os.path.abspath(context.GetLaunchPath(revision))
    joined_args = ' '.join(testargs)
    runcommand = []
    for token in shlex.split(command):
      if token == '%a':
        runcommand.extend(testargs)
      else:
        runcommand.append(
            token.replace('%p', launch_path).replace('%s', joined_args))

    # Run the build as many times as specified.
    results = []
    for _ in range(num_runs):
      subproc = subprocess.Popen(runcommand,
                                 bufsize=-1,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
      (stdout, stderr) = subproc.communicate()
      results.append((subproc.returncode, stdout, stderr))
  finally:
    # Always leave the temp directory again and remove it (best effort), so
    # that a failed run does not strand the process in a stale directory.
    os.chdir(cwd)
    shutil.rmtree(tempdir, True)

  for (returncode, stdout, stderr) in results:
    if returncode:
      return (returncode, stdout, stderr)
  return results[0]
609
610
611# The arguments official_builds, status, stdout and stderr are unused.
612# They are present here because this function is passed to Bisect which then
613# calls it with 5 arguments.
614# pylint: disable=W0613
def AskIsGoodBuild(rev, official_builds, status, stdout, stderr):
  """Asks the user whether build |rev| is good or bad.

  Keeps prompting until the answer is one of 'g', 'b', 'r' or 'u', which is
  then returned. Answering 'q' raises SystemExit."""
  prompt = ('Revision %s is '
            '[(g)ood/(b)ad/(r)etry/(u)nknown/(q)uit]: ' % str(rev))
  verdicts = ('g', 'b', 'r', 'u')
  while True:
    response = raw_input(prompt)
    if response == 'q':
      raise SystemExit()
    if response in verdicts:
      return response
626
627
def IsGoodASANBuild(rev, official_builds, status, stdout, stderr):
  """Determine if an ASAN build |rev| is good or bad

  Echoes every line of |stderr| and reports the build as bad ('b') if any of
  them contains the error message emitted by ASAN. Otherwise falls back to
  asking the user."""
  if stderr:
    asan_error_seen = False
    for line in stderr.splitlines():
      print(line)
      if 'ERROR: AddressSanitizer:' in line:
        asan_error_seen = True
    if asan_error_seen:
      print('Revision %d determined to be bad.' % rev)
      return 'b'
  return AskIsGoodBuild(rev, official_builds, status, stdout, stderr)
643
class DownloadJob(object):
  """DownloadJob represents a task to download a given Chromium revision.

  The download runs FetchRevision on its own thread. It can either be waited
  for (WaitFor) or be cancelled (Stop)."""

  def __init__(self, context, name, rev, zip_file):
    """Stores the parameters of the download; nothing is started yet.

    @param context A PathContext instance, passed on to FetchRevision.
    @param name The name given to the download thread.
    @param rev The revision to download.
    @param zip_file The destination path of the downloaded archive.
    """
    super(DownloadJob, self).__init__()
    # Store off the input parameters.
    self.context = context
    self.name = name
    self.rev = rev
    self.zip_file = zip_file
    # Set by Stop() to make the download abort.
    self.quit_event = threading.Event()
    # Set by WaitFor() to make the download print its progress.
    self.progress_event = threading.Event()
    self.thread = None

  def Start(self):
    """Starts the download."""
    fetchargs = (self.context,
                 self.rev,
                 self.zip_file,
                 self.quit_event,
                 self.progress_event)
    self.thread = threading.Thread(target=FetchRevision,
                                   name=self.name,
                                   args=fetchargs)
    self.thread.start()

  def Stop(self):
    """Stops the download which must have been started previously, and
    removes whatever has been written to the zip file so far."""
    assert self.thread, 'DownloadJob must be started before Stop is called.'
    self.quit_event.set()
    self.thread.join()
    # The download thread may have ended before it created the file, in
    # which case there is nothing to clean up.
    if os.path.exists(self.zip_file):
      os.unlink(self.zip_file)

  def WaitFor(self):
    """Prints a message and waits for the download to complete. The download
    must have been started previously."""
    assert self.thread, 'DownloadJob must be started before WaitFor is called.'
    print('Downloading revision %s...' % str(self.rev))
    self.progress_event.set()  # Display progress of download.
    self.thread.join()
684
685
686def Bisect(context,
687           num_runs=1,
688           command='%p %a',
689           try_args=(),
690           profile=None,
691           interactive=True,
692           evaluate=AskIsGoodBuild):
693  """Given known good and known bad revisions, run a binary search on all
694  archived revisions to determine the last known good revision.
695
696  @param context PathContext object initialized with user provided parameters.
697  @param num_runs Number of times to run each build for asking good/bad.
698  @param try_args A tuple of arguments to pass to the test application.
699  @param profile The name of the user profile to run with.
700  @param interactive If it is false, use command exit code for good or bad
701                     judgment of the argument build.
702  @param evaluate A function which returns 'g' if the argument build is good,
703                  'b' if it's bad or 'u' if unknown.
704
705  Threading is used to fetch Chromium revisions in the background, speeding up
706  the user's experience. For example, suppose the bounds of the search are
707  good_rev=0, bad_rev=100. The first revision to be checked is 50. Depending on
708  whether revision 50 is good or bad, the next revision to check will be either
709  25 or 75. So, while revision 50 is being checked, the script will download
710  revisions 25 and 75 in the background. Once the good/bad verdict on rev 50 is
711  known:
712
713    - If rev 50 is good, the download of rev 25 is cancelled, and the next test
714      is run on rev 75.
715
716    - If rev 50 is bad, the download of rev 75 is cancelled, and the next test
717      is run on rev 25.
718  """
719
720  if not profile:
721    profile = 'profile'
722
723  good_rev = context.good_revision
724  bad_rev = context.bad_revision
725  cwd = os.getcwd()
726
727  print 'Downloading list of known revisions...',
728  if not context.use_local_repo and not context.is_official:
729    print '(use --use-local-repo for speed if you have a local checkout)'
730  else:
731    print
732  _GetDownloadPath = lambda rev: os.path.join(cwd,
733      '%s-%s' % (str(rev), context.archive_name))
734  if context.is_official:
735    revlist = context.GetOfficialBuildsList()
736  else:
737    revlist = context.GetRevList()
738
739  # Get a list of revisions to bisect across.
740  if len(revlist) < 2:  # Don't have enough builds to bisect.
741    msg = 'We don\'t have enough builds to bisect. revlist: %s' % revlist
742    raise RuntimeError(msg)
743
744  # Figure out our bookends and first pivot point; fetch the pivot revision.
745  minrev = 0
746  maxrev = len(revlist) - 1
747  pivot = maxrev / 2
748  rev = revlist[pivot]
749  zip_file = _GetDownloadPath(rev)
750  fetch = DownloadJob(context, 'initial_fetch', rev, zip_file)
751  fetch.Start()
752  fetch.WaitFor()
753
754  # Binary search time!
755  while fetch and fetch.zip_file and maxrev - minrev > 1:
756    if bad_rev < good_rev:
757      min_str, max_str = 'bad', 'good'
758    else:
759      min_str, max_str = 'good', 'bad'
760    print 'Bisecting range [%s (%s), %s (%s)].' % (revlist[minrev], min_str,
761                                                   revlist[maxrev], max_str)
762
763    # Pre-fetch next two possible pivots
764    #   - down_pivot is the next revision to check if the current revision turns
765    #     out to be bad.
766    #   - up_pivot is the next revision to check if the current revision turns
767    #     out to be good.
768    down_pivot = int((pivot - minrev) / 2) + minrev
769    down_fetch = None
770    if down_pivot != pivot and down_pivot != minrev:
771      down_rev = revlist[down_pivot]
772      down_fetch = DownloadJob(context, 'down_fetch', down_rev,
773                               _GetDownloadPath(down_rev))
774      down_fetch.Start()
775
776    up_pivot = int((maxrev - pivot) / 2) + pivot
777    up_fetch = None
778    if up_pivot != pivot and up_pivot != maxrev:
779      up_rev = revlist[up_pivot]
780      up_fetch = DownloadJob(context, 'up_fetch', up_rev,
781                             _GetDownloadPath(up_rev))
782      up_fetch.Start()
783
784    # Run test on the pivot revision.
785    status = None
786    stdout = None
787    stderr = None
788    try:
789      (status, stdout, stderr) = RunRevision(context,
790                                             rev,
791                                             fetch.zip_file,
792                                             profile,
793                                             num_runs,
794                                             command,
795                                             try_args)
796    except Exception, e:
797      print >> sys.stderr, e
798
799    # Call the evaluate function to see if the current revision is good or bad.
800    # On that basis, kill one of the background downloads and complete the
801    # other, as described in the comments above.
802    try:
803      if not interactive:
804        if status:
805          answer = 'b'
806          print 'Bad revision: %s' % rev
807        else:
808          answer = 'g'
809          print 'Good revision: %s' % rev
810      else:
811        answer = evaluate(rev, context.is_official, status, stdout, stderr)
812      if ((answer == 'g' and good_rev < bad_rev)
813          or (answer == 'b' and bad_rev < good_rev)):
814        fetch.Stop()
815        minrev = pivot
816        if down_fetch:
817          down_fetch.Stop()  # Kill the download of the older revision.
818          fetch = None
819        if up_fetch:
820          up_fetch.WaitFor()
821          pivot = up_pivot
822          fetch = up_fetch
823      elif ((answer == 'b' and good_rev < bad_rev)
824            or (answer == 'g' and bad_rev < good_rev)):
825        fetch.Stop()
826        maxrev = pivot
827        if up_fetch:
828          up_fetch.Stop()  # Kill the download of the newer revision.
829          fetch = None
830        if down_fetch:
831          down_fetch.WaitFor()
832          pivot = down_pivot
833          fetch = down_fetch
834      elif answer == 'r':
835        pass  # Retry requires no changes.
836      elif answer == 'u':
837        # Nuke the revision from the revlist and choose a new pivot.
838        fetch.Stop()
839        revlist.pop(pivot)
840        maxrev -= 1  # Assumes maxrev >= pivot.
841
842        if maxrev - minrev > 1:
843          # Alternate between using down_pivot or up_pivot for the new pivot
844          # point, without affecting the range. Do this instead of setting the
845          # pivot to the midpoint of the new range because adjacent revisions
846          # are likely affected by the same issue that caused the (u)nknown
847          # response.
848          if up_fetch and down_fetch:
849            fetch = [up_fetch, down_fetch][len(revlist) % 2]
850          elif up_fetch:
851            fetch = up_fetch
852          else:
853            fetch = down_fetch
854          fetch.WaitFor()
855          if fetch == up_fetch:
856            pivot = up_pivot - 1  # Subtracts 1 because revlist was resized.
857          else:
858            pivot = down_pivot
859          zip_file = fetch.zip_file
860
861        if down_fetch and fetch != down_fetch:
862          down_fetch.Stop()
863        if up_fetch and fetch != up_fetch:
864          up_fetch.Stop()
865      else:
866        assert False, 'Unexpected return value from evaluate(): ' + answer
867    except SystemExit:
868      print 'Cleaning up...'
869      for f in [_GetDownloadPath(revlist[down_pivot]),
870                _GetDownloadPath(revlist[up_pivot])]:
871        try:
872          os.unlink(f)
873        except OSError:
874          pass
875      sys.exit(0)
876
877    rev = revlist[pivot]
878
879  return (revlist[minrev], revlist[maxrev], context)
880
881
def GetBlinkDEPSRevisionForChromiumRevision(self, rev):
  """Returns the blink revision recorded in the DEPS file at chromium
  revision |rev|.

  The DEPS_FILE_OLD URL is tried first. If it does not answer with HTTP 200,
  DEPS_FILE_NEW is fetched for the git hash of |rev| and the blink git hash
  found there is converted with self.GetSVNRevisionFromGitHash().

  Args:
    self: Object providing GetSVNRevisionFromGitHash(). This is a module-level
        function, so the object is passed explicitly.
    rev: Chromium revision number (it is formatted with %d).

  Returns:
    The blink revision: an int for the old DEPS format, otherwise whatever
    self.GetSVNRevisionFromGitHash() returns.

  Raises:
    Exception: If no blink revision could be extracted from either location.
  """

  def _GetBlinkRev(url, blink_re):
    """Returns group 1 of |blink_re| in the body of |url| (None if there is
    no match). The handle is closed even when reading fails."""
    try:
      m = blink_re.search(url.read())
    finally:
      url.close()
    if m:
      return m.group(1)
    return None

  url = urllib.urlopen(DEPS_FILE_OLD % rev)
  if url.getcode() == 200:
    # \D* skips every non-digit character between the key and the number.
    blink_re = re.compile(r'webkit_revision\D*(\d+)')
    blink_rev = _GetBlinkRev(url, blink_re)
    # Without this check a DEPS file lacking the key ended in int(None).
    if blink_rev is not None:
      return int(blink_rev)
  else:
    url.close()
    url = urllib.urlopen(DEPS_FILE_NEW % GetGitHashFromSVNRevision(rev))
    if url.getcode() == 200:
      blink_re = re.compile(r'webkit_revision\D*\d+;\D*\d+;(\w+)')
      blink_git_sha = _GetBlinkRev(url, blink_re)
      # Only convert a hash that was actually found.
      if blink_git_sha is not None:
        return self.GetSVNRevisionFromGitHash(blink_git_sha, 'blink')
    else:
      url.close()
  raise Exception('Could not get Blink revision for Chromium rev %d' % rev)
904
905
def GetBlinkRevisionForChromiumRevision(context, rev):
  """Returns the blink revision that was in REVISIONS file at
  chromium revision |rev|.

  Args:
    context: Object providing githash_svn_dict, base_url,
        _listing_platform_dir and GetSVNRevisionFromGitHash().
    rev: Chromium revision. If str(rev) is a key of context.githash_svn_dict,
        the mapped value is used to build the REVISIONS URL instead.

  Returns:
    The 'webkit_revision' entry of the REVISIONS JSON as an int. A value that
    is not a revision number is first converted with
    context.GetSVNRevisionFromGitHash().

  Raises:
    ValueError: If the URL does not answer with HTTP 200 or the body is not
        valid JSON.
    Exception: If the JSON has no 'webkit_revision' entry.
  """
  def _IsRevisionNumber(revision):
    """True for ints and for strings made only of digits."""
    if isinstance(revision, int):
      return True
    return revision.isdigit()

  if str(rev) in context.githash_svn_dict:
    rev = context.githash_svn_dict[str(rev)]
  file_url = '%s/%s%s/REVISIONS' % (context.base_url,
                                    context._listing_platform_dir, rev)
  url = urllib.urlopen(file_url)
  # The handle is released on every path, including the error ones.
  try:
    status = url.getcode()
    if status != 200:
      raise ValueError('HTTP status %s for URL: %s' % (status, file_url))
    try:
      data = json.loads(url.read())
    except ValueError:
      print('ValueError for JSON URL: %s' % file_url)
      raise ValueError('Invalid JSON at URL: %s' % file_url)
  finally:
    url.close()

  if 'webkit_revision' not in data:
    # %s rather than %d: |rev| is not necessarily an integer at this point.
    raise Exception('Could not get blink revision for cr rev %s' % rev)

  blink_rev = data['webkit_revision']
  if not _IsRevisionNumber(blink_rev):
    blink_rev = context.GetSVNRevisionFromGitHash(blink_rev, 'blink')
  # Always hand back an int so callers that order revisions with < and > do
  # not end up comparing a digit string against a number.
  return int(blink_rev)
935
936
def FixChromiumRevForBlink(revisions_final, revisions, self, rev):
  """Returns the chromium revision that has the correct blink revision
  for blink bisect, DEPS and REVISIONS file might not match since
  blink snapshots point to tip of tree blink.
  Note: The revisions_final variable might get modified to include
  additional revisions.

  Args:
    revisions_final: List of revisions; every earlier revision visited here
        is inserted if missing, and the list is sorted in place before
        returning.
    revisions: List of revisions that is walked backwards from |rev|.
    self: Object handed to GetBlinkDEPSRevisionForChromiumRevision() and
        GetBlinkRevisionForChromiumRevision().
    rev: Chromium revision to start from; must be an element of |revisions|
        if its REVISIONS blink revision is newer than its DEPS one.

  Returns:
    The first revision, walking backwards, whose REVISIONS blink revision is
    not greater than the DEPS blink revision of the starting |rev|, or the
    first element of |revisions| if no such revision exists.
  """
  blink_deps_rev = GetBlinkDEPSRevisionForChromiumRevision(self, rev)

  while (GetBlinkRevisionForChromiumRevision(self, rev) > blink_deps_rev):
    idx = revisions.index(rev)
    if idx == 0:
      # There is no earlier revision to step back to. Without this exit the
      # loop would re-check the same |rev| forever.
      break
    rev = revisions[idx-1]
    if rev not in revisions_final:
      revisions_final.insert(0, rev)

  revisions_final.sort()
  return rev
954
955
def GetChromiumRevision(context, url):
  """Returns the chromium revision read from given URL.

  Args:
    context: Object providing GetSVNRevisionFromGitHash(); only consulted
        when the fetched text is not made of digits alone.
    url: URL whose body holds the latest build revision.

  Returns:
    The revision as an int when the body is numeric, otherwise the result of
    context.GetSVNRevisionFromGitHash() for the body. If anything fails, a
    message is printed and 999999999 is returned instead.
  """
  try:
    # Location of the latest build revision number
    handle = urllib.urlopen(url)
    try:
      # Surrounding whitespace (e.g. a trailing newline) would make a plain
      # number fail the isdigit() test below.
      latest_revision = handle.read().strip()
    finally:
      handle.close()
    if latest_revision.isdigit():
      return int(latest_revision)
    return context.GetSVNRevisionFromGitHash(latest_revision)
  except Exception:
    # Deliberately best-effort: fall back to a huge revision number.
    print('Could not determine latest revision. This could be bad...')
    return 999999999
967
def GetGitHashFromSVNRevision(svn_revision):
  """Returns the git hash reported for |svn_revision|.

  Fetches CRREV_URL + str(svn_revision) and reads the 'git_sha' field of the
  JSON reply.

  Returns:
    The 'git_sha' value, or None when the request does not answer with HTTP
    200 or the reply has no 'git_sha' field.
  """
  crrev_url = CRREV_URL + str(svn_revision)
  url = urllib.urlopen(crrev_url)
  # Release the connection on every path, including a JSON parse failure.
  try:
    if url.getcode() == 200:
      data = json.loads(url.read())
      if 'git_sha' in data:
        return data['git_sha']
  finally:
    url.close()
  return None
975
def PrintChangeLog(min_chromium_rev, max_chromium_rev):
  """Prints the changelog URL for the given pair of chromium revisions.

  Both revisions are translated with GetGitHashFromSVNRevision() and
  substituted, lower bound first, into CHANGELOG_URL.
  """
  first_hash = GetGitHashFromSVNRevision(min_chromium_rev)
  last_hash = GetGitHashFromSVNRevision(max_chromium_rev)
  changelog = CHANGELOG_URL % (first_hash, last_hash)
  print('  ' + changelog)
981
982
def main():
  """Parses the command line, runs the bisection and prints the result.

  All command-line validation happens before the PathContext is created, so
  a bad invocation is rejected without fetching anything.

  Returns:
    1 when the command line is rejected (missing --archive, unsupported ASAN
    combination, missing Flash/PDF binary, --times below 1). None after a
    completed bisection, which sys.exit() treats as success.
  """
  usage = ('%prog [options] [-- chromium-options]\n'
           'Perform binary search on the snapshot builds to find a minimal\n'
           'range of revisions where a behavior change happened. The\n'
           'behaviors are described as "good" and "bad".\n'
           'It is NOT assumed that the behavior of the later revision is\n'
           'the bad one.\n'
           '\n'
           'Revision numbers should use\n'
           '  Official versions (e.g. 1.0.1000.0) for official builds. (-o)\n'
           '  SVN revisions (e.g. 123456) for chromium builds, from trunk.\n'
           '    Use base_trunk_revision from http://omahaproxy.appspot.com/\n'
           '    for earlier revs.\n'
           '    Chrome\'s about: build number and omahaproxy branch_revision\n'
           '    are incorrect, they are from branches.\n'
           '\n'
           'Tip: add "-- --no-first-run" to bypass the first run prompts.')
  parser = optparse.OptionParser(usage=usage)
  # Strangely, the default help output doesn't include the choice list.
  choices = ['mac', 'mac64', 'win', 'win64', 'linux', 'linux64', 'linux-arm']
            # linux-chromiumos lacks a continuous archive http://crbug.com/78158
  parser.add_option('-a', '--archive',
                    choices=choices,
                    help='The buildbot archive to bisect [%s].' %
                         '|'.join(choices))
  parser.add_option('-o',
                    action='store_true',
                    dest='official_builds',
                    help='Bisect across official Chrome builds (internal '
                         'only) instead of Chromium archives.')
  parser.add_option('-b', '--bad',
                    type='str',
                    help='A bad revision to start bisection. '
                         'May be earlier or later than the good revision. '
                         'Default is HEAD.')
  # The backslashes in the two Windows example paths are written as \\ so the
  # literals contain no invalid escape sequences; the help text is unchanged.
  parser.add_option('-f', '--flash_path',
                    type='str',
                    help='Absolute path to a recent Adobe Pepper Flash '
                         'binary to be used in this bisection (e.g. '
                         'on Windows C:\\...\\pepflashplayer.dll and on Linux '
                         '/opt/google/chrome/PepperFlash/'
                         'libpepflashplayer.so).')
  parser.add_option('-d', '--pdf_path',
                    type='str',
                    help='Absolute path to a recent PDF plugin '
                         'binary to be used in this bisection (e.g. '
                         'on Windows C:\\...\\pdf.dll and on Linux '
                         '/opt/google/chrome/libpdf.so). Option also enables '
                         'print preview.')
  parser.add_option('-g', '--good',
                    type='str',
                    help='A good revision to start bisection. ' +
                         'May be earlier or later than the bad revision. ' +
                         'Default is 0.')
  parser.add_option('-p', '--profile', '--user-data-dir',
                    type='str',
                    default='profile',
                    help='Profile to use; this will not reset every run. '
                         'Defaults to a clean profile.')
  parser.add_option('-t', '--times',
                    type='int',
                    default=1,
                    help='Number of times to run each build before asking '
                         'if it\'s good or bad. Temporary profiles are reused.')
  parser.add_option('-c', '--command',
                    type='str',
                    default='%p %a',
                    help='Command to execute. %p and %a refer to Chrome '
                         'executable and specified extra arguments '
                         'respectively. Use %s to specify all extra arguments '
                         'as one string. Defaults to "%p %a". Note that any '
                         'extra paths specified should be absolute.')
  parser.add_option('-l', '--blink',
                    action='store_true',
                    help='Use Blink bisect instead of Chromium. ')
  parser.add_option('', '--not-interactive',
                    action='store_true',
                    default=False,
                    help='Use command exit code to tell good/bad revision.')
  parser.add_option('--asan',
                    dest='asan',
                    action='store_true',
                    default=False,
                    help='Allow the script to bisect ASAN builds')
  parser.add_option('--use-local-repo',
                    dest='use_local_repo',
                    action='store_true',
                    default=False,
                    help='Allow the script to convert git SHA1 to SVN '
                         'revision using "git svn find-rev <SHA1>" '
                         'command from a Chromium checkout.')

  (opts, args) = parser.parse_args()

  if opts.archive is None:
    print('Error: missing required parameter: --archive')
    print('')
    parser.print_help()
    return 1

  if opts.asan:
    supported_platforms = ['linux', 'mac', 'win']
    if opts.archive not in supported_platforms:
      print('Error: ASAN bisecting only supported on these platforms: [%s].' %
            '|'.join(supported_platforms))
      return 1
    if opts.official_builds:
      print('Error: Do not yet support bisecting official ASAN builds.')
      return 1

  # Explicit checks instead of assert, which is skipped when python runs
  # with -O.
  if opts.flash_path and not os.path.exists(opts.flash_path):
    print('Could not find Flash binary at %s' % opts.flash_path)
    return 1

  if opts.pdf_path and not os.path.exists(opts.pdf_path):
    print('Could not find PDF binary at %s' % opts.pdf_path)
    return 1

  if opts.times < 1:
    print('Number of times to run (%d) must be greater than or equal to 1.' %
          opts.times)
    parser.print_help()
    return 1

  if opts.asan:
    base_url = ASAN_BASE_URL
  elif opts.blink:
    base_url = WEBKIT_BASE_URL
  else:
    base_url = CHROMIUM_BASE_URL

  # Create the context. opts.good / opts.bad are None when not given on the
  # command line; the defaults are filled in right below.
  context = PathContext(base_url, opts.archive, opts.good, opts.bad,
                        opts.official_builds, opts.asan, opts.use_local_repo,
                        opts.flash_path, opts.pdf_path)
  # Pick a starting point, try to get HEAD for this.
  if not opts.bad:
    if opts.official_builds:
      # Official builds are identified by version strings. Keep this default
      # as a string, because it is wrapped in LooseVersion below.
      context.bad_revision = '999.0.0.0'
    else:
      context.bad_revision = GetChromiumRevision(
          context, context.GetLastChangeURL())

  # Find out when we were good.
  if not opts.good:
    context.good_revision = '0.0.0.0' if opts.official_builds else 0

  if opts.official_builds:
    context.good_revision = LooseVersion(context.good_revision)
    context.bad_revision = LooseVersion(context.bad_revision)
  else:
    context.good_revision = int(context.good_revision)
    context.bad_revision = int(context.bad_revision)

  if opts.asan:
    evaluator = IsGoodASANBuild
  else:
    evaluator = AskIsGoodBuild

  # Save these revision numbers to compare when showing the changelog URL
  # after the bisect.
  good_rev = context.good_revision
  bad_rev = context.bad_revision

  (min_chromium_rev, max_chromium_rev, context) = Bisect(
      context, opts.times, opts.command, args, opts.profile,
      not opts.not_interactive, evaluator)

  # Get corresponding blink revisions.
  try:
    min_blink_rev = GetBlinkRevisionForChromiumRevision(context,
                                                        min_chromium_rev)
    max_blink_rev = GetBlinkRevisionForChromiumRevision(context,
                                                        max_chromium_rev)
  except Exception:
    # Silently ignore the failure.
    min_blink_rev, max_blink_rev = 0, 0

  if opts.blink:
    # We're done. Let the user know the results in an official manner.
    if good_rev > bad_rev:
      print(DONE_MESSAGE_GOOD_MAX % (str(min_blink_rev), str(max_blink_rev)))
    else:
      print(DONE_MESSAGE_GOOD_MIN % (str(min_blink_rev), str(max_blink_rev)))

    print('BLINK CHANGELOG URL:')
    print('  ' + BLINK_CHANGELOG_URL % (max_blink_rev, min_blink_rev))

  else:
    # We're done. Let the user know the results in an official manner.
    if good_rev > bad_rev:
      print(DONE_MESSAGE_GOOD_MAX % (str(min_chromium_rev),
                                     str(max_chromium_rev)))
    else:
      print(DONE_MESSAGE_GOOD_MIN % (str(min_chromium_rev),
                                     str(max_chromium_rev)))
    if min_blink_rev != max_blink_rev:
      print('NOTE: There is a Blink roll in the range, '
            'you might also want to do a Blink bisect.')

    print('CHANGELOG URL:')
    if opts.official_builds:
      print(OFFICIAL_CHANGELOG_URL % (min_chromium_rev, max_chromium_rev))
    else:
      PrintChangeLog(min_chromium_rev, max_chromium_rev)
1187
# Script entry point: run main() only when this file is executed directly and
# hand its return value to sys.exit() as the process exit status.
if __name__ == '__main__':
  sys.exit(main())
1190