binary_search_state.py revision 4cc3dd8a3b2ab04cf861d7c71967de0298746cb9
1#!/usr/bin/python2
2"""The binary search wrapper."""
3
4from __future__ import print_function
5
6import argparse
7import contextlib
8import errno
9import math
10import os
11import pickle
12import sys
13import tempfile
14import time
15
16# Adds cros_utils to PYTHONPATH
17import common
18
19# Now we do import from cros_utils
20from cros_utils import command_executer
21from cros_utils import logger
22
23import binary_search_perforce
24
25GOOD_SET_VAR = 'BISECT_GOOD_SET'
26BAD_SET_VAR = 'BISECT_BAD_SET'
27
28STATE_FILE = '%s.state' % sys.argv[0]
29HIDDEN_STATE_FILE = os.path.join(
30    os.path.dirname(STATE_FILE), '.%s' % os.path.basename(STATE_FILE))
31
32
33class Error(Exception):
34  """The general binary search tool error class."""
35  pass
36
37
38@contextlib.contextmanager
39def SetFile(env_var, items):
40  """Generate set files that can be used by switch/test scripts.
41
42  Generate temporary set file (good/bad) holding contents of good/bad items for
43  the current binary search iteration. Store the name of each file as an
44  environment variable so all child processes can access it.
45
46  This function is a contextmanager, meaning it's meant to be used with the
47  "with" statement in Python. This is so cleanup and setup happens automatically
48  and cleanly. Execution of the outer "with" statement happens at the "yield"
49  statement.
50
51  Args:
52    env_var: What environment variable to store the file name in.
53    items: What items are in this set.
54  """
55  with tempfile.NamedTemporaryFile() as f:
56    os.environ[env_var] = f.name
57    f.write('\n'.join(items))
58    f.flush()
59    yield
60
61
62class BinarySearchState(object):
63  """The binary search state class."""
64
65  def __init__(self, get_initial_items, switch_to_good, switch_to_bad,
66               test_setup_script, test_script, incremental, prune, iterations,
67               prune_iterations, verify, file_args, verbose):
68    """BinarySearchState constructor, see Run for full args documentation."""
69    self.get_initial_items = get_initial_items
70    self.switch_to_good = switch_to_good
71    self.switch_to_bad = switch_to_bad
72    self.test_setup_script = test_setup_script
73    self.test_script = test_script
74    self.incremental = incremental
75    self.prune = prune
76    self.iterations = iterations
77    self.prune_iterations = prune_iterations
78    self.verify = verify
79    self.file_args = file_args
80    self.verbose = verbose
81
82    self.l = logger.GetLogger()
83    self.ce = command_executer.GetCommandExecuter()
84
85    self.resumed = False
86    self.prune_cycles = 0
87    self.search_cycles = 0
88    self.binary_search = None
89    self.all_items = None
90    self.PopulateItemsUsingCommand(self.get_initial_items)
91    self.currently_good_items = set([])
92    self.currently_bad_items = set([])
93    self.found_items = set([])
94    self.known_good = set([])
95
96    self.start_time = time.time()
97
98  def SwitchToGood(self, item_list):
99    """Switch given items to "good" set."""
100    if self.incremental:
101      self.l.LogOutput(
102          'Incremental set. Wanted to switch %s to good' % str(item_list),
103          print_to_console=self.verbose)
104      incremental_items = [
105          item for item in item_list if item not in self.currently_good_items
106      ]
107      item_list = incremental_items
108      self.l.LogOutput(
109          'Incremental set. Actually switching %s to good' % str(item_list),
110          print_to_console=self.verbose)
111
112    if not item_list:
113      return
114
115    self.l.LogOutput(
116        'Switching %s to good' % str(item_list), print_to_console=self.verbose)
117    self.RunSwitchScript(self.switch_to_good, item_list)
118    self.currently_good_items = self.currently_good_items.union(set(item_list))
119    self.currently_bad_items.difference_update(set(item_list))
120
121  def SwitchToBad(self, item_list):
122    """Switch given items to "bad" set."""
123    if self.incremental:
124      self.l.LogOutput(
125          'Incremental set. Wanted to switch %s to bad' % str(item_list),
126          print_to_console=self.verbose)
127      incremental_items = [
128          item for item in item_list if item not in self.currently_bad_items
129      ]
130      item_list = incremental_items
131      self.l.LogOutput(
132          'Incremental set. Actually switching %s to bad' % str(item_list),
133          print_to_console=self.verbose)
134
135    if not item_list:
136      return
137
138    self.l.LogOutput(
139        'Switching %s to bad' % str(item_list), print_to_console=self.verbose)
140    self.RunSwitchScript(self.switch_to_bad, item_list)
141    self.currently_bad_items = self.currently_bad_items.union(set(item_list))
142    self.currently_good_items.difference_update(set(item_list))
143
144  def RunSwitchScript(self, switch_script, item_list):
145    """Pass given items to switch script.
146
147    Args:
148      switch_script: path to switch script
149      item_list: list of all items to be switched
150    """
151    if self.file_args:
152      with tempfile.NamedTemporaryFile() as f:
153        f.write('\n'.join(item_list))
154        f.flush()
155        command = '%s %s' % (switch_script, f.name)
156        ret, _, _ = self.ce.RunCommandWExceptionCleanup(
157            command, print_to_console=self.verbose)
158    else:
159      command = '%s %s' % (switch_script, ' '.join(item_list))
160      try:
161        ret, _, _ = self.ce.RunCommandWExceptionCleanup(
162            command, print_to_console=self.verbose)
163      except OSError as e:
164        if e.errno == errno.E2BIG:
165          raise Error('Too many arguments for switch script! Use --file_args')
166        else:
167          raise
168    assert ret == 0, 'Switch script %s returned %d' % (switch_script, ret)
169
170  def TestScript(self):
171    """Run test script and return exit code from script."""
172    command = self.test_script
173    ret, _, _ = self.ce.RunCommandWExceptionCleanup(command)
174    return ret
175
176  def TestSetupScript(self):
177    """Run test setup script and return exit code from script."""
178    if not self.test_setup_script:
179      return 0
180
181    command = self.test_setup_script
182    ret, _, _ = self.ce.RunCommandWExceptionCleanup(command)
183    return ret
184
185  def DoVerify(self):
186    """Verify correctness of test environment.
187
188    Verify that a "good" set of items produces a "good" result and that a "bad"
189    set of items produces a "bad" result. To be run directly before running
190    DoSearch. If verify is False this step is skipped.
191    """
192    if not self.verify:
193      return
194
195    self.l.LogOutput('VERIFICATION')
196    self.l.LogOutput('Beginning tests to verify good/bad sets\n')
197
198    self._OutputProgress('Verifying items from GOOD set\n')
199    with SetFile(GOOD_SET_VAR, self.all_items), SetFile(BAD_SET_VAR, []):
200      self.l.LogOutput('Resetting all items to good to verify.')
201      self.SwitchToGood(self.all_items)
202      status = self.TestSetupScript()
203      assert status == 0, 'When reset_to_good, test setup should succeed.'
204      status = self.TestScript()
205      assert status == 0, 'When reset_to_good, status should be 0.'
206
207    self._OutputProgress('Verifying items from BAD set\n')
208    with SetFile(GOOD_SET_VAR, []), SetFile(BAD_SET_VAR, self.all_items):
209      self.l.LogOutput('Resetting all items to bad to verify.')
210      self.SwitchToBad(self.all_items)
211      status = self.TestSetupScript()
212      assert status == 0, 'When reset_to_bad, test setup should succeed.'
213      status = self.TestScript()
214      assert status == 1, 'When reset_to_bad, status should be 1.'
215
216  def DoSearch(self):
217    """Perform full search for bad items.
218
219    Perform full search until prune_iterations number of bad items are found.
220    """
221    while (True and len(self.all_items) > 1 and
222           self.prune_cycles < self.prune_iterations):
223      terminated = self.DoBinarySearch()
224      self.prune_cycles += 1
225      if not terminated:
226        break
227      # Prune is set.
228      prune_index = self.binary_search.current
229
230      # If found item is last item, no new items can be found
231      if prune_index == len(self.all_items) - 1:
232        self.l.LogOutput('First bad item is the last item. Breaking.')
233        self.l.LogOutput('Bad items are: %s' % self.all_items[-1])
234        break
235
236      # If already seen item we have no new bad items to find, finish up
237      if self.all_items[prune_index] in self.found_items:
238        self.l.LogOutput(
239            'Found item already found before: %s.' %
240            self.all_items[prune_index],
241            print_to_console=self.verbose)
242        self.l.LogOutput('No more bad items remaining. Done searching.')
243        self.l.LogOutput('Bad items are: %s' % ' '.join(self.found_items))
244        break
245
246      new_all_items = list(self.all_items)
247      # Move prune item to the end of the list.
248      new_all_items.append(new_all_items.pop(prune_index))
249      self.found_items.add(new_all_items[-1])
250
251      # Everything below newly found bad item is now known to be a good item.
252      # Take these good items out of the equation to save time on the next
253      # search. We save these known good items so they are still sent to the
254      # switch_to_good script.
255      if prune_index:
256        self.known_good.update(new_all_items[:prune_index])
257        new_all_items = new_all_items[prune_index:]
258
259      self.l.LogOutput(
260          'Old list: %s. New list: %s' % (str(self.all_items),
261                                          str(new_all_items)),
262          print_to_console=self.verbose)
263
264      if not self.prune:
265        self.l.LogOutput('Not continuning further, --prune is not set')
266        break
267      # FIXME: Do we need to Convert the currently good items to bad
268      self.PopulateItemsUsingList(new_all_items)
269
270  def DoBinarySearch(self):
271    """Perform single iteration of binary search."""
272    # If in resume mode don't reset search_cycles
273    if not self.resumed:
274      self.search_cycles = 0
275    else:
276      self.resumed = False
277
278    terminated = False
279    while self.search_cycles < self.iterations and not terminated:
280      self.SaveState()
281      self.OutputIterationProgress()
282
283      self.search_cycles += 1
284      [bad_items, good_items] = self.GetNextItems()
285
286      with SetFile(GOOD_SET_VAR, good_items), SetFile(BAD_SET_VAR, bad_items):
287        # TODO: bad_items should come first.
288        self.SwitchToGood(good_items)
289        self.SwitchToBad(bad_items)
290        status = self.TestSetupScript()
291        if status == 0:
292          status = self.TestScript()
293        else:
294          # Test setup script failed, treat as skipped item
295          status = 125
296        terminated = self.binary_search.SetStatus(status)
297
298      if terminated:
299        self.l.LogOutput('Terminated!', print_to_console=self.verbose)
300    if not terminated:
301      self.l.LogOutput('Ran out of iterations searching...')
302    self.l.LogOutput(str(self), print_to_console=self.verbose)
303    return terminated
304
305  def PopulateItemsUsingCommand(self, command):
306    """Update all_items and binary search logic from executable.
307
308    This method is mainly required for enumerating the initial list of items
309    from the get_initial_items script.
310
311    Args:
312      command: path to executable that will enumerate items.
313    """
314    ce = command_executer.GetCommandExecuter()
315    _, out, _ = ce.RunCommandWExceptionCleanup(
316        command, return_output=True, print_to_console=self.verbose)
317    all_items = out.split()
318    self.PopulateItemsUsingList(all_items)
319
320  def PopulateItemsUsingList(self, all_items):
321    """Update all_items and binary searching logic from list.
322
323    Args:
324      all_items: new list of all_items
325    """
326    self.all_items = all_items
327    self.binary_search = binary_search_perforce.BinarySearcher(
328        logger_to_set=self.l)
329    self.binary_search.SetSortedList(self.all_items)
330
331  def SaveState(self):
332    """Save state to STATE_FILE.
333
334    SaveState will create a new unique, hidden state file to hold data from
335    object. Then atomically overwrite the STATE_FILE symlink to point to the
336    new data.
337
338    Raises:
339      Error if STATE_FILE already exists but is not a symlink.
340    """
341    ce, l = self.ce, self.l
342    self.ce, self.l, self.binary_search.logger = None, None, None
343    old_state = None
344
345    _, path = tempfile.mkstemp(prefix=HIDDEN_STATE_FILE, dir='.')
346    with open(path, 'wb') as f:
347      pickle.dump(self, f)
348
349    if os.path.exists(STATE_FILE):
350      if os.path.islink(STATE_FILE):
351        old_state = os.readlink(STATE_FILE)
352      else:
353        raise Error(('%s already exists and is not a symlink!\n'
354                     'State file saved to %s' % (STATE_FILE, path)))
355
356    # Create new link and atomically overwrite old link
357    temp_link = '%s.link' % HIDDEN_STATE_FILE
358    os.symlink(path, temp_link)
359    os.rename(temp_link, STATE_FILE)
360
361    if old_state:
362      os.remove(old_state)
363
364    self.ce, self.l, self.binary_search.logger = ce, l, l
365
366  @classmethod
367  def LoadState(cls):
368    """Create BinarySearchState object from STATE_FILE."""
369    if not os.path.isfile(STATE_FILE):
370      return None
371    try:
372      bss = pickle.load(file(STATE_FILE))
373      bss.l = logger.GetLogger()
374      bss.ce = command_executer.GetCommandExecuter()
375      bss.binary_search.logger = bss.l
376      bss.start_time = time.time()
377
378      # Set resumed to be True so we can enter DoBinarySearch without the method
379      # resetting our current search_cycles to 0.
380      bss.resumed = True
381
382      # Set currently_good_items and currently_bad_items to empty so that the
383      # first iteration after resuming will always be non-incremental. This is
384      # just in case the environment changes, the user makes manual changes, or
385      # a previous switch_script corrupted the environment.
386      bss.currently_good_items = set([])
387      bss.currently_bad_items = set([])
388
389      binary_search_perforce.verbose = bss.verbose
390      return bss
391    except StandardError:
392      return None
393
394  def RemoveState(self):
395    """Remove STATE_FILE and its symlinked data from file system."""
396    if os.path.exists(STATE_FILE):
397      if os.path.islink(STATE_FILE):
398        real_file = os.readlink(STATE_FILE)
399        os.remove(real_file)
400        os.remove(STATE_FILE)
401
402  def GetNextItems(self):
403    """Get next items for binary search based on result of the last test run."""
404    border_item = self.binary_search.GetNext()
405    index = self.all_items.index(border_item)
406
407    next_bad_items = self.all_items[:index + 1]
408    next_good_items = self.all_items[index + 1:] + list(self.known_good)
409
410    return [next_bad_items, next_good_items]
411
412  def ElapsedTimeString(self):
413    """Return h m s format of elapsed time since execution has started."""
414    diff = int(time.time() - self.start_time)
415    seconds = diff % 60
416    minutes = (diff / 60) % 60
417    hours = diff / (60 * 60)
418
419    seconds = str(seconds).rjust(2)
420    minutes = str(minutes).rjust(2)
421    hours = str(hours).rjust(2)
422
423    return '%sh %sm %ss' % (hours, minutes, seconds)
424
425  def _OutputProgress(self, progress_text):
426    """Output current progress of binary search to console and logs.
427
428    Args:
429      progress_text: The progress to display to the user.
430    """
431    progress = ('\n***** PROGRESS (elapsed time: %s) *****\n'
432                '%s'
433                '************************************************')
434    progress = progress % (self.ElapsedTimeString(), progress_text)
435    self.l.LogOutput(progress)
436
437  def OutputIterationProgress(self):
438    out = ('Search %d of estimated %d.\n'
439           'Prune %d of max %d.\n'
440           'Current bad items found:\n'
441           '%s\n')
442    out = out % (self.search_cycles + 1,
443                 math.ceil(math.log(len(self.all_items), 2)),
444                 self.prune_cycles + 1, self.prune_iterations,
445                 ', '.join(self.found_items))
446    self._OutputProgress(out)
447
448  def __str__(self):
449    ret = ''
450    ret += 'all: %s\n' % str(self.all_items)
451    ret += 'currently_good: %s\n' % str(self.currently_good_items)
452    ret += 'currently_bad: %s\n' % str(self.currently_bad_items)
453    ret += str(self.binary_search)
454    return ret
455
456
457class MockBinarySearchState(BinarySearchState):
458  """Mock class for BinarySearchState."""
459
460  def __init__(self, **kwargs):
461    # Initialize all arguments to None
462    default_kwargs = {
463        'get_initial_items': 'echo "1"',
464        'switch_to_good': None,
465        'switch_to_bad': None,
466        'test_setup_script': None,
467        'test_script': None,
468        'incremental': True,
469        'prune': False,
470        'iterations': 50,
471        'prune_iterations': 100,
472        'verify': True,
473        'file_args': False,
474        'verbose': False
475    }
476    default_kwargs.update(kwargs)
477    super(MockBinarySearchState, self).__init__(**default_kwargs)
478
479
480def _CanonicalizeScript(script_name):
481  """Return canonical path to script.
482
483  Args:
484    script_name: Relative or absolute path to script
485
486  Returns:
487    Canonicalized script path
488  """
489  script_name = os.path.expanduser(script_name)
490  if not script_name.startswith('/'):
491    return os.path.join('.', script_name)
492
493
494def Run(get_initial_items,
495        switch_to_good,
496        switch_to_bad,
497        test_script,
498        test_setup_script=None,
499        iterations=50,
500        prune=False,
501        noincremental=False,
502        file_args=False,
503        verify=True,
504        prune_iterations=100,
505        verbose=False,
506        resume=False):
507  """Run binary search tool. Equivalent to running through terminal.
508
509  Args:
510    get_initial_items: Script to enumerate all items being binary searched
511    switch_to_good: Script that will take items as input and switch them to good
512                    set
513    switch_to_bad: Script that will take items as input and switch them to bad
514                   set
515    test_script: Script that will determine if the current combination of good
516                 and bad items make a "good" or "bad" result.
517    test_setup_script: Script to do necessary setup (building, compilation,
518                       etc.) for test_script.
519    iterations: How many binary search iterations to run before exiting.
520    prune: If False the binary search tool will stop when the first bad item is
521           found. Otherwise then binary search tool will continue searching
522           until all bad items are found (or prune_iterations is reached).
523    noincremental: Whether to send "diffs" of good/bad items to switch scripts.
524    file_args: If True then arguments to switch scripts will be a file name
525               containing a newline separated list of the items to switch.
526    verify: If True, run tests to ensure initial good/bad sets actually
527            produce a good/bad result.
528    prune_iterations: Max number of bad items to search for.
529    verbose: If True will print extra debug information to user.
530    resume: If True will resume using STATE_FILE.
531
532  Returns:
533    0 for success, error otherwise
534  """
535  if resume:
536    bss = BinarySearchState.LoadState()
537    if not bss:
538      logger.GetLogger().LogOutput(
539          '%s is not a valid binary_search_tool state file, cannot resume!' %
540          STATE_FILE)
541      return 1
542  else:
543    switch_to_good = _CanonicalizeScript(switch_to_good)
544    switch_to_bad = _CanonicalizeScript(switch_to_bad)
545    if test_setup_script:
546      test_setup_script = _CanonicalizeScript(test_setup_script)
547    test_script = _CanonicalizeScript(test_script)
548    get_initial_items = _CanonicalizeScript(get_initial_items)
549    incremental = not noincremental
550
551    binary_search_perforce.verbose = verbose
552
553    bss = BinarySearchState(get_initial_items, switch_to_good, switch_to_bad,
554                            test_setup_script, test_script, incremental, prune,
555                            iterations, prune_iterations, verify, file_args,
556                            verbose)
557    bss.DoVerify()
558
559  try:
560    bss.DoSearch()
561    bss.RemoveState()
562    logger.GetLogger().LogOutput('Total execution time: %s' %
563                                 bss.ElapsedTimeString())
564  except Error as e:
565    logger.GetLogger().LogError(e)
566    return 1
567
568  return 0
569
570
571def Main(argv):
572  """The main function."""
573  # Common initializations
574
575  parser = argparse.ArgumentParser()
576  common.BuildArgParser(parser)
577  logger.GetLogger().LogOutput(' '.join(argv))
578  options = parser.parse_args(argv)
579
580  if not (options.get_initial_items and options.switch_to_good and
581          options.switch_to_bad and options.test_script) and not options.resume:
582    parser.print_help()
583    return 1
584
585  if options.resume:
586    logger.GetLogger().LogOutput('Resuming from %s' % STATE_FILE)
587    if len(argv) > 1:
588      logger.GetLogger().LogOutput(('Note: resuming from previous state, '
589                                    'ignoring given options and loading saved '
590                                    'options instead.'))
591
592  # Get dictionary of all options
593  args = vars(options)
594  return Run(**args)
595
596
597if __name__ == '__main__':
598  sys.exit(Main(sys.argv[1:]))
599