bisect_driver.py revision 7248b3f00937ced8385259923aa038fc00d413cb
1# Copyright 2016 Google Inc. All Rights Reserved.
2#
3# This script is used to help the compiler wrapper in the Android build system
4# bisect for bad object files.
5"""Utilities for bisection of Android object files.
6
7This module contains a set of utilities to allow bisection between
8two sets (good and bad) of object files. Mostly used to find compiler
9bugs.
10
11Reference page:
12https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper
13
14Design doc:
15https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
16"""
17
18from __future__ import print_function
19
20import contextlib
21import fcntl
22import os
23import shutil
24import subprocess
25import sys
26
# Stage names that bisect_driver() dispatches on.
VALID_MODES = ['POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE']

# Names of the two cache sub-directories kept inside the bisection directory.
GOOD_CACHE = 'good'
BAD_CACHE = 'bad'

# Location of the object list, relative to the bisection directory.
LIST_FILE = os.path.join(GOOD_CACHE, '_LIST')

# Both switches are on only when the environment variable is exactly '1'.
CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING') == '1'
WRAPPER_SAFE_MODE = os.environ.get('BISECT_WRAPPER_SAFE_MODE') == '1'
34
35
class Error(Exception):
  """General error type raised by the compiler wrapper bisection code."""
39
40
@contextlib.contextmanager
def lock_file(path, mode):
  """Open a file and hold an advisory lock on it for the duration of a "with".

  The lock only affects other processes that also lock the file (for example
  through this function). If only reading (modes 'r' and 'rb') the lock is
  shared, i.e. many readers can hold it concurrently; any other mode takes an
  exclusive lock so only one process may write at a time. The call blocks
  until the lock can be acquired.

  This function is a contextmanager, meaning it's meant to be used with the
  "with" statement in Python. The body of the outer "with" statement runs at
  the "yield" statement; execution resumes after the yield when the "with"
  block ends, at which point the file is flushed (on success), unlocked and
  closed.

  Args:
    path: path to file being locked
    mode: mode to open file with ('w', 'r', etc.)

  Yields:
    The opened (and locked) file object.
  """
  with open(path, mode) as f:
    # Share the lock if just reading, make lock exclusive if writing
    if f.mode in ('r', 'rb'):
      lock_type = fcntl.LOCK_SH
    else:
      lock_type = fcntl.LOCK_EX

    # Acquire before entering the try block: if acquisition itself fails there
    # is no lock to release, so the unlock in "finally" must not run.
    fcntl.lockf(f, lock_type)
    try:
      yield f
      # Push buffered writes out while the lock is still held.
      f.flush()
    finally:
      fcntl.lockf(f, fcntl.LOCK_UN)
75
76
def log_to_file(path, execargs, link_from=None, link_to=None):
  """Append one invocation record to a log file.

  The record holds the current working directory and the space-joined
  execargs. When both link_from and link_to are given, a second line with the
  from-to relationship between the two files is appended as well.

  Args:
    path: log file to append to (opened through lock_file in 'a' mode).
    execargs: compiler execution arguments.
    link_from: optional source of the from-to relationship.
    link_to: optional destination of the from-to relationship.
  """
  with lock_file(path, 'a') as log:
    cwd = os.getcwd()
    command_line = ' '.join(execargs)
    log.write('cd: %s; %s\n' % (cwd, command_line))

    has_link = link_from and link_to
    if has_link:
      log.write('%s -> %s\n' % (link_from, link_to))
87
88
def exec_and_return(execargs):
  """Run a command and hand back its exit status.

  The process is started from execargs and waited for; its stdout and stderr
  are neither captured nor inspected.

  Args:
    execargs: argument list of the process to run.

  Returns:
    The return code reported by subprocess.call.
  """
  exit_status = subprocess.call(execargs)
  return exit_status
96
97
def which_cache(obj_file):
  """Determine which cache an object belongs to.

  The binary search tool creates two files for each search iteration listing
  the full set of bad objects and full set of good objects. We use this to
  determine where an object file should be linked from (good or bad).

  The file named by the BISECT_BAD_SET environment variable is searched for a
  line that is exactly obj_file.

  Args:
    obj_file: object file path to look up.

  Returns:
    BAD_CACHE if obj_file is listed in the bad set file, GOOD_CACHE otherwise
    (including when grep fails, e.g. because the file cannot be read).

  Raises:
    Error: if BISECT_BAD_SET is not set in the environment.
  """
  bad_set_file = os.environ.get('BISECT_BAD_SET')
  if bad_set_file is None:
    # Without this check subprocess would fail with an opaque TypeError.
    raise Error('BISECT_BAD_SET is not set. Unable to determine which cache '
                '%s belongs to.' % obj_file)

  # -F: match the path literally; without it grep treats it as a regular
  #     expression, so '.' would match any character and '[' could make the
  #     pattern invalid.
  # -x: only whole-line matches count. -q: no output, exit status only.
  # -e / --: keep a path starting with '-' from being parsed as an option.
  ret = subprocess.call(
      ['grep', '-F', '-x', '-q', '-e', obj_file, '--', bad_set_file])
  if ret == 0:
    return BAD_CACHE
  return GOOD_CACHE
111
112
def makedirs(path):
  """Create path (and missing parents), tolerating an existing directory.

  Args:
    path: directory path to create.

  Raises:
    OSError: if creation failed and path is not a directory afterwards.
  """
  try:
    os.makedirs(path)
  except OSError:
    # The failure is only acceptable when the directory is already there.
    if os.path.isdir(path):
      return
    raise
120
121
def get_obj_path(execargs):
  """Get the object path for the object file in the list of arguments.

  Args:
    execargs: compiler execution arguments.

  Returns:
    Absolute object path from execution args (-o argument). If no object being
    outputted, if '-o' is the last argument (so it has no value), or if the
    output doesn't end in ".o" then return empty string.
  """
  try:
    i = execargs.index('-o')
    # A dangling '-o' at the very end of the command line has no value.
    obj_path = execargs[i + 1]
  except (ValueError, IndexError):
    return ''

  if not obj_path.endswith('.o'):
    # TODO: what suffixes do we need to contemplate
    # TODO: add this as a warning
    # TODO: need to handle -r compilations
    return ''

  return os.path.abspath(obj_path)
142
143
def get_dep_path(execargs):
  """Get the dep file path for the dep file in the list of arguments.

  Args:
    execargs: compiler execution arguments.

  Returns:
    Absolute path of the dependency file. This is the value of the -MF
    argument if given, otherwise the object path (-o argument) with its ".o"
    suffix replaced by ".d". If no dependency is being outputted (neither -MD
    nor -MMD given), if -MF is the last argument (so it has no value), or if
    there is no object path to derive the name from, return empty string.
  """
  if '-MD' not in execargs and '-MMD' not in execargs:
    return ''

  # If -MF given this is the path of the dependency file. Otherwise the
  # dependency file is the value of -o but with a .d extension
  if '-MF' in execargs:
    i = execargs.index('-MF')
    if i + 1 >= len(execargs):
      # Dangling '-MF' without a value: there is no dependency path to report.
      return ''
    return os.path.abspath(execargs[i + 1])

  full_obj_path = get_obj_path(execargs)
  if not full_obj_path:
    return ''

  # get_obj_path only returns paths ending in '.o'; drop those two characters.
  return full_obj_path[:-2] + '.d'
166
167
def get_dwo_path(execargs):
  """Get the dwo file path for the dwo file in the list of arguments.

  Args:
    execargs: compiler execution arguments.

  Returns:
    The absolute object path with its ".o" suffix replaced by ".dwo" when
    -gsplit-dwarf is among the arguments and an object path is found;
    otherwise empty string.
  """
  if '-gsplit-dwarf' in execargs:
    obj_path = get_obj_path(execargs)
    if obj_path:
      # Strip the trailing '.o' and put '.dwo' in its place.
      return obj_path[:-2] + '.dwo'

  return ''
183
184
def in_object_list(obj_name, list_filename):
  """Check whether an object file name is listed in an object list file.

  Args:
    obj_name: object file name to look for; a false value is never found.
    list_filename: path of the file holding one object name per line.

  Returns:
    True if some line, with surrounding whitespace stripped, equals obj_name.
    False otherwise.
  """
  if not obj_name:
    return False

  with lock_file(list_filename, 'r') as list_file:
    # any() stops reading at the first matching line.
    return any(entry.strip() == obj_name for entry in list_file)
196
197
def get_side_effects(execargs):
  """Determine side effects generated by compiler.

  Args:
    execargs: compiler execution arguments.

  Returns:
    List of paths of files that the compiler generates as side effects: the
    dependency file first, then the dwo file, each only when its path is
    non-empty.
  """
  candidates = (
      get_dep_path(execargs),  # dependency file
      get_dwo_path(execargs),  # dwo file
  )
  return [candidate for candidate in candidates if candidate]
217
218
def cache_file(execargs, bisect_dir, cache, abs_file_path):
  """Cache compiler output file (.o/.d/.dwo).

  Creates the destination directory inside the cache, records the operation
  in the cache's _POPULATE_LOG and, if the source file exists, copies it into
  the cache.

  Args:
    execargs: compiler execution arguments (only used for logging).
    bisect_dir: bisection directory.
    cache: name of the cache sub-directory to store the file in.
    abs_file_path: absolute path of the file to cache.
  """
  cache_root = os.path.join(bisect_dir, cache)
  # os.path.join would discard cache_root because abs_file_path is absolute,
  # so the two are concatenated instead.
  destination = cache_root + abs_file_path
  makedirs(os.path.dirname(destination))

  populate_log = os.path.join(cache_root, '_POPULATE_LOG')
  log_to_file(populate_log, execargs, abs_file_path, destination)

  try:
    if not os.path.exists(abs_file_path):
      return
    shutil.copy2(abs_file_path, destination)
  except Exception:
    print('Could not cache file %s' % abs_file_path, file=sys.stderr)
    raise
234
235
def restore_file(bisect_dir, cache, abs_file_path):
  """Restore file from cache (.o/.d/.dwo).

  Any existing file at abs_file_path is removed and replaced by a hard link to
  the cached copy.

  Args:
    bisect_dir: bisection directory.
    cache: name of the cache sub-directory to restore from.
    abs_file_path: absolute path of the file to restore.

  Raises:
    Error: if the file is not present in the given cache.
  """
  # os.path.join would discard the cache directory because abs_file_path is
  # absolute, so the two are concatenated instead.
  cached_path = os.path.join(bisect_dir, cache) + abs_file_path
  if not os.path.exists(cached_path):
    # Report the missing path first and the cache name second, matching the
    # wording of the message.
    raise Error(('%s is missing from %s cache! Unsure how to proceed. Make '
                 'will now crash.' % (cached_path, cache)))

  if os.path.exists(abs_file_path):
    os.remove(abs_file_path)
  os.link(cached_path, abs_file_path)
247
248
def bisect_populate(execargs, bisect_dir, population_name):
  """Run the compiler and record its outputs in the bisect cache.

  The compiler is executed first. On success, and when the command produces
  an object file, the object is copied into the cache, its path is appended
  to the cache's _LIST file, and every compiler side effect file is cached
  too. Each cached file is also recorded in the cache's log by cache_file.

  Args:
    execargs: compiler execution arguments.
    bisect_dir: bisection directory.
    population_name: name of the cache being populated (good/bad).

  Returns:
    The compiler's exit status if it is non-zero, None otherwise.
  """
  status = exec_and_return(execargs)
  if status:
    return status

  obj_path = get_obj_path(execargs)
  if obj_path:
    cache_file(execargs, bisect_dir, population_name, obj_path)

    list_path = os.path.join(bisect_dir, population_name, '_LIST')
    with lock_file(list_path, 'a') as object_list:
      object_list.write('%s\n' % obj_path)

    for extra_output in get_side_effects(execargs):
      cache_file(execargs, bisect_dir, population_name, extra_output)

  # Not a normal compiler call, or everything was cached: nothing to report.
  return None
279
280
def bisect_triage(execargs, bisect_dir):
  """Serve a compiler invocation from the good/bad cache where possible.

  Commands that do not output an object file are simply executed. For an
  object that is in the object list, the cache to use is chosen with
  which_cache and the files are linked over from it instead of compiling
  (unless WRAPPER_SAFE_MODE is set, in which case the compiler runs first and
  its object file is then replaced by the cached one).

  Args:
    execargs: compiler execution arguments.
    bisect_dir: bisection directory.

  Returns:
    The compiler's exit status on the paths that run the compiler and return
    its result; None otherwise.

  Raises:
    Error: if the object is not in the object list and CONTINUE_ON_MISSING is
      not set.
  """
  obj_path = get_obj_path(execargs)
  list_path = os.path.join(bisect_dir, LIST_FILE)

  # If the output isn't an object file just call compiler
  if not obj_path:
    return exec_and_return(execargs)

  # If this isn't a bisected object just call compiler
  # This shouldn't happen!
  if not in_object_list(obj_path, list_path):
    if not CONTINUE_ON_MISSING:
      raise Error(('%s is missing from cache! To ignore export '
                   'BISECT_CONTINUE_ON_MISSING=1. See documentation for more '
                   'details on this option.' % obj_path))
    missing_log = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG')
    log_to_file(missing_log, execargs, '? compiler', obj_path)
    return exec_and_return(execargs)

  cache = which_cache(obj_path)

  # If using safe WRAPPER_SAFE_MODE option call compiler and overwrite the
  # result from the good/bad cache. This option is safe and covers all compiler
  # side effects, but is very slow!
  if WRAPPER_SAFE_MODE:
    status = exec_and_return(execargs)
    if status:
      return status
    os.remove(obj_path)
    restore_file(bisect_dir, cache, obj_path)
    return None

  # Generate compiler side effects. Trick Make into thinking compiler was
  # actually executed.
  for extra_output in get_side_effects(execargs):
    restore_file(bisect_dir, cache, extra_output)

  # If generated object file happened to be pruned/cleaned by Make then link it
  # over from cache again.
  if not os.path.exists(obj_path):
    restore_file(bisect_dir, cache, obj_path)

  return None
323
324
def bisect_driver(bisect_stage, bisect_dir, execargs):
  """Call appropriate bisection stage according to value in bisect_stage.

  Args:
    bisect_stage: one of 'POPULATE_GOOD', 'POPULATE_BAD' or 'TRIAGE'.
    bisect_dir: bisection directory.
    execargs: compiler execution arguments.

  Returns:
    Whatever the selected stage function returns, so that a failing compiler's
    non-zero exit status is not lost. The stage functions return None on the
    paths that do not report a compiler status.

  Raises:
    ValueError: if bisect_stage is not one of the supported stages.
  """
  if bisect_stage == 'POPULATE_GOOD':
    return bisect_populate(execargs, bisect_dir, GOOD_CACHE)
  if bisect_stage == 'POPULATE_BAD':
    return bisect_populate(execargs, bisect_dir, BAD_CACHE)
  if bisect_stage == 'TRIAGE':
    return bisect_triage(execargs, bisect_dir)
  raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage)
335