# bisect_driver.py revision 2a141a72ae3f5d420f892cbc7a81fc40db076022
# Copyright 2016 Google Inc. All Rights Reserved.
#
# This script is used to help the compiler wrapper in the Android build system
# bisect for bad object files.
5"""Utilities for bisection of Android object files.
6
7This module contains a set of utilities to allow bisection between
8two sets (good and bad) of object files. Mostly used to find compiler
9bugs.
10
11Design doc:
12https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
13"""
14
15from __future__ import print_function
16
17import contextlib
18import fcntl
19import os
20import shutil
21import subprocess
22import sys
23
# Bisection stages recognized by bisect_driver().
VALID_MODES = ['POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE']

# Names of the cache subdirectories created inside the bisection directory.
DEP_CACHE = 'dep'
GOOD_CACHE = 'good'
BAD_CACHE = 'bad'

# Path (relative to the bisection directory) of the good cache's object list.
LIST_FILE = os.path.join(GOOD_CACHE, '_LIST')

# True only when BISECT_CONTINUE_ON_MISSING is exported as exactly '1'.
CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING') == '1'
31
32
class Error(Exception):
  """Base exception raised by the compiler wrapper bisection utilities."""
36
37
@contextlib.contextmanager
def lock_file(path, mode):
  """Open a file and hold an advisory lock on it for the duration of a "with".

  The lock only blocks other processes that also lock the file through
  fcntl.lockf (e.g. via this function). Modes that cannot modify the file
  ('r', 'rb', 'rt', ...) take a shared lock, so many readers may proceed
  concurrently; any mode containing 'w', 'a', 'x' or '+' takes an exclusive
  lock, so only one writer runs at a time.

  This function is a contextmanager: the body of the caller's "with" statement
  runs at the "yield". When the body finishes without raising, the file is
  flushed before the lock is released. The lock is released and the file is
  closed whether or not the body raised.

  Args:
    path: path to file being locked
    mode: mode to open file with ('w', 'r', etc.)

  Yields:
    The opened (and locked) file object.
  """
  with open(path, mode) as f:
    # Share the lock if just reading, make lock exclusive if writing. Testing
    # for the write flags (rather than for the exact strings 'r'/'rb') keeps
    # read-only spellings such as 'rt' from requesting an exclusive lock,
    # which lockf refuses on a descriptor that is not open for writing.
    writable = any(flag in mode for flag in 'wax+')
    lock_type = fcntl.LOCK_EX if writable else fcntl.LOCK_SH

    # Acquire outside the try block: if acquisition fails there is no lock to
    # release, so the cleanup below must not run.
    fcntl.lockf(f, lock_type)
    try:
      yield f
      # Push buffered writes to disk while the lock is still held.
      f.flush()
    finally:
      fcntl.lockf(f, fcntl.LOCK_UN)
72
73
def log_to_file(path, execargs, link_from=None, link_to=None):
  """Append one execution record to a log file.

  The record holds the current working directory and the space-joined
  execargs. When both link_from and link_to are given, a second line of the
  form "link_from -> link_to" is appended as well. The log is opened in append
  mode through lock_file().

  Args:
    path: path of the log file to append to.
    execargs: compiler execution arguments (list of strings).
    link_from: optional source side of a from-to relationship.
    link_to: optional destination side of a from-to relationship.
  """
  with lock_file(path, 'a') as log:
    lines = ['cd: %s; %s\n' % (os.getcwd(), ' '.join(execargs))]
    if link_from and link_to:
      lines.append('%s -> %s\n' % (link_from, link_to))
    log.writelines(lines)
84
85
def exec_and_return(execargs):
  """Run a command and hand back its exit status.

  The command's stdout and stderr are neither captured nor inspected; they go
  wherever this process's streams go.

  Args:
    execargs: command and arguments to execute (list of strings).

  Returns:
    The exit status reported by subprocess.call.
  """
  exit_status = subprocess.call(execargs)
  return exit_status
93
94
def in_bad_set(obj_file):
  """Check if object file is in bad set.

  The binary search tool creates two files for each search iteration listing
  the full set of bad objects and full set of good objects. We use this to
  determine where an object file should be linked from (good or bad).

  The file named by the BISECT_BAD_SET environment variable is scanned for a
  line that is exactly equal to obj_file. The comparison is a literal string
  comparison, so regex metacharacters in the path (such as the '.' in "foo.o")
  cannot produce false matches.

  Args:
    obj_file: object file path to look up.

  Returns:
    True if obj_file appears as a whole line in the bad set file. False if it
    does not, or if the bad set file cannot be read.

  Raises:
    Error: if BISECT_BAD_SET is not set in the environment.
  """
  bad_set_file = os.environ.get('BISECT_BAD_SET')
  if bad_set_file is None:
    raise Error('BISECT_BAD_SET is not set; cannot tell which objects are bad')

  try:
    with open(bad_set_file) as bad_set:
      return any(line.rstrip('\n') == obj_file for line in bad_set)
  except (IOError, OSError) as e:
    # An unreadable bad set has always been treated as "object is not bad";
    # keep that behavior but say why on stderr.
    sys.stderr.write('Could not read bad set file %s: %s\n' % (bad_set_file, e))
    return False
105
106
def makedirs(path):
  """Create path and any missing parents, tolerating an existing directory.

  Raises:
    OSError: if creation fails and path is not already a directory.
  """
  try:
    os.makedirs(path)
  except OSError:
    # Only an already-existing directory is an acceptable failure.
    if os.path.isdir(path):
      return
    raise
114
115
def get_obj_path(execargs):
  """Get the object path for the object file in the list of arguments.

  Args:
    execargs: compiler execution arguments (list of strings).

  Returns:
    Tuple of object path from execution args (-o argument) and full object
    path. If no object is being outputted, "-o" is the last argument (so it has
    no value), or the output doesn't end in ".o", then return empty strings.
  """
  try:
    obj_path = execargs[execargs.index('-o') + 1]
  except (ValueError, IndexError):
    # ValueError: no "-o" at all. IndexError: "-o" with nothing after it.
    return '', ''

  if not obj_path.endswith('.o'):
    # TODO: what suffixes do we need to contemplate
    # TODO: add this as a warning
    # TODO: need to handle -r compilations
    return '', ''

  return obj_path, os.path.abspath(obj_path)
137
138
def get_dep_path(execargs):
  """Get the dep file path for the dep file in the list of arguments.

  Args:
    execargs: compiler execution arguments (list of strings).

  Returns:
    Tuple of dependency file path from execution args (-MF argument) and full
    dependency file path. If no dependency file is being outputted, or "-MF" is
    the last argument (so it has no value), then return empty strings.
  """
  try:
    dep_path = execargs[execargs.index('-MF') + 1]
  except (ValueError, IndexError):
    # ValueError: no "-MF" at all. IndexError: "-MF" with nothing after it.
    return '', ''

  return dep_path, os.path.abspath(dep_path)
154
155
def in_object_list(obj_name, list_filename):
  """Tell whether obj_name is one of the entries in an object list file.

  Each line of the list file is stripped of surrounding whitespace before being
  compared to obj_name. The file is read through lock_file() in 'r' mode.

  Args:
    obj_name: object file name to look for; an empty name is never found.
    list_filename: path of the file holding one object name per line.

  Returns:
    True if some line matches obj_name, False otherwise.
  """
  if not obj_name:
    return False

  with lock_file(list_filename, 'r') as list_file:
    return any(entry.strip() == obj_name for entry in list_file)
167
168
def generate_side_effects(execargs, bisect_dir):
  """Cache the compiler's side-effect files for later triage.

  Currently only the dependency file named by -MF is cached. It is copied
  under the dependency cache of bisect_dir so that, during triaging, make can
  be tricked into thinking the compiler was actually called.

  Args:
    execargs: compiler execution arguments.
    bisect_dir: bisection directory.
  """
  # TODO(cburden): Cache .dwo files

  # Nothing to cache when the compilation emits no dependency file.
  dep_path, full_dep_path = get_dep_path(execargs)
  if not dep_path:
    return

  dep_cache_dir = os.path.join(bisect_dir, DEP_CACHE)
  # os.path.join would discard the cache dir when given an absolute path, so
  # concatenate with + instead.
  cached_dep = dep_cache_dir + full_dep_path
  makedirs(os.path.dirname(cached_dep))
  log_to_file(
      os.path.join(dep_cache_dir, '_POPULATE_LOG'), execargs, dep_path,
      cached_dep)

  try:
    if os.path.exists(dep_path):
      shutil.copy2(dep_path, cached_dep)
  except Exception:
    print('Could not get dep file', file=sys.stderr)
    raise
195
196
def bisect_populate(execargs, bisect_dir, population_name):
  """Add necessary information to the bisect cache for the given execution.

  Run the compiler, then extract the necessary information for bisection from
  the compiler execution arguments and put it into the bisection cache. This
  includes copying the created object file, adding the object
  file path to the cache list and keeping a log of the execution.

  Args:
    execargs: compiler execution arguments.
    bisect_dir: bisection directory.
    population_name: name of the cache being populated (good/bad).

  Returns:
    The compiler's exit status. When it is non-zero nothing is cached.
  """
  retval = exec_and_return(execargs)
  if retval:
    return retval

  population_dir = os.path.join(bisect_dir, population_name)
  makedirs(population_dir)
  pop_log = os.path.join(population_dir, '_POPULATE_LOG')
  log_to_file(pop_log, execargs)

  # Executions that don't produce a ".o" file have nothing to cache.
  obj_path, full_obj_path = get_obj_path(execargs)
  if not obj_path:
    return retval

  # os.path.join fails with absolute paths, use + instead
  bisect_path = population_dir + full_obj_path
  bisect_path_dir = os.path.dirname(bisect_path)
  makedirs(bisect_path_dir)

  try:
    if os.path.exists(obj_path):
      shutil.copy2(obj_path, bisect_path)
      # Set cache object to be read-only so later compilations can't
      # accidentally overwrite it. The 0o prefix is the octal spelling
      # accepted by both Python 2.6+ and Python 3 (a bare leading 0 is a
      # syntax error on Python 3).
      os.chmod(bisect_path, 0o444)
  except Exception:
    print('Could not populate bisect cache', file=sys.stderr)
    raise

  with lock_file(os.path.join(population_dir, '_LIST'), 'a') as object_list:
    object_list.write('%s\n' % full_obj_path)

  # Cache the side effects generated by good compiler
  if population_name == GOOD_CACHE:
    generate_side_effects(execargs, bisect_dir)

  return retval
244
245
def bisect_triage(execargs, bisect_dir):
  """Stand in for the compiler during triage, serving files from the cache.

  If the execution does not output an object file, the real compiler is run.
  If the object is not in the good cache's object list, the real compiler is
  run only when BISECT_CONTINUE_ON_MISSING=1 was exported; otherwise an Error
  is raised. For cached objects the compiler is not run: the cached dependency
  file (if -MF was given) is hard-linked into place, and the object file is
  hard-linked from the good or bad cache if it is currently missing.

  Args:
    execargs: compiler execution arguments.
    bisect_dir: bisection directory.

  Returns:
    The compiler's exit status when the real compiler was run, 0 otherwise.

  Raises:
    Error: if the object is missing from the object list (and continuing on
      missing objects is not enabled), or a needed dependency/object file is
      missing from its cache.
  """
  obj_path, full_obj_path = get_obj_path(execargs)
  obj_list = os.path.join(bisect_dir, LIST_FILE)

  # If the output isn't an object file just call compiler
  if not obj_path:
    return exec_and_return(execargs)

  # If this isn't a bisected object just call compiler
  # This shouldn't happen!
  if not in_object_list(full_obj_path, obj_list):
    if CONTINUE_ON_MISSING:
      log_file = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG')
      log_to_file(log_file, execargs, '? compiler', full_obj_path)
      return exec_and_return(execargs)
    else:
      raise Error(('%s is missing from cache! To ignore export '
                   'BISECT_CONTINUE_ON_MISSING=1. See documentation for more '
                   'details on this option.' % obj_path))

  # Generate compiler side effects. Trick Make into thinking compiler was
  # actually executed.

  # If dependency is generated from this call, link it from dependency cache
  dep_path, full_dep_path = get_dep_path(execargs)
  if dep_path:
    # The cache is keyed by the absolute dep path (that is how
    # generate_side_effects stores it), so the lookup must use the absolute
    # path too; a relative -MF argument would otherwise never be found.
    cached_dep_path = os.path.join(bisect_dir, DEP_CACHE) + full_dep_path
    if os.path.exists(cached_dep_path):
      if os.path.exists(full_dep_path):
        os.remove(full_dep_path)
      os.link(cached_dep_path, full_dep_path)
    else:
      raise Error(('%s is missing from dependency cache! Unsure how to '
                   'proceed. Make will now crash.' % cached_dep_path))

  # If generated object file happened to be pruned/cleaned by Make then link it
  # over from cache again.
  if not os.path.exists(obj_path):
    cache = BAD_CACHE if in_bad_set(full_obj_path) else GOOD_CACHE
    cached_obj_path = os.path.join(bisect_dir, cache) + full_obj_path
    if os.path.exists(cached_obj_path):
      os.link(cached_obj_path, full_obj_path)
    else:
      raise Error('%s does not exist in %s cache' % (full_obj_path, cache))

    # This is just used for debugging and stats gathering
    log_file = os.path.join(bisect_dir, '_MISSING_OBJ_LOG')
    log_to_file(log_file, execargs, cached_obj_path, full_obj_path)

  # Everything was served from the cache; report success like a compiler would.
  return 0
294
295
def bisect_driver(bisect_stage, bisect_dir, execargs):
  """Call appropriate bisection stage according to value in bisect_stage.

  Args:
    bisect_stage: one of 'POPULATE_GOOD', 'POPULATE_BAD' or 'TRIAGE'.
    bisect_dir: bisection directory.
    execargs: compiler execution arguments.

  Returns:
    Whatever the selected stage returns, so that a failing compiler invocation
    can be reported by the caller instead of being silently dropped.

  Raises:
    ValueError: if bisect_stage is not one of the recognized stages.
  """
  if bisect_stage == 'POPULATE_GOOD':
    return bisect_populate(execargs, bisect_dir, GOOD_CACHE)
  elif bisect_stage == 'POPULATE_BAD':
    return bisect_populate(execargs, bisect_dir, BAD_CACHE)
  elif bisect_stage == 'TRIAGE':
    return bisect_triage(execargs, bisect_dir)
  else:
    raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage)
306