bisect_driver.py revision f014763a3e24c3f98969f5907b96bec4fec6316f
1# Copyright 2016 Google Inc. All Rights Reserved.
2#
3# This script is used to help the compiler wrapper in the Android build system
4# bisect for bad object files.
5"""Utilities for bisection of Android object files.
6
7This module contains a set of utilities to allow bisection between
8two sets (good and bad) of object files. Mostly used to find compiler
9bugs.
10
11Design doc:
12https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
13"""
14
15from __future__ import print_function
16
17import contextlib
18import fcntl
19import os
20import shutil
21import subprocess
22import sys
23
# Stage names understood by bisect_driver().
VALID_MODES = ['POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE']

# Names of the sub-directories kept inside the bisection directory.
DEP_CACHE = 'dep'  # cached dependency (-MF) files
GOOD_CACHE = 'good'  # objects cached during POPULATE_GOOD
BAD_CACHE = 'bad'  # objects cached during POPULATE_BAD

# List of cached good objects, relative to the bisection directory.
LIST_FILE = os.path.join(GOOD_CACHE, '_LIST')

# When BISECT_CONTINUE_ON_MISSING=1 is exported, triage runs the real compiler
# for objects missing from the good object list instead of raising an Error.
CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING') == '1'
31
32
class Error(Exception):
  """Generic failure raised by the compiler wrapper's bisection utilities."""
36
37
@contextlib.contextmanager
def lock_file(path, mode):
  """Open a file and hold an advisory lock on it for the "with" body.

  The lock only coordinates with other processes that also lock the file
  through fcntl (e.g. through this function). Blocks until the lock can be
  acquired.

  A file opened purely for reading ('r', 'rb', 'rt', 'rU', ...) takes a shared
  lock, so many readers may proceed concurrently. Any mode that can write
  ('w', 'a', 'x' or anything containing '+') takes an exclusive lock. A
  read-only descriptor must never request an exclusive lock: fcntl rejects
  that combination with EBADF.

  This function is a contextmanager: the body of the caller's "with" statement
  runs at the "yield". When the body finishes normally the file is flushed
  while the lock is still held; the lock is released and the file is closed
  whether or not the body raised.

  Args:
    path: path to file being locked
    mode: mode to open file with ('w', 'r', etc.)

  Yields:
    The open, locked file object.
  """
  with open(path, mode) as f:
    # Share the lock if just reading, make lock exclusive if writing
    can_write = any(flag in f.mode for flag in 'wax+')
    lock_type = fcntl.LOCK_EX if can_write else fcntl.LOCK_SH

    # Acquire outside the try block so we never "unlock" a lock that was
    # not obtained in the first place.
    fcntl.lockf(f, lock_type)
    try:
      yield f
      # Push buffered data out before other processes may take the lock.
      f.flush()
    finally:
      fcntl.lockf(f, fcntl.LOCK_UN)
72
73
def log_to_file(path, execargs, link_from=None, link_to=None):
  """Append a record of the current invocation to a log file.

  Each record holds the current working directory and the command line. When
  both link_from and link_to are given, a second "from -> to" line follows.

  Args:
    path: log file to append to; it is locked while being written.
    execargs: command line, as a list of strings.
    link_from: optional source of a from-to relationship.
    link_to: optional destination of a from-to relationship.
  """
  with lock_file(path, 'a') as log:
    entries = ['cd: %s; %s\n' % (os.getcwd(), ' '.join(execargs))]
    if link_from and link_to:
      entries.append('%s -> %s\n' % (link_from, link_to))
    log.writelines(entries)
84
85
def exec_and_return(execargs):
  """Run a command and hand back its exit status.

  The child's stdout and stderr are neither captured nor inspected.

  Args:
    execargs: command line to execute, as a list of strings.

  Returns:
    The exit status of the executed process.
  """
  exit_status = subprocess.call(execargs)
  return exit_status
93
94
def in_bad_set(obj_file):
  """Check if object file is in bad set.

  The binary search tool creates two files for each search iteration listing
  the full set of bad objects and full set of good objects. We use this to
  determine where an object file should be linked from (good or bad).

  The file named by the BISECT_BAD_SET environment variable is scanned for a
  line that is exactly obj_file. The comparison is literal: characters such as
  '.' or '*' in the path are not treated as pattern syntax.

  Args:
    obj_file: object path to look for, as listed in the bad set file.

  Returns:
    True if obj_file is listed in the bad set file. False if it is not
    listed, or if the bad set file cannot be read (a message is printed to
    stderr in that case).

  Raises:
    Error: if BISECT_BAD_SET is not set in the environment.
  """
  bad_set_file = os.environ.get('BISECT_BAD_SET')
  if bad_set_file is None:
    raise Error('BISECT_BAD_SET is not set; cannot decide whether %s is in '
                'the bad set.' % obj_file)

  try:
    with open(bad_set_file) as bad_set:
      # Only the line terminator is dropped: the whole line must match.
      return any(line.rstrip('\n') == obj_file for line in bad_set)
  except (IOError, OSError) as err:
    # An unreadable bad set has always been reported on stderr and treated
    # as "not in the bad set"; keep that contract.
    print('Could not read bad set file %s: %s' % (bad_set_file, err),
          file=sys.stderr)
    return False
105
106
def makedirs(path):
  """Create a directory and any missing parents, tolerating existing ones.

  Args:
    path: directory to create.

  Raises:
    OSError: if creation fails and path is not a directory afterwards.
  """
  try:
    os.makedirs(path)
  except OSError:
    # Somebody else may have created it in the meantime; that is fine.
    if os.path.isdir(path):
      return
    raise
114
115
def get_obj_path(execargs):
  """Get the object path for the object file in the list of arguments.

  Args:
    execargs: compiler execution arguments.

  Returns:
    Tuple of object path from execution args (-o argument) and full object
    path (the -o argument joined onto the current working directory). If no
    object is being outputted, if -o is the last argument and so has no
    operand, or if the output doesn't end in ".o", then return empty strings.
  """
  try:
    obj_path = execargs[execargs.index('-o') + 1]
  except (ValueError, IndexError):
    # ValueError: no -o at all. IndexError: -o without an operand.
    return '', ''

  if not obj_path.endswith('.o'):
    # TODO: what suffixes do we need to contemplate
    # TODO: add this as a warning
    # TODO: need to handle -r compilations
    return '', ''

  return obj_path, os.path.join(os.getcwd(), obj_path)
137
138
def get_dep_path(execargs):
  """Get the dep file path for the dep file in the list of arguments.

  Args:
    execargs: compiler execution arguments.

  Returns:
    Tuple of dependency file path from execution args (-MF argument) and full
    dependency file path (the -MF argument joined onto the current working
    directory). If no dependency file is being outputted, or -MF is the last
    argument and so has no operand, then return empty strings.
  """
  try:
    dep_path = execargs[execargs.index('-MF') + 1]
  except (ValueError, IndexError):
    # ValueError: no -MF at all. IndexError: -MF without an operand.
    return '', ''

  return dep_path, os.path.join(os.getcwd(), dep_path)
154
155
def in_object_list(obj_name, list_filename):
  """Tell whether an object name appears as a line of an object list file.

  Lines are compared after stripping surrounding whitespace. The list file is
  read under a lock.

  Args:
    obj_name: object file name to look for; a falsy name is never found.
    list_filename: path of the file holding one object name per line.

  Returns:
    True if some line of the file equals obj_name, False otherwise.
  """
  if not obj_name:
    return False

  with lock_file(list_filename, 'r') as list_file:
    return any(entry.strip() == obj_name for entry in list_file)
167
168
def generate_side_effects(execargs, bisect_dir):
  """Cache the compiler's side-effect files for later replay.

  During triage the compiler is not actually run, so the files it would have
  produced besides the object must come from a cache for make to believe the
  compiler was called. Currently only the dependency (-MF) file is cached,
  under the dependency cache of the bisection directory.

  Args:
    execargs: compiler execution arguments.
    bisect_dir: bisection directory.
  """
  # TODO(cburden): Cache .dwo files

  # Nothing to cache unless this invocation writes a dependency file.
  dep_file, _ = get_dep_path(execargs)
  if not dep_file:
    return

  dep_cache_dir = os.path.join(bisect_dir, DEP_CACHE)
  cached_dep = os.path.join(dep_cache_dir, dep_file)
  makedirs(os.path.dirname(cached_dep))
  log_to_file(
      os.path.join(dep_cache_dir, '_POPULATE_LOG'),
      execargs,
      link_from=dep_file,
      link_to=cached_dep)

  try:
    if os.path.exists(dep_file):
      shutil.copy2(dep_file, cached_dep)
  except Exception:
    print('Could not get dep file', file=sys.stderr)
    raise
194
195
def bisect_populate(execargs, bisect_dir, population_name):
  """Add necessary information to the bisect cache for the given execution.

  Run the compiler, then extract the necessary information for bisection from
  the compiler execution arguments and put it into the bisection cache. This
  includes copying the created object file, adding the object
  file path to the cache list and keeping a log of the execution.

  Args:
    execargs: compiler execution arguments.
    bisect_dir: bisection directory.
    population_name: name of the cache being populated (good/bad).

  Returns:
    The compiler's exit status if it is non-zero (nothing is cached in that
    case), otherwise None.
  """
  retval = exec_and_return(execargs)
  if retval:
    return retval

  population_dir = os.path.join(bisect_dir, population_name)
  makedirs(population_dir)
  pop_log = os.path.join(population_dir, '_POPULATE_LOG')
  log_to_file(pop_log, execargs)

  # Only invocations that output a ".o" file take part in the bisection.
  obj_path, _ = get_obj_path(execargs)
  if not obj_path:
    return None

  bisect_path = os.path.join(population_dir, obj_path)
  bisect_path_dir = os.path.dirname(bisect_path)
  makedirs(bisect_path_dir)

  try:
    if os.path.exists(obj_path):
      shutil.copy2(obj_path, bisect_path)
      # Set cache object to be read-only so later compilations can't
      # accidentally overwrite it. The "0o" octal prefix is accepted by both
      # Python 2.6+ and Python 3; the bare "0444" form is a syntax error on
      # Python 3.
      os.chmod(bisect_path, 0o444)
  except Exception:
    print('Could not populate bisect cache', file=sys.stderr)
    raise

  with lock_file(os.path.join(population_dir, '_LIST'), 'a') as object_list:
    object_list.write('%s\n' % obj_path)

  # Cache the side effects generated by good compiler
  if population_name == GOOD_CACHE:
    generate_side_effects(execargs, bisect_dir)

  return None
242
243
def bisect_triage(execargs, bisect_dir):
  """Satisfy a compiler invocation from the bisect caches during triage.

  Invocations that do not output an object file are passed to the compiler.
  For an object recorded in the good cache list the compiler is not run:
  the cached dependency file (if the invocation asks for one) is hard-linked
  into place, and if the object file itself does not exist it is hard-linked
  from the bad or good cache depending on in_bad_set().

  Args:
    execargs: compiler execution arguments.
    bisect_dir: bisection directory.

  Returns:
    The compiler's exit status when the compiler was actually run, otherwise
    None.

  Raises:
    Error: if the object is not in the good cache list (and
      BISECT_CONTINUE_ON_MISSING is not enabled), or if a needed dependency
      file or object is absent from its cache.
  """
  obj_path, _ = get_obj_path(execargs)
  obj_list = os.path.join(bisect_dir, LIST_FILE)

  # If the output isn't an object file just call compiler
  if not obj_path:
    return exec_and_return(execargs)

  # If this isn't a bisected object just call compiler
  # This shouldn't happen!
  if not in_object_list(obj_path, obj_list):
    if not CONTINUE_ON_MISSING:
      raise Error(('%s is missing from cache! To ignore export '
                   'BISECT_CONTINUE_ON_MISSING=1. See documentation for more '
                   'details on this option.' % obj_path))
    missing_log = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG')
    log_to_file(missing_log, execargs, link_from='? compiler', link_to=obj_path)
    return exec_and_return(execargs)

  # Generate compiler side effects. Trick Make into thinking compiler was
  # actually executed.

  # If dependency is generated from this call, link it from dependency cache
  dep_path, full_dep_path = get_dep_path(execargs)
  if dep_path:
    cached_dep_path = os.path.join(bisect_dir, DEP_CACHE, dep_path)
    if not os.path.exists(cached_dep_path):
      raise Error(('%s is missing from dependency cache! Unsure how to '
                   'proceed. Make will now crash.' % cached_dep_path))
    # Replace whatever dependency file is currently in place.
    if os.path.exists(full_dep_path):
      os.remove(full_dep_path)
    os.link(cached_dep_path, full_dep_path)

  # Nothing more to do while the object file is still in place.
  if os.path.exists(obj_path):
    return

  # The generated object file happened to be pruned/cleaned by Make, so link
  # it over from cache again.
  if in_bad_set(obj_path):
    cache = BAD_CACHE
  else:
    cache = GOOD_CACHE
  cached_obj_path = os.path.join(bisect_dir, cache, obj_path)
  if not os.path.exists(cached_obj_path):
    raise Error('%s does not exist in %s cache' % (obj_path, cache))
  os.link(cached_obj_path, obj_path)

  # This is just used for debugging and stats gathering
  relink_log = os.path.join(bisect_dir, '_MISSING_OBJ_LOG')
  log_to_file(relink_log, execargs, link_from=cached_obj_path, link_to=obj_path)
292
293
def bisect_driver(bisect_stage, bisect_dir, execargs):
  """Run the bisection stage selected by bisect_stage.

  Args:
    bisect_stage: one of 'POPULATE_GOOD', 'POPULATE_BAD' or 'TRIAGE'.
    bisect_dir: bisection directory.
    execargs: compiler execution arguments.

  Raises:
    ValueError: if bisect_stage is not one of the known stages.
  """
  if bisect_stage not in ('POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE'):
    raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage)

  if bisect_stage == 'TRIAGE':
    bisect_triage(execargs, bisect_dir)
    return

  # Both populate stages differ only in the cache they fill.
  cache_name = GOOD_CACHE if bisect_stage == 'POPULATE_GOOD' else BAD_CACHE
  bisect_populate(execargs, bisect_dir, cache_name)
304