bisect_driver.py revision f014763a3e24c3f98969f5907b96bec4fec6316f
1# Copyright 2016 Google Inc. All Rights Reserved. 2# 3# This script is used to help the compiler wrapper in the Android build system 4# bisect for bad object files. 5"""Utilities for bisection of Android object files. 6 7This module contains a set of utilities to allow bisection between 8two sets (good and bad) of object files. Mostly used to find compiler 9bugs. 10 11Design doc: 12https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM 13""" 14 15from __future__ import print_function 16 17import contextlib 18import fcntl 19import os 20import shutil 21import subprocess 22import sys 23 24VALID_MODES = ['POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE'] 25DEP_CACHE = 'dep' 26GOOD_CACHE = 'good' 27BAD_CACHE = 'bad' 28LIST_FILE = os.path.join(GOOD_CACHE, '_LIST') 29 30CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING', None) == '1' 31 32 33class Error(Exception): 34 """The general compiler wrapper error class.""" 35 pass 36 37 38@contextlib.contextmanager 39def lock_file(path, mode): 40 """Lock file and block if other process has lock on file. 41 42 Acquire exclusive lock for file. Only blocks other processes if they attempt 43 to also acquire lock through this method. If only reading (modes 'r' and 'rb') 44 then the lock is shared (i.e. many reads can happen concurrently, but only one 45 process may write at a time). 46 47 This function is a contextmanager, meaning it's meant to be used with the 48 "with" statement in Python. This is so cleanup and setup happens automatically 49 and cleanly. Execution of the outer "with" statement happens at the "yield" 50 statement. Execution resumes after the yield when the outer "with" statement 51 ends. 52 53 Args: 54 path: path to file being locked 55 mode: mode to open file with ('w', 'r', etc.) 56 """ 57 with open(path, mode) as f: 58 # Share the lock if just reading, make lock exclusive if writing 59 if f.mode == 'r' or f.mode == 'rb': 60 lock_type = fcntl.LOCK_SH 61 else: 62 lock_type = fcntl.LOCK_EX 63 64 try: 65 fcntl.lockf(f, lock_type) 66 yield f 67 f.flush() 68 except: 69 raise 70 finally: 71 fcntl.lockf(f, fcntl.LOCK_UN) 72 73 74def log_to_file(path, execargs, link_from=None, link_to=None): 75 """Common logging function. 76 77 Log current working directory, current execargs, and a from-to relationship 78 between files. 79 """ 80 with lock_file(path, 'a') as log: 81 log.write('cd: %s; %s\n' % (os.getcwd(), ' '.join(execargs))) 82 if link_from and link_to: 83 log.write('%s -> %s\n' % (link_from, link_to)) 84 85 86def exec_and_return(execargs): 87 """Execute process and return. 88 89 Execute according to execargs and return immediately. Don't inspect 90 stderr or stdout. 91 """ 92 return subprocess.call(execargs) 93 94 95def in_bad_set(obj_file): 96 """Check if object file is in bad set. 97 98 The binary search tool creates two files for each search iteration listing 99 the full set of bad objects and full set of good objects. We use this to 100 determine where an object file should be linked from (good or bad). 101 """ 102 bad_set_file = os.environ.get('BISECT_BAD_SET') 103 ret = subprocess.call(['grep', '-x', '-q', obj_file, bad_set_file]) 104 return ret == 0 105 106 107def makedirs(path): 108 """Try to create directories in path.""" 109 try: 110 os.makedirs(path) 111 except os.error: 112 if not os.path.isdir(path): 113 raise 114 115 116def get_obj_path(execargs): 117 """Get the object path for the object file in the list of arguments. 118 119 Returns: 120 Tuple of object path from execution args (-o argument) and full object 121 path. If no object being outputted or output doesn't end in ".o" then return 122 empty strings. 123 """ 124 try: 125 i = execargs.index('-o') 126 except ValueError: 127 return '', '' 128 129 obj_path = execargs[i + 1] 130 if not obj_path.endswith(('.o',)): 131 # TODO: what suffixes do we need to contemplate 132 # TODO: add this as a warning 133 # TODO: need to handle -r compilations 134 return '', '' 135 136 return obj_path, os.path.join(os.getcwd(), obj_path) 137 138 139def get_dep_path(execargs): 140 """Get the dep file path for the dep file in the list of arguments. 141 142 Returns: 143 Tuple of dependency file path from execution args (-o argument) and full 144 dependency file path. If no dependency being outputted then return empty 145 strings. 146 """ 147 try: 148 i = execargs.index('-MF') 149 except ValueError: 150 return '', '' 151 152 dep_path = execargs[i + 1] 153 return dep_path, os.path.join(os.getcwd(), dep_path) 154 155 156def in_object_list(obj_name, list_filename): 157 """Check if object file name exist in file with object list.""" 158 if not obj_name: 159 return False 160 161 with lock_file(list_filename, 'r') as list_file: 162 for line in list_file: 163 if line.strip() == obj_name: 164 return True 165 166 return False 167 168 169def generate_side_effects(execargs, bisect_dir): 170 """Generate compiler side effects. 171 172 Generate and cache side effects so that we can trick make into thinking 173 the compiler is actually called during triaging. 174 """ 175 # TODO(cburden): Cache .dwo files 176 177 # Cache dependency files 178 dep_path, _ = get_dep_path(execargs) 179 if not dep_path: 180 return 181 182 bisect_path = os.path.join(bisect_dir, DEP_CACHE, dep_path) 183 bisect_path_dir = os.path.dirname(bisect_path) 184 makedirs(bisect_path_dir) 185 pop_log = os.path.join(bisect_dir, DEP_CACHE, '_POPULATE_LOG') 186 log_to_file(pop_log, execargs, link_from=dep_path, link_to=bisect_path) 187 188 try: 189 if os.path.exists(dep_path): 190 shutil.copy2(dep_path, bisect_path) 191 except Exception: 192 print('Could not get dep file', file=sys.stderr) 193 raise 194 195 196def bisect_populate(execargs, bisect_dir, population_name): 197 """Add necessary information to the bisect cache for the given execution. 198 199 Extract the necessary information for bisection from the compiler 200 execution arguments and put it into the bisection cache. This 201 includes copying the created object file, adding the object 202 file path to the cache list and keeping a log of the execution. 203 204 Args: 205 execargs: compiler execution arguments. 206 bisect_dir: bisection directory. 207 population_name: name of the cache being populated (good/bad). 208 """ 209 retval = exec_and_return(execargs) 210 if retval: 211 return retval 212 213 population_dir = os.path.join(bisect_dir, population_name) 214 makedirs(population_dir) 215 pop_log = os.path.join(population_dir, '_POPULATE_LOG') 216 log_to_file(pop_log, execargs) 217 218 obj_path, _ = get_obj_path(execargs) 219 if not obj_path: 220 return 221 222 bisect_path = os.path.join(population_dir, obj_path) 223 bisect_path_dir = os.path.dirname(bisect_path) 224 makedirs(bisect_path_dir) 225 226 try: 227 if os.path.exists(obj_path): 228 shutil.copy2(obj_path, bisect_path) 229 # Set cache object to be read-only so later compilations can't 230 # accidentally overwrite it. 231 os.chmod(bisect_path, 0444) 232 except Exception: 233 print('Could not populate bisect cache', file=sys.stderr) 234 raise 235 236 with lock_file(os.path.join(population_dir, '_LIST'), 'a') as object_list: 237 object_list.write('%s\n' % obj_path) 238 239 # Cache the side effects generated by good compiler 240 if population_name == GOOD_CACHE: 241 generate_side_effects(execargs, bisect_dir) 242 243 244def bisect_triage(execargs, bisect_dir): 245 obj_path, _ = get_obj_path(execargs) 246 obj_list = os.path.join(bisect_dir, LIST_FILE) 247 248 # If the output isn't an object file just call compiler 249 if not obj_path: 250 return exec_and_return(execargs) 251 252 # If this isn't a bisected object just call compiler 253 # This shouldn't happen! 254 if not in_object_list(obj_path, obj_list): 255 if CONTINUE_ON_MISSING: 256 log_file = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG') 257 log_to_file(log_file, execargs, link_from='? compiler', link_to=obj_path) 258 return exec_and_return(execargs) 259 else: 260 raise Error(('%s is missing from cache! To ignore export ' 261 'BISECT_CONTINUE_ON_MISSING=1. See documentation for more ' 262 'details on this option.' % obj_path)) 263 264 # Generate compiler side effects. Trick Make into thinking compiler was 265 # actually executed. 266 267 # If dependency is generated from this call, link it from dependency cache 268 dep_path, full_dep_path = get_dep_path(execargs) 269 if dep_path: 270 cached_dep_path = os.path.join(bisect_dir, DEP_CACHE, dep_path) 271 if os.path.exists(cached_dep_path): 272 if os.path.exists(full_dep_path): 273 os.remove(full_dep_path) 274 os.link(cached_dep_path, full_dep_path) 275 else: 276 raise Error(('%s is missing from dependency cache! Unsure how to ' 277 'proceed. Make will now crash.' % cached_dep_path)) 278 279 # If generated object file happened to be pruned/cleaned by Make then link it 280 # over from cache again. 281 if not os.path.exists(obj_path): 282 cache = BAD_CACHE if in_bad_set(obj_path) else GOOD_CACHE 283 cached_obj_path = os.path.join(bisect_dir, cache, obj_path) 284 if os.path.exists(cached_obj_path): 285 os.link(cached_obj_path, obj_path) 286 else: 287 raise Error('%s does not exist in %s cache' % (obj_path, cache)) 288 289 # This is just used for debugging and stats gathering 290 log_file = os.path.join(bisect_dir, '_MISSING_OBJ_LOG') 291 log_to_file(log_file, execargs, link_from=cached_obj_path, link_to=obj_path) 292 293 294def bisect_driver(bisect_stage, bisect_dir, execargs): 295 """Call appropriate bisection stage according to value in bisect_stage.""" 296 if bisect_stage == 'POPULATE_GOOD': 297 bisect_populate(execargs, bisect_dir, GOOD_CACHE) 298 elif bisect_stage == 'POPULATE_BAD': 299 bisect_populate(execargs, bisect_dir, BAD_CACHE) 300 elif bisect_stage == 'TRIAGE': 301 bisect_triage(execargs, bisect_dir) 302 else: 303 raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage) 304