asan_symbolize.py revision f21e025112d5f82b2b475eb1d8e690824883fc97
#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import bisect
import getopt
import os
import pty
import re
import subprocess
import sys
import termios

# NOTE(review): not read by any code visible in this file; presumably kept for
# scripts that import this module -- confirm before removing.
llvm_symbolizer = None
# Cache of per-binary symbolizer chains, keyed by binary path.
symbolizers = {}
# When True, the external commands and their input/output are echoed.
DEBUG = False
# When True, the external symbolizers are asked to demangle function names.
demangle = False


def _paths_to_cut():
    """Return the command-line arguments that are path patterns to strip.

    The arguments are parsed with the same getopt specification the script's
    entry point uses, so that '-d' / '--demangle' are not mistaken for path
    patterns.
    """
    argv = sys.argv[1:]
    try:
        _, paths = getopt.getopt(argv, 'd', ['demangle'])
    except getopt.GetoptError:
        # Unrecognised options (e.g. this module was imported by another
        # script): keep the historical behaviour and use every argument.
        paths = argv
    return paths


# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Shorten a 'file:line' location for display.

    Args:
      file_name: location string as printed by a symbolizer.
    Returns:
      file_name with everything up to and including each command-line path
      pattern removed, ASan runtime locations replaced by '_asan_rtl_' and
      'crtstuff.c:0' replaced by '???:0'.
    """
    for path_to_cut in _paths_to_cut():
        # The pattern is deliberately used as a regular expression.
        file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
    return file_name


class Symbolizer(object):
    """Base class of all symbolizers; symbolizes nothing by itself."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer backed by a long-running llvm-symbolizer child process."""

    def __init__(self, symbolizer_path):
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        # None when the tool could not be started; symbolize() then declines.
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start llvm-symbolizer; return the Popen object or None on failure."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=true',
               '--inlining=true']
        if DEBUG:
            print(' '.join(cmd))
        try:
            # Popen resolves a bare command name through PATH, which a plain
            # os.path.exists() check on the name cannot do.
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        except OSError:
            # Tool missing or not executable: fall back to other symbolizers.
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '%s %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            # The reply is a sequence of (function, file) line pairs, one pair
            # per inlined frame, terminated by an empty line.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') and
                        not file_name.startswith('??')):
                    # Append only valid frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any failure talking to the child means "no result"
            # so that the caller can try the next symbolizer.
            result = []
        return result or None


def LLVMSymbolizerFactory(system):
    """Create an LLVMSymbolizer.

    The tool location is taken from the LLVM_SYMBOLIZER_PATH environment
    variable; `system` is accepted but not used.
    """
    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    if not symbolizer_path:
        # Assume llvm-symbolizer is in PATH.
        symbolizer_path = 'llvm-symbolizer'
    return LLVMSymbolizer(symbolizer_path)


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by an addr2line child process for one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start 'addr2line -f [--demangle] -e <binary>' and return it."""
        cmd = ['addr2line', '-f']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        try:
            self.pipe.stdin.write('%s\n' % offset)
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            # Best effort: still return a frame, with empty names.
            function_name = ''
            file_name = ''
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]


class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output.  Uses pty to trick the child into providing unbuffered output.
    """
    def __init__(self, args):
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            os.execvp(args[0], args)
        else:
            # Disable echoing.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send one line to the child and return its next output line."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Return the child's next output line without trailing whitespace."""
        return self.r.readline().rstrip()


class DarwinSymbolizer(Symbolizer):
    """Symbolizer backed by an 'atos' child process for one binary."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
        if len(addr) > 10:
            self.arch = 'x86_64'
        else:
            self.arch = 'i386'
        self.open_atos()

    def open_atos(self):
        """Start atos for self.binary / self.arch behind a pty wrapper."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip informational lines atos may print before the answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('%s %s' % ('atos_line: ', atos_line))
        if match:
            function_name = match.group(1)
            # Drop the argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Try a list of symbolizers in order and return the first result."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        # May contain None entries; they are skipped by symbolize().
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for symbolizer in self.symbolizer_list:
            if not symbolizer:
                continue
            result = symbolizer.symbolize(addr, binary, offset)
            if result:
                return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add a symbolizer to the end of the chain."""
        self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for binary, or None.

    The symbol file is looked up at `binary` + $BREAKPAD_SUFFIX; None is
    returned when the variable is unset or the file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None


def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform symbolizer (atos or addr2line) for binary.

    Returns None for any system other than 'Darwin' and 'Linux'.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    elif system == 'Linux':
        return Addr2LineSymbolizer(binary)
    return None


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that reads a Breakpad text symbol file into memory."""

    def __init__(self, filename):
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []          # file number -> file name
        self.symbols = {}        # symbol address -> symbol name
        self.address_list = []   # sorted start addresses of line records
        self.addresses = {}      # start address -> (symbol, size, line, file)
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the lookup tables from the records following the MODULE line."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to record.
                continue
            if fragments[0] == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif fragments[0] == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif fragments[0] in ['CFI', 'STACK']:
                pass
            elif fragments[0] == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # An earlier entry for the same address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, filename, line) covering addr, or None."""
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record that starts below addr.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        return ['%s in %s %s:%d' % (addr, function_name, file_name, line_no)]


class SymbolizationLoop(object):
    """Read a report from stdin and print it with stack frames symbolized."""

    def __init__(self, binary_name_filter=None):
        # Used by clients who may want to supply a different binary name.
        # E.g. in Chrome several binaries may share a single .dSYM.
        self.binary_name_filter = binary_name_filter
        self.system = os.uname()[0]
        if self.system in ['Linux', 'Darwin']:
            self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
        else:
            raise Exception('Unknown system')

    def symbolize_address(self, addr, binary, offset):
        """Return the list of symbolized frames for one address."""
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def print_symbolized_lines(self, symbolized_lines):
        """Print the frames, or the unmodified input line if there are none."""
        if not symbolized_lines:
            print(self.current_line)
        else:
            for symbolized_frame in symbolized_lines:
                print(' #' + str(self.frame_no) + ' ' +
                      symbolized_frame.rstrip())
                self.frame_no += 1

    def process_stdin(self):
        """Copy stdin to stdout, replacing stack-frame lines as they arrive."""
        # Example frame line:
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
        stack_trace_line_format = re.compile(
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        self.frame_no = 0
        # readline() (rather than iterating the file) hands over each line as
        # soon as it is available.
        while True:
            line = sys.stdin.readline()
            if not line:
                break
            self.current_line = line.rstrip()
            match = stack_trace_line_format.match(line)
            if not match:
                print(self.current_line)
                continue
            if DEBUG:
                print(line)
            _, frameno_str, addr, binary, offset = match.groups()
            if frameno_str == '0':
                # Assume that frame #0 is the first frame of new stack trace.
                self.frame_no = 0
            original_binary = binary
            if self.binary_name_filter:
                binary = self.binary_name_filter(binary)
            symbolized_line = self.symbolize_address(addr, binary, offset)
            if not symbolized_line and original_binary != binary:
                # The filtered name gave nothing: retry with the name that
                # appeared in the report.
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset)
            self.print_symbolized_lines(symbolized_line)


if __name__ == '__main__':
    opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"])
    for o, a in opts:
        if o in ("-d", "--demangle"):
            demangle = True
    loop = SymbolizationLoop()
    loop.process_stdin()