# asan_symbolize.py revision 41dcb1c8848c8677c06216c6fcaa9b001f736778
#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import bisect
import getopt
import os
import re
import subprocess
import sys

llvm_symbolizer = None
# Cache of per-binary symbolizer objects, keyed by binary path.
symbolizers = {}
DEBUG = False
demangle = False


# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Shorten a 'file:line' location string for display.

    Every non-option command-line argument is used as a regular expression:
    everything in file_name up to and including its match is removed.
    Afterwards locations inside ASan runtime sources (asan_*.cc:N) are
    replaced by '_asan_rtl_' and 'crtstuff.c:0' is replaced by '???:0'.

    Args:
      file_name: location string as printed by a symbolizer tool.
    Returns:
      the rewritten location string.
    """
    for path_to_cut in sys.argv[1:]:
        # Option flags such as -d/--demangle also live in sys.argv; they are
        # not path prefixes and must not be applied as patterns (otherwise
        # '-d' would eat the start of any path containing "-d").
        if path_to_cut.startswith('-'):
            continue
        file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
    return file_name


class Symbolizer(object):
    """Base class for all symbolizers; symbolizes nothing by itself."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to one long-running llvm-symbolizer process."""

    def __init__(self, symbolizer_path):
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        # None when the tool could not be started; symbolize() then always
        # returns None.
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start llvm-symbolizer; return the Popen object or None on failure."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=true',
               '--inlining=true']
        if DEBUG:
            print(' '.join(cmd))
        # Do not pre-check with os.path.exists(): a bare command name such as
        # 'llvm-symbolizer' is resolved through PATH by Popen, while
        # os.path.exists() would only look in the current directory.
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            list of '<addr> in <function> <file>' strings, or None when the
            tool is unavailable, fails, or reports no valid frame.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '%s %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            # The pipe may be buffered; the tool must see the request before
            # we block reading its answer.
            self.pipe.stdin.flush()
            # The answer is read as (function, file) line pairs terminated by
            # an empty line.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') and
                        not file_name.startswith('??')):
                    # Append only valid frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any pipe failure means "no result", so that the
            # caller can fall back to another symbolizer.
            result = []
        if not result:
            result = None
        return result


def LLVMSymbolizerFactory(system):
    """Create an LLVMSymbolizer.

    The tool path is taken from the LLVM_SYMBOLIZER_PATH environment
    variable; `system` is currently not used.
    """
    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    if not symbolizer_path:
        # Assume llvm-symbolizer is in PATH.
        symbolizer_path = 'llvm-symbolizer'
    return LLVMSymbolizer(symbolizer_path)


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer bound to one binary, backed by an addr2line process."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start 'addr2line -f [--demangle] -e <binary>' and return it."""
        cmd = ['addr2line', '-f']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a binary other than the one this object was created
        for; otherwise always returns a one-element list (with empty function
        and file names if talking to addr2line failed).
        """
        if self.binary != binary:
            return None
        try:
            self.pipe.stdin.write('%s\n' % offset)
            self.pipe.stdin.flush()
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            # Best effort: still emit a frame line, just without names.
            function_name = ''
            file_name = ''
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]


class DarwinSymbolizer(Symbolizer):
    """Symbolizer bound to one binary that runs `atos` once per query."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
        if len(addr) > 10:
            self.arch = 'x86_64'
        else:
            self.arch = 'i386'
        self.pipe = None

    def write_addr_to_pipe(self, offset):
        """Send the hexadecimal string `offset` to atos as '0x...'."""
        self.pipe.stdin.write('0x%x\n' % int(offset, 16))
        self.pipe.stdin.flush()

    def open_atos(self):
        """Start a new atos process for self.binary and store it in self.pipe."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a binary other than the one this object was created
        for; otherwise a one-element list built from the first line atos
        prints.
        """
        if self.binary != binary:
            return None
        self.open_atos()
        try:
            self.write_addr_to_pipe(offset)
        finally:
            # communicate() closes stdin, drains stdout and stderr and waits
            # for the child, so the per-query atos process and its pipes are
            # not leaked and a chatty stderr cannot block the child.
            atos_output, _ = self.pipe.communicate()
        atos_line = atos_output.split('\n', 1)[0].rstrip()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line:  ' + atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesised argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order; the first non-empty result wins."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        # Entries may be None (e.g. a factory that had nothing to offer);
        # such entries are skipped by symbolize().
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for symbolizer in self.symbolizer_list:
            if symbolizer:
                result = symbolizer.symbolize(addr, binary, offset)
                if result:
                    return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add a symbolizer to the end of the chain."""
        self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for `binary`, or None.

    The symbol file name is `binary` plus the value of the BREAKPAD_SUFFIX
    environment variable; None is returned when the variable is unset or the
    file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None


def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform tool symbolizer: atos on Darwin, addr2line on Linux.

    Returns None for any other value of `system`.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    elif system == 'Linux':
        return Addr2LineSymbolizer(binary)
    return None


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers queries from a parsed Breakpad symbol file."""

    def __init__(self, filename):
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # Read everything up front and close the file right away.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []          # file names, indexed by FILE record number
        self.symbols = {}        # symbol start address -> symbol name
        self.address_list = []   # sorted start addresses of line records
        self.addresses = {}      # start address -> (symbol addr, size, line,
                                 #                   file number)
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the lookup tables from the records following the MODULE line.

        FILE, PUBLIC and FUNC records are stored, CFI and STACK records are
        ignored, and every other non-blank line is parsed as a line record
        'address size line file_number' belonging to the latest FUNC.
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to parse.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ['CFI', 'STACK']:
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # Keep a name that was already recorded for this address.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = (
                        ' '.join(fragments[4:]))
            else:
                # Line starting with an address.
                addr = int(record, 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, file name, line) for integer `addr`, or None.

        None is returned when no line record covers the address.
        """
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record starting below addr.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None when `binary` differs from the module name read from the
        symbol file or when the offset is not covered by any line record.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
        # Diagnostic dump only; printing it unconditionally would interleave
        # a Python list repr with the symbolized report on stdout.
        if DEBUG:
            print(result)
        return result


class SymbolizationLoop(object):
    """Reads a report from stdin and prints it with stack frames symbolized."""

    #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
    # Compiled once instead of once per input line.
    _STACK_TRACE_LINE_RE = re.compile(
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')

    def __init__(self, binary_name_filter=None):
        # Used by clients who may want to supply a different binary name.
        # E.g. in Chrome several binaries may share a single .dSYM.
        self.binary_name_filter = binary_name_filter
        self.system = os.uname()[0]
        if self.system in ['Linux', 'Darwin']:
            self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
        else:
            raise Exception('Unknown system')

    def symbolize_address(self, addr, binary, offset):
        """Return the list of symbolized frame strings for one address."""
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def print_symbolized_lines(self, symbolized_lines):
        """Print the frames, or the unmodified input line if there are none."""
        if not symbolized_lines:
            print(self.current_line)
            return
        for symbolized_frame in symbolized_lines:
            # NOTE(review): the ' #' prefix is reproduced exactly as it
            # appears in this copy of the file; confirm the intended amount
            # of leading whitespace against the upstream script.
            print(' #' + str(self.frame_no) + ' ' +
                  symbolized_frame.rstrip())
            self.frame_no += 1

    def process_stdin(self):
        """Copy stdin to stdout, replacing stack frame lines by symbolized ones."""
        self.frame_no = 0
        for line in sys.stdin:
            self.current_line = line.rstrip()
            match = self._STACK_TRACE_LINE_RE.match(line)
            if not match:
                print(self.current_line)
                continue
            if DEBUG:
                print(line)
            _, frameno_str, addr, binary, offset = match.groups()
            if frameno_str == '0':
                # Assume that frame #0 is the first frame of new stack trace.
                self.frame_no = 0
            original_binary = binary
            if self.binary_name_filter:
                binary = self.binary_name_filter(binary)
            symbolized_line = self.symbolize_address(addr, binary, offset)
            if not symbolized_line and original_binary != binary:
                # The filtered name gave nothing: retry with the name that
                # was actually printed in the report.
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset)
            self.print_symbolized_lines(symbolized_line)


if __name__ == '__main__':
    opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"])
    for opt, _ in opts:
        if opt in ("-d", "--demangle"):
            demangle = True
    loop = SymbolizationLoop()
    loop.process_stdin()