asan_symbolize.py revision b2546c44c828a546a98c091c714b71b1c9966673
#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import bisect
import os
import re
import sys
import subprocess

llvm_symbolizer = None
symbolizers = {}
filetypes = {}
vmaddrs = {}
DEBUG = False


def _write_line(stream, text):
    """Write `text` followed by a newline to `stream` and flush it.

    The explicit flush matters because the child processes answer one
    request at a time; a line stuck in a buffer would deadlock the
    following readline().
    """
    stream.write("%s\n" % text)
    stream.flush()


# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Shorten a "file:line" location for display.

    Every command-line argument (sys.argv[1:]) is used as a regular
    expression; everything up to and including its match is cut from
    `file_name`.  Locations inside asan_*.cc files are replaced with
    "_asan_rtl_" and "crtstuff.c:0" with "???:0".

    Args:
        file_name: location string as printed by a symbolizer.
    Returns:
        the rewritten location string.
    """
    for path_to_cut in sys.argv[1:]:
        file_name = re.sub(".*" + path_to_cut, "", file_name)
    file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name)
    file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
    return file_name


class Symbolizer(object):
    """Base class of all symbolizers; symbolizes nothing."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """
        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers), or None on failure.
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer backed by one long-running llvm-symbolizer process."""

    def __init__(self, symbolizer_path):
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start llvm-symbolizer.

        Returns:
            the subprocess.Popen object, or None when the program cannot be
            started (for example, it is neither an existing path nor found
            in PATH).
        """
        cmd = [self.symbolizer_path,
               "--use-symbol-table=true",
               "--demangle=false",
               "--functions=true",
               "--inlining=true"]
        if DEBUG:
            print(' '.join(cmd))
        try:
            # Let Popen do the lookup: an os.path.exists() test would reject
            # a bare program name that is only reachable through PATH.
            return subprocess.Popen(cmd,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize"""
        if not self.pipe:
            return None
        result = []
        try:
            _write_line(self.pipe.stdin, "%s %s" % (binary, offset))
            # The answer is a sequence of (function, location) line pairs,
            # one pair per inlined frame, terminated by an empty line.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith("??") and
                        not file_name.startswith("??")):
                    # Append only valid frames.
                    result.append("%s in %s %s" % (addr, function_name,
                                                   file_name))
        except (IOError, OSError, ValueError):
            # Best effort: a broken pipe means "could not symbolize" so that
            # the caller falls back to the next symbolizer.
            result = []
        return result or None


def LLVMSymbolizerFactory(system):
    """Return an LLVMSymbolizer on Linux and None on any other system.

    The binary is taken from $LLVM_SYMBOLIZER_PATH when that is set.
    """
    if system == "Linux":
        symbolizer_path = os.getenv("LLVM_SYMBOLIZER_PATH")
        if not symbolizer_path:
            # Assume llvm-symbolizer is in PATH.
            symbolizer_path = "llvm-symbolizer"
        return LLVMSymbolizer(symbolizer_path)
    return None


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by an `addr2line -f -e <binary>` process."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start addr2line for self.binary and return the Popen object."""
        cmd = ["addr2line", "-f", "-e", self.binary]
        if DEBUG:
            print(' '.join(cmd))
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize

        Returns None for a foreign binary; otherwise always a one-element
        list, with empty function and file names if the pipe failed.
        """
        if self.binary != binary:
            return None
        try:
            _write_line(self.pipe.stdin, offset)
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except (IOError, OSError, ValueError):
            function_name = ""
            file_name = ""
        file_name = fix_filename(file_name)
        return ["%s in %s %s" % (addr, function_name, file_name)]


class DarwinSymbolizer(Symbolizer):
    """Symbolizer backed by the `atos` and `otool` command-line tools."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
        if len(addr) > 10:
            self.arch = "x86_64"
        else:
            self.arch = "i386"
        self.vmaddr = None
        self.pipe = None

    def get_binary_vmaddr(self):
        """
        Get the slide value to be added to the address.
        We're looking for the following piece in otool -l output:
          Load command 0
          cmd LC_SEGMENT
          cmdsize 736
          segname __TEXT
          vmaddr 0x00000000

        The value is computed once and cached in self.vmaddr.
        """
        # Compare against None: a slide of 0 is a valid cached value.
        if self.vmaddr is not None:
            return self.vmaddr
        cmdline = ["otool", "-l", self.binary]
        otool = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 universal_newlines=True)
        # communicate() reads everything, closes the pipes and reaps otool.
        output, _ = otool.communicate()
        is_text = False
        vmaddr = 0
        for line in output.splitlines():
            line = line.strip()
            if line.startswith('segname'):
                is_text = (line == 'segname __TEXT')
                continue
            if line.startswith('vmaddr') and is_text:
                sv = line.split(' ')
                vmaddr = int(sv[-1], 16)
                break
        self.vmaddr = vmaddr
        return self.vmaddr

    def write_addr_to_pipe(self, offset):
        """Send `offset` (a hex string) plus the binary's slide to atos."""
        slide = self.get_binary_vmaddr()
        _write_line(self.pipe.stdin, "0x%x" % (int(offset, 16) + slide))

    def open_atos(self):
        """Start a fresh atos process and store it in self.pipe."""
        if DEBUG:
            print("atos -o %s -arch %s" % (self.binary, self.arch))
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize

        Returns None for a foreign binary; otherwise always a one-element
        list (the raw atos line is used when it cannot be parsed).
        """
        if self.binary != binary:
            return None
        self.open_atos()
        self.write_addr_to_pipe(offset)
        # One atos process serves one request.  communicate() closes stdin,
        # drains stdout/stderr and waits for the child, so neither zombie
        # processes nor open pipes pile up over a long stack trace.
        output, _ = self.pipe.communicate()
        atos_line = output.split("\n", 1)[0].rstrip()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print("atos_line:  %s" % atos_line)
        if match:
            function_name = match.group(1)
            function_name = re.sub(r"\(.*?\)", "", function_name)
            file_name = fix_filename(match.group(3))
            return ["%s in %s %s" % (addr, function_name, file_name)]
        return ["%s in %s" % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order; None entries are skipped."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize

        Returns the first non-empty result, or None if every symbolizer in
        the chain failed.
        """
        for symbolizer in self.symbolizer_list:
            if symbolizer:
                result = symbolizer.symbolize(addr, binary, offset)
                if result:
                    return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add `symbolizer` to the end of the chain."""
        self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(addr, binary):
    """Return a BreakpadSymbolizer for `binary`, or None.

    The symbol file is looked up at `binary` + $BREAKPAD_SUFFIX; None is
    returned when the variable is unset or the file does not exist.  `addr`
    is not used.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None


def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform symbolizer (atos on Darwin, addr2line on Linux).

    Returns None for any other `system`.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    elif system == 'Linux':
        return Addr2LineSymbolizer(binary)
    return None


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad text symbol file."""

    def __init__(self, filename):
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []         # file number -> file name
        self.symbols = {}       # function/public address -> symbol name
        self.address_list = []  # sorted start addresses of line records
        self.addresses = {}     # start address -> (symbol address, size,
                                #                   line, file number)
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the lookup tables from the records after the MODULE line."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Tolerate blank lines instead of failing on fragments[0].
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK'):
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # An earlier symbol recorded at this address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(record, 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Look up the line record covering the integer address `addr`.

        Returns:
            a (symbol, file name, line number) tuple, or None when no line
            record covers `addr`.
        """
        if addr in self.addresses:
            key = addr
        else:
            # Closest record that starts below addr.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize"""
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        # Only return the frame; printing is the job of SymbolizationLoop.
        return ["%s in %s %s:%d" % (addr, function_name, file_name, line_no)]


class SymbolizationLoop(object):
    """Reads a stack trace from stdin and prints it symbolized."""

    def __init__(self):
        self.system = os.uname()[0]
        if self.system in ['Linux', 'Darwin']:
            self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
        else:
            raise Exception("Unknown system")

    def symbolize_address(self, addr, binary, offset):
        """Symbolize one frame.

        Returns:
            list of strings, one per (possibly inlined) frame.
        """
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            # Use the instance attribute set in __init__; the module-level
            # llvm_symbolizer is never assigned and is always None.
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(addr, binary),
                 self.llvm_symbolizer])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def loop(self):
        """Copy stdin to stdout, replacing stack frames by symbolized ones."""
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
        stack_trace_line_format = re.compile(
            r"^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)")
        frame_no = 0
        for line in sys.stdin:
            match = stack_trace_line_format.match(line)
            if not match:
                print(line.rstrip())
                continue
            if DEBUG:
                print(line)
            _prefix, frameno_str, addr, binary, offset = match.groups()
            if frameno_str == "0":
                # Assume that frame #0 is the first frame of new stack trace.
                frame_no = 0
            symbolized_line = self.symbolize_address(addr, binary, offset)
            if not symbolized_line:
                print(line.rstrip())
                continue
            # Inlined frames get their own numbers, so frames are renumbered.
            for symbolized_frame in symbolized_line:
                # NOTE(review): the width of the leading indent in this
                # prefix could not be confirmed from the whitespace-collapsed
                # source -- verify against the upstream file.
                print(" #" + str(frame_no) + " " + symbolized_frame.rstrip())
                frame_no += 1


if __name__ == '__main__':
    loop = SymbolizationLoop()
    loop.loop()