asan_symbolize.py revision 63e4df4356c9949d95ad0bb6b8fd5f56de2efd00
#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
"""Symbolize stack-trace lines read from standard input.

Lines of the form ``#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)`` are replaced
by one line per (possibly inlined) frame, produced by the first symbolizer in
a per-binary chain that yields a result.  All other lines are echoed as is.

Every command-line argument is used by fix_filename() as a path prefix
pattern to strip from reported file names.
"""
import bisect
import os
import re
import sys
import subprocess

llvm_symbolizer = None
symbolizers = {}
filetypes = {}
vmaddrs = {}
DEBUG = False


def fix_filename(file_name):
    """Shorten a "file:line" location string for display.

    Each command-line argument is appended to ".*" and used as a regular
    expression; whatever it matches is removed from file_name.  Locations
    matching the asan_*.cc pattern are collapsed to "_asan_rtl_" and
    crtstuff.c:0 is replaced by "???:0".

    Args:
        file_name: location string as printed by a symbolizer.
    Returns:
        the rewritten location string.
    """
    for path_to_cut in sys.argv[1:]:
        file_name = re.sub(".*" + path_to_cut, "", file_name)
    file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name)
    file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
    return file_name


class Symbolizer(object):
    """Interface shared by all symbolizers."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """
        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer backed by one long-running llvm-symbolizer process."""

    def __init__(self, symbolizer_path):
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        # None when the executable could not be started.
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start the symbolizer process.

        Returns:
            the subprocess.Popen object, or None if the executable could not
            be launched (e.g. it is not installed).
        """
        cmd = [self.symbolizer_path,
               # FIXME: Remove this when libObject is fixed.
               "--use-symbol-table=false",
               "--demangle=false",
               "--functions=true",
               "--inlining=true"]
        if DEBUG:
            print(' '.join(cmd))
        try:
            return subprocess.Popen(cmd,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            # A missing llvm-symbolizer must not abort the whole script; the
            # caller falls back to the next symbolizer in the chain.
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize"""
        if self.pipe is None:
            return None
        result = []
        try:
            self.pipe.stdin.write("%s %s\n" % (binary, offset))
            # The pipe may be buffered; make sure the request is delivered
            # before blocking on the reply.
            self.pipe.stdin.flush()
            # The reply is read as (function, location) line pairs until an
            # empty line (or EOF) is seen.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if function_name == "":
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith("??") and
                        not file_name.startswith("??")):
                    # Append only valid frames.
                    result.append("%s in %s %s" % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any failure means "no result", so that the next
            # symbolizer in the chain gets a chance.
            result = []
        if not result:
            return None
        return result


def LLVMSymbolizerFactory(system):
    """Return an LLVMSymbolizer on Linux, None on any other system.

    The executable is taken from $LLVM_SYMBOLIZER_PATH when set.
    """
    if system == "Linux":
        symbolizer_path = os.getenv("LLVM_SYMBOLIZER_PATH")
        if not symbolizer_path:
            # Assume llvm-symbolizer is in PATH.
            symbolizer_path = "llvm-symbolizer"
        return LLVMSymbolizer(symbolizer_path)
    return None


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by one "addr2line -f -e <binary>" process."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start addr2line for self.binary and return the Popen object."""
        cmd = ["addr2line", "-f", "-e", self.binary]
        if DEBUG:
            print(' '.join(cmd))
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize"""
        if self.binary != binary:
            return None
        try:
            self.pipe.stdin.write("%s\n" % offset)
            self.pipe.stdin.flush()
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            # Still produce a frame line, with empty function/file names.
            function_name = ""
            file_name = ""
        file_name = fix_filename(file_name)
        return ["%s in %s %s" % (addr, function_name, file_name)]


class DarwinSymbolizer(Symbolizer):
    """Symbolizer that runs atos (and otool for the load address)."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
        if len(addr) > 10:
            self.arch = "x86_64"
        else:
            self.arch = "i386"
        self.vmaddr = None
        self.pipe = None

    def get_binary_vmaddr(self):
        """
        Get the slide value to be added to the address.
        We're looking for the following piece in otool -l output:
          Load command 0
          cmd LC_SEGMENT
          cmdsize 736
          segname __TEXT
          vmaddr 0x00000000
        The value is computed once and cached; 0 is used when no such
        piece is found.
        """
        # Compare against None: a slide of 0 is a valid cached value.
        if self.vmaddr is not None:
            return self.vmaddr
        cmdline = ["otool", "-l", self.binary]
        pipe = subprocess.Popen(cmdline,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        # communicate() reads all output, closes the pipes and reaps the
        # child process.
        output, _ = pipe.communicate()
        is_text = False
        vmaddr = 0
        for line in output.splitlines():
            line = line.strip()
            if line.startswith('segname'):
                is_text = (line == 'segname __TEXT')
                continue
            if line.startswith('vmaddr') and is_text:
                sv = line.split(' ')
                vmaddr = int(sv[-1], 16)
                break
        self.vmaddr = vmaddr
        return self.vmaddr

    def write_addr_to_pipe(self, offset):
        """Send offset plus the binary's slide to atos as a hex address."""
        slide = self.get_binary_vmaddr()
        self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))
        self.pipe.stdin.flush()

    def open_atos(self):
        """Start a fresh atos process for self.binary into self.pipe."""
        if DEBUG:
            print("atos -o %s -arch %s" % (self.binary, self.arch))
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize"""
        if self.binary != binary:
            return None
        self.open_atos()
        self.write_addr_to_pipe(offset)
        # One atos process is spawned per address.  communicate() closes
        # stdin, drains stdout/stderr and waits for the process, so neither
        # file descriptors nor zombie processes are left behind.
        output, _ = self.pipe.communicate()
        atos_line = output.partition("\n")[0].rstrip()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print("atos_line:  %s" % atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesized argument list from the function name.
            function_name = re.sub(r"\(.*?\)", "", function_name)
            file_name = fix_filename(match.group(3))
            return ["%s in %s %s" % (addr, function_name, file_name)]
        else:
            return ["%s in %s" % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries each symbolizer in order; None entries are skipped."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize"""
        for symbolizer in self.symbolizer_list:
            if symbolizer:
                result = symbolizer.symbolize(addr, binary, offset)
                if result:
                    return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add symbolizer to the end of the chain."""
        self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(addr, binary):
    """Return a BreakpadSymbolizer for binary, or None.

    A symbolizer is created only when $BREAKPAD_SUFFIX is set and the file
    named binary + $BREAKPAD_SUFFIX exists.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None


def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform symbolizer: atos on Darwin, addr2line on Linux.

    Returns None for any other system.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    elif system == 'Linux':
        return Addr2LineSymbolizer(binary)
    return None


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that looks addresses up in a Breakpad symbol file."""

    def __init__(self, filename):
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []          # file number -> file name
        self.symbols = {}        # symbol address -> symbol name
        self.address_list = []   # sorted start addresses of line records
        self.addresses = {}      # start address -> (symbol address, size,
                                 #                   line, file number)
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the lookup tables from the records following MODULE."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to record.
                continue
            if fragments[0] == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif fragments[0] == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif fragments[0] in ['CFI', 'STACK', 'INFO']:
                pass
            elif fragments[0] == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name already recorded for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, file name, line) for addr, or None.

        The line record with the greatest start address <= addr is used; it
        matches only if addr lies inside [start, start + size).
        """
        # bisect_right puts an exact hit at index - 1 as well, so a single
        # code path serves both exact and in-range addresses.
        index = bisect.bisect_right(self.address_list, addr)
        if index == 0:
            return None
        key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        else:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize"""
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            result = ["%s in %s %s:%d" % (
                addr, function_name, file_name, line_no)]
            if DEBUG:
                # Diagnostic only; the caller prints the frames itself.
                print(result)
            return result
        else:
            return None


def symbolize_address(system, addr, binary, offset):
    """Return the list of symbolized frame strings for one address.

    The per-binary symbolizer chain is created on first use and cached in
    the module-level `symbolizers` dict.
    """
    # Use the chain of symbolizers:
    # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
    # (fall back to next symbolizer if the previous one fails).
    if binary not in symbolizers:
        symbolizers[binary] = ChainSymbolizer(
            [BreakpadSymbolizerFactory(addr, binary), llvm_symbolizer])
    result = symbolizers[binary].symbolize(addr, binary, offset)
    if result is None:
        # Initialize system symbolizer only if other symbolizers failed.
        symbolizers[binary].append_symbolizer(
            SystemSymbolizerFactory(system, addr, binary))
        result = symbolizers[binary].symbolize(addr, binary, offset)
    # The system symbolizer must produce some result.
    assert result
    return result


def main():
    """Read a report from stdin and print it with frames symbolized."""
    system = os.uname()[0]
    global llvm_symbolizer
    llvm_symbolizer = LLVMSymbolizerFactory(system)
    frame_no = 0
    if system in ['Linux', 'Darwin']:
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
        stack_trace_line_format = re.compile(
            r"^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)")
        for line in sys.stdin:
            match = stack_trace_line_format.match(line)
            if not match:
                print(line.rstrip())
                continue
            if DEBUG:
                print(line)
            _prefix, frameno_str, addr, binary, offset = match.groups()
            if frameno_str == "0":
                # Assume that frame #0 is the first frame of new stack trace.
                frame_no = 0
            symbolized_line = symbolize_address(system, addr, binary, offset)
            if not symbolized_line:
                print(line.rstrip())
            else:
                # Inlined frames each get their own, renumbered, line.
                for symbolized_frame in symbolized_line:
                    print(" #" + str(frame_no) + " " +
                          symbolized_frame.rstrip())
                    frame_no += 1
    else:
        print('Unknown system:  %s' % system)


if __name__ == '__main__':
    main()