asan_symbolize.py revision 8e39869fdc1c56eeaedb6b793e5ada67f41925f3
#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
"""Symbolize AddressSanitizer stack traces.

The report is read from stdin and written to stdout.  Lines that look like

    #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)

are rewritten with a function name and a source location; every other line
is passed through.  Each command-line argument is used as a regular
expression: everything up to and including its match is cut from reported
file names.

The code is written so that it runs unchanged on Python 2 and Python 3.
"""
import bisect
import os
import re
import subprocess
import sys

# Per-binary cache.  Holds addr2line Popen objects on Linux and Symbolizer
# instances on Darwin, keyed by the binary path found in the stack frame.
pipes = {}
filetypes = {}
vmaddrs = {}
DEBUG = False

# "   #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)"
# addr2line flavour: group 1 is the frame prefix including the address.
_ADDR2LINE_FRAME_RE = re.compile(
    r'^( *#([0-9]+) *0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
# Darwin flavour: the prefix (group 1) and the address (group 3) are separate.
_FRAME_RE = re.compile(
    r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
# A well-formed atos response looks like this:
#   foo(type1, type2) (in object.name) (filename.cc:80)
_ATOS_RE = re.compile(r'^(.*) \(in (.*)\) \((.*:\d*)\)$')

# Breakpad record types that carry nothing this script uses.
_IGNORED_BREAKPAD_RECORDS = frozenset(
    ['CFI', 'STACK', 'INFO', 'INLINE', 'INLINE_ORIGIN', 'MODULE'])


def fix_filename(file_name):
    """Return file_name with uninteresting path parts removed.

    Every command-line argument is treated as a regular expression and
    everything up to and including its match is cut.  Locations inside
    asan_*.cc are collapsed to "_asan_rtl_" and "crtstuff.c:0" becomes
    "???:0".
    """
    for path_to_cut in sys.argv[1:]:
        file_name = re.sub(".*" + path_to_cut, "", file_name)
    file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name)
    file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
    return file_name


# TODO(glider): need some refactoring here
def symbolize_addr2line(line):
    """Symbolize one report line with addr2line and print the result.

    One "addr2line -f -e <binary>" process per binary is started lazily and
    kept in `pipes`.  Lines that are not stack frames are printed unchanged
    (without trailing whitespace).
    """
    match = _ADDR2LINE_FRAME_RE.match(line)
    if not match:
        print(line.rstrip())
        return
    binary = match.group(3)
    addr = match.group(4)
    if binary not in pipes:
        # universal_newlines gives text-mode pipes on Python 3.
        pipes[binary] = subprocess.Popen(["addr2line", "-f", "-e", binary],
                                         stdin=subprocess.PIPE,
                                         stdout=subprocess.PIPE,
                                         universal_newlines=True)
    p = pipes[binary]
    try:
        p.stdin.write(addr + "\n")
        # Text-mode pipes are buffered on Python 3; without the flush the
        # readline() below would wait forever.
        p.stdin.flush()
        function_name = p.stdout.readline().rstrip()
        file_name = p.stdout.readline().rstrip()
    except (IOError, OSError, ValueError):
        # Best effort: addr2line died or the pipe is closed.  Keep going
        # and print the frame without a name.
        function_name = ""
        file_name = ""
    file_name = fix_filename(file_name)
    print("%s in %s %s" % (match.group(1), function_name, file_name))


class Symbolizer(object):
    """Base class of the per-binary symbolizers.

    Subclasses implement symbolize(prefix, addr, offset), which returns the
    rewritten frame line or None when the offset cannot be resolved.
    """

    def __init__(self):
        pass


class DarwinSymbolizer(Symbolizer):
    """Symbolizer that runs the "atos" tool, one process per frame."""

    def __init__(self, addr, binary):
        """Remember the binary and guess the architecture from addr.

        addr is the textual address from the frame; anything longer than
        10 characters ("0x" + 8 hex digits) is taken to be 64-bit.
        """
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
        if len(addr) > 10:
            self.arch = "x86_64"
        else:
            self.arch = "i386"
        self.vmaddr = None
        self.pipe = None

    def get_binary_vmaddr(self):
        """
        Get the slide value to be added to the address.
        We're looking for the following piece in otool -l output:
          Load command 0
          cmd LC_SEGMENT
          cmdsize 736
          segname __TEXT
          vmaddr 0x00000000
        Returns 0 when no __TEXT vmaddr is found.  The value is cached.
        """
        # Compare against None: 0 is a valid cached value and must not
        # trigger another otool run.
        if self.vmaddr is not None:
            return self.vmaddr
        otool = subprocess.Popen(["otool", "-l", self.binary],
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 universal_newlines=True)
        # communicate() reads everything, closes the pipes and reaps the
        # child process.
        output, _ = otool.communicate()
        in_text_segment = False
        vmaddr = 0
        for line in output.splitlines():
            line = line.strip()
            if line.startswith('segname'):
                in_text_segment = (line == 'segname __TEXT')
                continue
            if in_text_segment and line.startswith('vmaddr'):
                vmaddr = int(line.split()[-1], 16)
                break
        self.vmaddr = vmaddr
        return self.vmaddr

    def write_addr_to_pipe(self, offset):
        """Send offset (hex string) plus the binary's vmaddr to atos."""
        slide = self.get_binary_vmaddr()
        self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))
        self.pipe.stdin.flush()

    def open_atos(self):
        """Start an atos process for self.binary and store it in self.pipe."""
        if DEBUG:
            print("atos -o %s -arch %s" % (self.binary, self.arch))
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, prefix, addr, offset):
        """Return the frame line for offset as resolved by atos.

        prefix and addr are copied into the result verbatim.  When the atos
        answer is not in the expected form it is appended as is.  Never
        returns None.
        """
        self.open_atos()
        self.write_addr_to_pipe(offset)
        # communicate() closes stdin, drains stdout and stderr and waits for
        # atos, so no process or descriptor is left behind per frame.
        output, _ = self.pipe.communicate()
        atos_line = output.split("\n", 1)[0].rstrip()
        match = _ATOS_RE.match(atos_line)
        if DEBUG:
            print("atos_line:  %s" % atos_line)
        if match:
            function_name = match.group(1)
            # Drop the argument list: "foo(int)" -> "foo".
            function_name = re.sub(r"\(.*?\)", "", function_name)
            file_name = fix_filename(match.group(3))
            return "%s%s in %s %s" % (prefix, addr, function_name, file_name)
        else:
            return "%s%s in %s" % (prefix, addr, atos_line)


# Chain two symbolizers so that the second one is called if the first fails.
class ChainSymbolizer(Symbolizer):
    """Try symbolizer1 and fall back to symbolizer2 when it returns None."""

    def __init__(self, symbolizer1, symbolizer2):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer1 = symbolizer1
        self.symbolizer2 = symbolizer2

    def symbolize(self, prefix, addr, offset):
        """Return the first non-None result of the two symbolizers."""
        result = self.symbolizer1.symbolize(prefix, addr, offset)
        if result is None:
            result = self.symbolizer2.symbolize(prefix, addr, offset)
        return result


def BreakpadSymbolizerFactory(addr, binary):
    """Return a BreakpadSymbolizer for binary, or None.

    The symbol file is looked up as binary + $BREAKPAD_SUFFIX.  None is
    returned when the variable is unset or empty, or the file does not
    exist.  addr is accepted for symmetry with DarwinSymbolizer and is not
    used.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            # BreakpadSymbolizer takes the symbol file name only.
            return BreakpadSymbolizer(filename)
    return None


def _drop_multiple_marker(fields):
    """Return fields without the optional leading "m" of FUNC/PUBLIC."""
    if fields and fields[0] == 'm':
        return fields[1:]
    return fields


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer backed by a Breakpad text symbol file.

    Attributes filled while parsing:
      files         list of source file names, indexed by FILE number
      symbols       dict: function/public address -> name
      address_list  sorted list of line-record start addresses
      addresses     dict: line-record address ->
                    (function address, size, line number, file number)
    """

    def __init__(self, filename):
        """Load and parse the symbol file.

        Raises ValueError when the first line is not a complete MODULE
        record.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        with open(filename) as sym_file:
            lines = sym_file.readlines()
        self.files = []
        self.symbols = {}
        self.address_list = []
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split() if lines else []
        if len(fragments) < 4:
            raise ValueError(
                "%s: missing or malformed MODULE record" % filename)
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Parse the records that follow the MODULE line.

        FILE, FUNC, PUBLIC and line records are stored; blank lines and
        record types listed in _IGNORED_BREAKPAD_RECORDS are skipped.
        Raises ValueError for FILE records that are not numbered
        sequentially from the current length of self.files.
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                continue
            kind = fragments[0]
            if kind == 'FILE':
                # self.files is indexed by file number, so the numbering
                # has to be dense and in order.
                if int(fragments[1]) != len(self.files):
                    raise ValueError(
                        "unexpected FILE number: %s" % line.rstrip())
                self.files.append(' '.join(fragments[2:]))
            elif kind == 'PUBLIC':
                # PUBLIC [m] address parameter_size name
                fields = _drop_multiple_marker(fragments[1:])
                self.symbols[int(fields[0], 16)] = ' '.join(fields[2:])
            elif kind == 'FUNC':
                # FUNC [m] address size parameter_size name
                fields = _drop_multiple_marker(fragments[1:])
                cur_function_addr = int(fields[0], 16)
                # Line records refer to their function by this address, so
                # the name has to be findable in self.symbols.  A name
                # already stored for the address is kept.
                self.symbols.setdefault(cur_function_addr,
                                        ' '.join(fields[3:]))
            elif kind in _IGNORED_BREAKPAD_RECORDS:
                continue
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                if addr not in self.addresses:
                    self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, filename, line_no) covering addr, or None.

        addr is an integer offset into the module.  None is returned when
        no line record covers it or the record refers to an unknown symbol
        or file number.
        """
        # bisect_right puts an exact match to the left of the returned
        # index, so index - 1 is the last record starting at or before addr.
        index = bisect.bisect_right(self.address_list, addr)
        if index == 0:
            return None
        key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        if addr >= key + size:
            return None
        symbol = self.symbols.get(sym_id)
        if symbol is None or not 0 <= file_no < len(self.files):
            return None
        return symbol, self.files[file_no], line_no

    def symbolize(self, prefix, addr, offset):
        """Return the symbolized frame line, or None if offset is unknown.

        offset is a hex string; prefix and addr are copied verbatim.
        """
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            return "%s%s in %s %s:%d" % (
                prefix, addr, function_name, file_name, line_no)
        else:
            return None


def symbolize_line(line):
    """Return line symbolized for Darwin.

    A Breakpad symbol file is preferred when BreakpadSymbolizerFactory finds
    one; otherwise, or when Breakpad cannot resolve the offset, atos is
    used.  Lines that are not stack frames are returned unchanged.
    """
    match = _FRAME_RE.match(line)
    if not match:
        return line
    if DEBUG:
        print(line)
    prefix = match.group(1)
    addr = match.group(3)
    binary = match.group(4)
    offset = match.group(5)
    if binary not in pipes:
        symbolizer = BreakpadSymbolizerFactory(addr, binary)
        if not symbolizer:
            symbolizer = DarwinSymbolizer(addr, binary)
        pipes[binary] = symbolizer
    result = pipes[binary].symbolize(prefix, addr, offset)
    if result is None:
        # The cached symbolizer could not resolve the offset: from now on
        # fall back to atos for this binary.
        pipes[binary] = ChainSymbolizer(pipes[binary],
                                        DarwinSymbolizer(addr, binary))
        result = pipes[binary].symbolize(prefix, addr, offset)
    return result


def main():
    """Symbolize stdin to stdout using the tool that fits this system."""
    system = os.uname()[0]
    if system == 'Linux':
        for line in sys.stdin:
            symbolize_addr2line(line)
    elif system == 'Darwin':
        for line in sys.stdin:
            print(symbolize_line(line).rstrip())
    else:
        print('Unknown system:  %s' % system)


if __name__ == '__main__':
    main()