asan_symbolize.py revision 970a9b9c6c6bb894557fe2d1779118ee8c3070d8
#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
#
# Reads a report from stdin and rewrites every stack-frame line of the form
#   "#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)"
# with the function name and source location obtained from addr2line (Linux)
# or atos (Darwin).  All other lines are echoed unchanged (minus trailing
# whitespace).  Each command-line argument is a pattern; everything up to and
# including its match is cut from the reported file names.
#
#===------------------------------------------------------------------------===#
import os
import re
import sys
import string
import subprocess

# binary path -> long-running addr2line subprocess serving that binary.
pipes = {}
# binary path -> Mach-O file type ("DYLIB", "EXECUTE" or None if unrecognized).
filetypes = {}
DEBUG=False

# Frame line understood by symbolize_addr2line():
#   group 1: "#N 0xADDR" prefix, group 2: frame number,
#   group 3: binary path,      group 4: offset inside the binary.
_ADDR2LINE_FRAME_RE = re.compile(
    r'^( *#([0-9]+) *0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')

# Frame line understood by symbolize_atos():
#   group 1: "#N " prefix, group 2: frame number, group 3: absolute address,
#   group 4: binary path,  group 5: offset inside the binary.
_ATOS_FRAME_RE = re.compile(
    r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')

# A well-formed atos response looks like this:
#   foo(type1, type2) (in object.name) (filename.cc:80)
_ATOS_RESPONSE_RE = re.compile(r'^(.*) \(in (.*)\) \((.*:\d*)\)$')


def _emit(text):
  """Write `text` followed by a newline to stdout.

  Replaces the Python-2-only print statement so the script runs unchanged on
  both Python 2 and Python 3.
  """
  sys.stdout.write(text + "\n")


def fix_filename(file_name):
  """Return `file_name` shortened for display.

  Every command-line argument is treated as a regular expression; the text up
  to and including its last match is removed.  Locations inside asan_*.cc
  files collapse to "_asan_rtl_" and "crtstuff.c:0" becomes "???:0".
  """
  # NOTE(review): arguments are interpolated into the pattern unescaped, so a
  # path containing regex metacharacters is interpreted as a pattern.  Kept
  # as-is because callers may rely on passing patterns.
  for path_to_cut in sys.argv[1:]:
    file_name = re.sub(".*" + path_to_cut, "", file_name)
  file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name)
  file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
  return file_name


# TODO(glider): need some refactoring here
def symbolize_addr2line(line):
  """Symbolize one report line with addr2line and print the result.

  Lines that are not stack frames are printed with trailing whitespace
  stripped.  For frame lines one addr2line process per binary is started on
  first use and kept in `pipes` for subsequent lookups.  If talking to the
  process fails, the frame is printed with empty function and file names.
  """
  #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
  match = _ADDR2LINE_FRAME_RE.match(line)
  if not match:
    _emit(line.rstrip())
    return

  binary = match.group(3)
  addr = match.group(4)
  if binary not in pipes:
    pipes[binary] = subprocess.Popen(["addr2line", "-f", "-e", binary],
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     universal_newlines=True)
  p = pipes[binary]
  try:
    p.stdin.write(addr + "\n")
    # The pipe may be buffered; push the request out before waiting for the
    # two-line (function, file:line) answer.
    p.stdin.flush()
    function_name = p.stdout.readline().rstrip()
    file_name = p.stdout.readline().rstrip()
  except (IOError, OSError, ValueError):
    # Broken or closed pipe: degrade to an unsymbolized frame instead of
    # aborting the whole report.
    function_name = ""
    file_name = ""
  file_name = fix_filename(file_name)

  _emit("%s in %s %s" % (match.group(1), function_name, file_name))


def get_macho_filetype(binary):
  """Return the Mach-O file type of `binary` as reported by `otool -Vh`.

  The result is "DYLIB" or "EXECUTE" (if both strings occur in the otool
  output, "EXECUTE" wins), or None when neither is present.  Results are
  cached in `filetypes`, so otool runs at most once per binary.
  """
  if binary not in filetypes:
    otool_pipe = subprocess.Popen(["otool", "-Vh", binary],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  universal_newlines=True)
    # communicate() closes stdin, collects all output and reaps the child.
    otool_output = otool_pipe.communicate()[0]
    detected = None
    for t in ("DYLIB", "EXECUTE"):
      if t in otool_output:
        detected = t
    filetypes[binary] = detected
  return filetypes[binary]


def symbolize_atos(line):
  """Symbolize one report line with atos and print the result.

  Lines that are not stack frames are printed with trailing whitespace
  stripped.  For frame lines a fresh atos process is run; dylibs are queried
  by offset with load address 0x0, anything else by absolute address with the
  load address derived as (address - offset).  If the atos answer is not in
  the expected "func (in object) (file:line)" form it is printed verbatim
  after the frame prefix.
  """
  #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
  match = _ATOS_FRAME_RE.match(line)
  if not match:
    _emit(line.rstrip())
    return

  prefix = match.group(1)
  addr = match.group(3)
  binary = match.group(4)
  offset = match.group(5)
  is_dylib = get_macho_filetype(binary) == "DYLIB"

  # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
  if len(addr) > 10:
    arch = "x86_64"
  else:
    arch = "i386"

  if is_dylib:
    load_addr = "0x0"
  else:
    # "%x" formatting avoids the trailing "L" that hex() appends to Python 2
    # longs, which atos would not be able to parse.
    load_addr = "0x%x" % (int(addr, 16) - int(offset, 16))
  if DEBUG:
    _emit("atos -o %s -arch %s -l %s" % (binary, arch, load_addr))

  cmd = ["atos", "-o", binary, "-arch", arch, "-l", load_addr]
  atos = subprocess.Popen(cmd,
                          stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE,
                          universal_newlines=True)
  query = offset if is_dylib else addr
  # TODO(glider): it's more efficient to make a batch atos run for each binary.
  # communicate() sends the query, closes stdin, drains stdout and stderr and
  # waits for atos to exit, so no child process or pipe is left behind.
  atos_output = atos.communicate(query + "\n")[0]
  atos_line = atos_output.split("\n", 1)[0].rstrip()

  response = _ATOS_RESPONSE_RE.match(atos_line)
  if response:
    # Drop the argument list: "foo(type1, type2)" -> "foo".
    function_name = re.sub(r"\(.*?\)", "", response.group(1))
    file_name = fix_filename(response.group(3))
    _emit("%s%s in %s %s" % (prefix, addr, function_name, file_name))
  else:
    _emit("%s%s in %s" % (prefix, addr, atos_line))


def main():
  """Symbolize stdin line by line with the tool matching the host system."""
  system = os.uname()[0]
  symbolizers = {
      'Linux': symbolize_addr2line,
      'Darwin': symbolize_atos,
  }
  symbolize = symbolizers.get(system)
  if symbolize is None:
    _emit('%s %s' % ('Unknown system: ', system))
    return
  for line in sys.stdin:
    symbolize(line)


if __name__ == '__main__':
  main()