asan_symbolize.py revision 8e39869fdc1c56eeaedb6b793e5ada67f41925f3
1#!/usr/bin/env python
2#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
3#
4#                     The LLVM Compiler Infrastructure
5#
6# This file is distributed under the University of Illinois Open Source
7# License. See LICENSE.TXT for details.
8#
9#===------------------------------------------------------------------------===#
10import bisect
11import os
12import re
13import sys
14import subprocess
15
# When True, extra diagnostics (the input frame line, the atos command line
# and the raw atos output) are printed while symbolizing.
DEBUG = False

# Per-binary cache keyed by the module path taken from a stack frame.  On
# Linux the values are addr2line subprocesses, on Darwin they are symbolizer
# objects.
pipes = {}

# NOTE(review): the two dictionaries below are not referenced anywhere in the
# visible part of this file -- confirm they are unused before removing them.
vmaddrs = {}
filetypes = {}
20
21
def fix_filename(file_name):
  """Shorten a "file:line" location before it is printed.

  Each command-line argument is used as a regular expression: everything up
  to and including its match is removed from file_name.  After that,
  locations inside asan_*.cc files collapse to "_asan_rtl_" and
  "crtstuff.c:0" locations become "???:0".

  Returns the rewritten string.
  """
  # Substitutions are applied strictly in this order; each one sees the
  # output of the previous one.
  rewrites = [(".*" + path_to_cut, "") for path_to_cut in sys.argv[1:]]
  rewrites.append((".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_"))
  rewrites.append((".*crtstuff.c:0", "???:0"))
  for pattern, replacement in rewrites:
    file_name = re.sub(pattern, replacement, file_name)
  return file_name
28
29
30# TODO(glider): need some refactoring here
def symbolize_addr2line(line):
  """Symbolize one stack-trace line with addr2line and print the result.

  A frame line looks like
    #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
  For such a line the module offset is sent to an "addr2line -f -e <module>"
  subprocess (one per module, cached in the global `pipes`), and
  "<frame prefix> in <function> <file:line>" is printed.  Any other line is
  printed unchanged apart from stripping trailing whitespace.

  Args:
    line: one line of the report being symbolized.

  Returns:
    None; the output goes to stdout.
  """
  match = re.match(
      r'^( *#([0-9]+) *0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line)
  if not match:
    print(line.rstrip())
    return
  # Group 2 (the frame number) is matched but not needed here.
  binary = match.group(3)
  addr = match.group(4)
  if binary not in pipes:
    # Text-mode pipes so that str can be written and read on Python 3 too.
    pipes[binary] = subprocess.Popen(["addr2line", "-f", "-e", binary],
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     universal_newlines=True)
  p = pipes[binary]
  try:
    p.stdin.write(addr + "\n")
    # Make sure the request reaches addr2line before waiting for the answer.
    p.stdin.flush()
    # With -f, addr2line answers with two lines: function, then file:line.
    function_name = p.stdout.readline().rstrip()
    file_name = p.stdout.readline().rstrip()
  except (IOError, OSError, ValueError):
    # Best effort: a dead or closed pipe still yields the raw frame line.
    function_name = ""
    file_name = ""
  file_name = fix_filename(file_name)
  print("%s in %s %s" % (match.group(1), function_name, file_name))
54
55
class Symbolizer(object):
  """Common base class of the symbolizer classes defined in this script."""

  def __init__(self):
    """Nothing to set up; subclasses chain to this through super()."""
59
60
class DarwinSymbolizer(Symbolizer):
  """Symbolizes offsets inside one binary by running atos on it."""

  def __init__(self, addr, binary):
    """Remember the binary and guess its architecture.

    Args:
      addr: a frame address as a hex string ("0x..."); only its length is
        used, to tell 32-bit from 64-bit addresses.
      binary: path of the module passed to otool and atos.
    """
    super(DarwinSymbolizer, self).__init__()
    self.binary = binary
    # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
    if len(addr) > 10:
      self.arch = "x86_64"
    else:
      self.arch = "i386"
    # vmaddr of the __TEXT segment; None until otool has been consulted.
    self.vmaddr = None
    # Popen object of the most recently started atos process.
    self.pipe = None

  def get_binary_vmaddr(self):
    """
    Get the slide value to be added to the address.
    We're looking for the following piece in otool -l output:
      Load command 0
      cmd LC_SEGMENT
      cmdsize 736
      segname __TEXT
      vmaddr 0x00000000

    Returns the vmaddr of the __TEXT segment as an int, or 0 when the otool
    output has no such entry.  The value is computed once and cached.
    """
    # Compare against None: 0 is a valid vmaddr and must count as cached,
    # otherwise otool would be re-run for every frame.
    if self.vmaddr is not None:
      return self.vmaddr
    cmdline = ["otool", "-l", self.binary]
    pipe = subprocess.Popen(cmdline,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    # communicate() closes stdin, reads all of stdout and reaps the child.
    output, _ = pipe.communicate()
    is_text = False
    vmaddr = 0
    for line in output.splitlines():
      line = line.strip()
      if line.startswith('segname'):
        # Remember whether the vmaddr line that follows belongs to __TEXT.
        is_text = (line == 'segname __TEXT')
        continue
      if line.startswith('vmaddr') and is_text:
        vmaddr = int(line.split(' ')[-1], 16)
        break
    self.vmaddr = vmaddr
    return self.vmaddr

  def write_addr_to_pipe(self, offset):
    """Send offset (a hex string) plus the binary's vmaddr to atos' stdin."""
    slide = self.get_binary_vmaddr()
    self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))

  def open_atos(self):
    """Start a new atos process for this binary and store it in self.pipe."""
    if DEBUG:
      print("atos -o %s -arch %s" % (self.binary, self.arch))
    cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
    # Text-mode pipes so that str can be written and read on Python 3 too.
    self.pipe = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True)

  def symbolize(self, prefix, addr, offset):
    """Symbolize one frame with a freshly started atos process.

    Args:
      prefix: text preceding the address in the frame line (e.g. "  #0 ").
      addr: the frame address exactly as it appeared in the frame line.
      offset: offset inside the binary, as a hex string.

    Returns:
      "<prefix><addr> in <function> <file:line>" when atos gives a
      well-formed answer, otherwise "<prefix><addr> in <raw atos output>".
    """
    self.open_atos()
    try:
      self.write_addr_to_pipe(offset)
      # Closing stdin flushes the request and tells atos there is no more
      # input.
      self.pipe.stdin.close()
      atos_line = self.pipe.stdout.readline().rstrip()
    finally:
      # One atos process is started per call, so release its descriptors
      # and reap it here instead of leaking them for every frame.
      for stream in (self.pipe.stdin, self.pipe.stdout, self.pipe.stderr):
        stream.close()
      self.pipe.wait()
    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    if DEBUG:
      print("%s %s" % ("atos_line: ", atos_line))
    if match:
      function_name = match.group(1)
      # Drop the parenthesized argument list from the function name.
      function_name = re.sub(r"\(.*?\)", "", function_name)
      file_name = fix_filename(match.group(3))
      return "%s%s in %s %s" % (prefix, addr, function_name, file_name)
    else:
      return "%s%s in %s" % (prefix, addr, atos_line)
129
130
131# Chain two symbolizers so that the second one is called if the first fails.
class ChainSymbolizer(Symbolizer):
  """Tries one symbolizer and falls back to a second one.

  The second symbolizer is consulted only when the first one returns None.
  """

  def __init__(self, symbolizer1, symbolizer2):
    """Store the preferred (symbolizer1) and fallback (symbolizer2) objects."""
    super(ChainSymbolizer, self).__init__()
    self.symbolizer1 = symbolizer1
    self.symbolizer2 = symbolizer2

  def symbolize(self, prefix, addr, offset):
    """Return symbolizer1's answer, or symbolizer2's if the former is None."""
    primary = self.symbolizer1.symbolize(prefix, addr, offset)
    if primary is not None:
      return primary
    return self.symbolizer2.symbolize(prefix, addr, offset)
142
143
def BreakpadSymbolizerFactory(addr, binary):
  """Create a BreakpadSymbolizer for binary if a symbol file is available.

  The symbol file name is the binary path with the value of the
  BREAKPAD_SUFFIX environment variable appended.

  Args:
    addr: unused; kept so that existing callers need not change.
    binary: path of the module being symbolized.

  Returns:
    A BreakpadSymbolizer, or None when BREAKPAD_SUFFIX is unset/empty or the
    symbol file does not exist.
  """
  suffix = os.getenv("BREAKPAD_SUFFIX")
  if not suffix:
    return None
  filename = binary + suffix
  if not os.access(filename, os.F_OK):
    return None
  # The constructor takes only the symbol file name; passing addr as well
  # raised a TypeError.
  return BreakpadSymbolizer(filename)
151
152
class BreakpadSymbolizer(Symbolizer):
  """Symbolizes offsets using a Breakpad text symbol file.

  The whole file is parsed on construction; lookups afterwards need no
  external process.
  """

  def __init__(self, filename):
    """Load and parse the symbol file.

    Args:
      filename: path of the Breakpad symbol file.  Its first line must be a
        MODULE record.
    """
    super(BreakpadSymbolizer, self).__init__()
    self.filename = filename
    # Close the file as soon as it has been read.
    with open(filename) as sym_file:
      lines = sym_file.readlines()
    # Source file names, indexed by the number given in FILE records.
    self.files = []
    # Start address -> symbol name.
    self.symbols = {}
    # Sorted start addresses of all line records, for bisecting.
    self.address_list = []
    # Line record start address ->
    #   (function address, size, line number, file number).
    self.addresses = {}
    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    fragments = lines[0].rstrip().split()
    self.arch = fragments[2]
    self.debug_id = fragments[3]
    self.binary = ' '.join(fragments[4:])
    self.parse_lines(lines[1:])

  def parse_lines(self, lines):
    """Fill the lookup tables from the records following the MODULE line.

    Args:
      lines: iterable of text lines, one record per line.
    """
    cur_function_addr = ''
    for line in lines:
      fragments = line.split()
      if not fragments:
        # Blank line: nothing to parse.
        continue
      record = fragments[0]
      if record == 'FILE':
        assert int(fragments[1]) == len(self.files)
        self.files.append(' '.join(fragments[2:]))
      elif record == 'PUBLIC':
        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
      elif record in ('CFI', 'STACK'):
        # Unwind information ("STACK CFI ...", "STACK WIN ..."); not needed
        # for symbolization and must not be parsed as a line record.
        pass
      elif record == 'FUNC':
        # FUNC address size parameter_size name
        cur_function_addr = int(fragments[1], 16)
        # Functions without a PUBLIC record still need a name; a PUBLIC
        # record for the same address keeps precedence.
        self.symbols.setdefault(cur_function_addr, ' '.join(fragments[4:]))
      else:
        # Line starting with an address.
        addr = int(fragments[0], 16)
        self.address_list.append(addr)
        # Tuple of symbol address, size, line, file number.
        self.addresses[addr] = (cur_function_addr,
                                int(fragments[1], 16),
                                int(fragments[2]),
                                int(fragments[3]))
    self.address_list.sort()

  def get_sym_file_line(self, addr):
    """Look up the line record covering addr.

    Args:
      addr: offset inside the binary, as an int.

    Returns:
      A (symbol name, file name, line number) tuple, or None when no line
      record covers addr or the owning function has no known name.
    """
    if addr in self.addresses:
      key = addr
    else:
      # Closest line record starting below addr.
      index = bisect.bisect_left(self.address_list, addr)
      if index == 0:
        return None
      key = self.address_list[index - 1]
    sym_id, size, line_no, file_no = self.addresses[key]
    if addr >= key + size:
      # addr lies in the gap after the record.
      return None
    if sym_id not in self.symbols:
      # E.g. a line record that appeared before any FUNC record.
      return None
    return self.symbols[sym_id], self.files[file_no], line_no

  def symbolize(self, prefix, addr, offset):
    """Symbolize one frame.

    Args:
      prefix: text preceding the address in the frame line.
      addr: the frame address exactly as it appeared in the frame line.
      offset: offset inside the binary, as a hex string.

    Returns:
      "<prefix><addr> in <function> <file>:<line>", or None when the offset
      is not covered by the symbol file.
    """
    res = self.get_sym_file_line(int(offset, 16))
    if res:
      function_name, file_name, line_no = res
      return "%s%s in %s %s:%d" % (
          prefix, addr, function_name, file_name, line_no)
    else:
      return None
216
217
def symbolize_line(line):
  """Symbolize one stack-trace line (Darwin code path).

  A frame line looks like
    #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
  The symbolizer for each binary is created on first use and cached in the
  global `pipes`: a Breakpad symbolizer when the factory provides one,
  otherwise an atos-based one.  If the cached symbolizer cannot resolve a
  frame it is wrapped so that atos is used as a fallback from then on.

  Args:
    line: one line of the report being symbolized.

  Returns:
    The symbolized frame for a frame line; any other line is returned
    unchanged.
  """
  match = re.match(
      r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line)
  if not match:
    return line
  if DEBUG:
    print(line)
  prefix = match.group(1)
  # Group 2 (the frame number) is matched but not needed here.
  addr = match.group(3)
  binary = match.group(4)
  offset = match.group(5)
  if binary not in pipes:
    p = BreakpadSymbolizerFactory(addr, binary)
    if p:
      pipes[binary] = p
    else:
      pipes[binary] = DarwinSymbolizer(addr, binary)
  result = pipes[binary].symbolize(prefix, addr, offset)
  if result is None:
    pipes[binary] = ChainSymbolizer(pipes[binary],
                                    DarwinSymbolizer(addr, binary))
    # Ask again only when the first attempt failed; repeating a successful
    # lookup would needlessly start another atos process for every frame.
    result = pipes[binary].symbolize(prefix, addr, offset)
  return result
243
244
def main():
  """Symbolize the report read from stdin and print it to stdout.

  Linux reports go through addr2line, Darwin reports through the symbolizer
  classes.  On any other system only a message is printed and stdin is left
  untouched.
  """
  system = os.uname()[0]
  if system == 'Linux':
    for line in sys.stdin:
      # Prints the symbolized line itself.
      symbolize_addr2line(line)
  elif system == 'Darwin':
    for line in sys.stdin:
      print(symbolize_line(line).rstrip())
  else:
    print(' '.join(['Unknown system: ', system]))


if __name__ == '__main__':
  main()
260