asan_symbolize.py revision 63e4df4356c9949d95ad0bb6b8fd5f56de2efd00
1#!/usr/bin/env python
2#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
3#
4#                     The LLVM Compiler Infrastructure
5#
6# This file is distributed under the University of Illinois Open Source
7# License. See LICENSE.TXT for details.
8#
9#===------------------------------------------------------------------------===#
10import bisect
11import os
12import re
13import sys
14import subprocess
15
# Shared LLVMSymbolizer instance. Stays None until main() stores the result of
# LLVMSymbolizerFactory() here.
llvm_symbolizer = None
# ChainSymbolizer objects keyed by binary path; filled lazily by
# symbolize_address().
symbolizers = {}
# NOTE(review): filetypes and vmaddrs are not referenced anywhere in the
# visible code -- presumably leftovers; confirm before removing.
filetypes = {}
vmaddrs = {}
# When True, the symbolizers echo the commands they spawn and the lines they
# process to stdout.
DEBUG = False
21
22
def fix_filename(file_name):
  """Shorten a "file:line" location before it is printed.

  Every command-line argument (sys.argv[1:]) is used as a regular expression:
  everything up to and including its match is cut from file_name. After that,
  locations inside asan_*.cc files collapse to "_asan_rtl_" and
  "crtstuff.c:0" becomes "???:0".

  Args:
    file_name: location string as reported by a symbolizer backend.
  Returns:
    the rewritten location string.
  """
  shortened = file_name
  for prefix_pattern in sys.argv[1:]:
    shortened = re.sub(".*" + prefix_pattern, "", shortened)
  # Fixed rewrites, applied in this order after the user-supplied cuts.
  fixed_rewrites = (
      (".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_"),
      (".*crtstuff.c:0", "???:0"),
  )
  for pattern, replacement in fixed_rewrites:
    shortened = re.sub(pattern, replacement, shortened)
  return shortened
29
30
class Symbolizer(object):
  """Interface shared by all symbolizer backends in this file."""

  def __init__(self):
    """The base class keeps no state."""

  def symbolize(self, addr, binary, offset):
    """Describe the code location of a single instruction.

    Overridden in subclasses; this base implementation resolves nothing.

    Args:
      addr: virtual address of an instruction.
      binary: path to executable/shared object containing this instruction.
      offset: instruction offset in the @binary.
    Returns:
      list of strings (one string for each inlined frame) describing
      the code locations for this instruction (that is, function name, file
      name, line and column numbers). The base class always returns None.
    """
    return None
47
48
class LLVMSymbolizer(Symbolizer):
  """Symbolizer that talks to one long-lived llvm-symbolizer child process."""

  def __init__(self, symbolizer_path):
    """Start the llvm-symbolizer child.

    Args:
      symbolizer_path: name or path of the llvm-symbolizer executable.
    """
    super(LLVMSymbolizer, self).__init__()
    self.symbolizer_path = symbolizer_path
    # None when the tool could not be started; symbolize() then returns None
    # so that a ChainSymbolizer moves on to the next backend.
    self.pipe = self.open_llvm_symbolizer()

  def open_llvm_symbolizer(self):
    """Spawn llvm-symbolizer with piped stdin/stdout.

    Returns:
      the subprocess.Popen object, or None if the executable could not be
      launched (for example, it is not installed).
    """
    cmd = [self.symbolizer_path,
           "--use-symbol-table=false",  # FIXME: Remove this when libObject is
                                        # fixed.
           "--demangle=false",
           "--functions=true",
           "--inlining=true"]
    if DEBUG:
      print(' '.join(cmd))
    try:
      return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE)
    except OSError:
      # A missing or non-executable tool must not abort the whole script:
      # the remaining symbolizers can still handle the address.
      return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Sends "<binary> <offset>" to the child and reads pairs of lines
    (function name, then file location) until an empty line is seen.

    Returns:
      list of "<addr> in <function> <file>" strings, one per frame whose
      function and file are both known, or None if nothing was resolved.
    """
    if self.pipe is None:
      return None
    result = []
    try:
      self.pipe.stdin.write("%s %s\n" % (binary, offset))
      # Make sure the request reaches the child before blocking on its reply.
      self.pipe.stdin.flush()
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        if not function_name:
          break
        file_name = self.pipe.stdout.readline().rstrip()
        file_name = fix_filename(file_name)
        if (not function_name.startswith("??") and
            not file_name.startswith("??")):
          # Append only valid frames.
          result.append("%s in %s %s" % (addr, function_name,
                                         file_name))
    except Exception:
      # Best effort: any failure talking to the child means "not symbolized".
      result = []
    if not result:
      return None
    return result
88
89
def LLVMSymbolizerFactory(system):
  """Create the LLVM-based symbolizer for the given platform.

  Args:
    system: operating system name; only "Linux" gets a symbolizer.
  Returns:
    an LLVMSymbolizer on Linux, None on any other system.
  """
  if system != "Linux":
    return None
  # Without LLVM_SYMBOLIZER_PATH, assume llvm-symbolizer is in PATH.
  symbolizer_path = os.getenv("LLVM_SYMBOLIZER_PATH") or "llvm-symbolizer"
  return LLVMSymbolizer(symbolizer_path)
97  return None
98
99
class Addr2LineSymbolizer(Symbolizer):
  """Symbolizer backed by an `addr2line -f -e <binary>` child process."""

  def __init__(self, binary):
    """Start addr2line for the given binary.

    Args:
      binary: path of the only binary this instance can symbolize.
    """
    super(Addr2LineSymbolizer, self).__init__()
    self.binary = binary
    self.pipe = self.open_addr2line()

  def open_addr2line(self):
    """Spawn addr2line with piped stdin/stdout and return the Popen object."""
    argv = ["addr2line", "-f", "-e", self.binary]
    if DEBUG:
      print(' '.join(argv))
    return subprocess.Popen(argv,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns:
      None when asked about a different binary; otherwise a one-element list
      "<addr> in <function> <file>". Both names are empty strings if talking
      to addr2line fails.
    """
    if binary != self.binary:
      return None
    try:
      self.pipe.stdin.write("%s\n" % offset)
      child_out = self.pipe.stdout
      # The reply is two lines: function name first, then file location.
      function_name, file_name = (child_out.readline().rstrip(),
                                  child_out.readline().rstrip())
    except Exception:
      function_name, file_name = "", ""
    location = fix_filename(file_name)
    return ["%s in %s %s" % (addr, function_name, location)]
124
125
class DarwinSymbolizer(Symbolizer):
  """Symbolizer built on the `atos` and `otool` command-line tools."""

  def __init__(self, addr, binary):
    """Remember the binary and guess its architecture.

    Args:
      addr: textual address from the stack trace; only its length is used.
      binary: path of the only binary this instance can symbolize.
    """
    super(DarwinSymbolizer, self).__init__()
    self.binary = binary
    # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
    if len(addr) > 10:
      self.arch = "x86_64"
    else:
      self.arch = "i386"
    self.vmaddr = None
    self.pipe = None

  def get_binary_vmaddr(self):
    """
    Get the slide value to be added to the address.
    We're looking for the following piece in otool -l output:
      Load command 0
      cmd LC_SEGMENT
      cmdsize 736
      segname __TEXT
      vmaddr 0x00000000

    The value is computed once and cached in self.vmaddr; 0 is used when no
    __TEXT vmaddr line is found.
    """
    # Compare against None: 0 is a legitimate cached value and must not
    # trigger another otool run.
    if self.vmaddr is not None:
      return self.vmaddr
    cmdline = ["otool", "-l", self.binary]
    otool = subprocess.Popen(cmdline,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
    # communicate() reads all output, closes the pipes and reaps the child.
    otool_output = otool.communicate()[0]
    is_text = False
    vmaddr = 0
    for line in otool_output.splitlines():
      line = line.strip()
      if line.startswith('segname'):
        is_text = (line == 'segname __TEXT')
        continue
      if line.startswith('vmaddr') and is_text:
        sv = line.split(' ')
        vmaddr = int(sv[-1], 16)
        break
    self.vmaddr = vmaddr
    return self.vmaddr

  def write_addr_to_pipe(self, offset):
    """Send offset (a hex string) plus the binary's vmaddr to atos."""
    slide = self.get_binary_vmaddr()
    self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))

  def open_atos(self):
    """Spawn a fresh atos process for self.binary and store it in self.pipe."""
    if DEBUG:
      print("atos -o %s -arch %s" % (self.binary, self.arch))
    cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
    self.pipe = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)

  def _reap_atos(self):
    """Close every pipe of the current atos child and wait for it to exit."""
    for stream in (self.pipe.stdin, self.pipe.stdout, self.pipe.stderr):
      stream.close()
    self.pipe.wait()

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Runs one atos process per call.

    Returns:
      None when asked about a different binary; otherwise a one-element
      list. A well-formed atos reply becomes "<addr> in <function> <file>";
      any other reply is passed through as "<addr> in <raw atos line>".
    """
    if self.binary != binary:
      return None
    self.open_atos()
    try:
      self.write_addr_to_pipe(offset)
      # Closing stdin gives atos end-of-input.
      self.pipe.stdin.close()
      atos_line = self.pipe.stdout.readline().rstrip()
    finally:
      # One atos process is spawned per address; without this each call
      # would leave three open pipes and an unreaped child behind.
      self._reap_atos()
    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    if DEBUG:
      print(' '.join(["atos_line: ", atos_line]))
    if match:
      function_name = match.group(1)
      # Drop the parenthesised argument list from the function name.
      function_name = re.sub(r"\(.*?\)", "", function_name)
      file_name = fix_filename(match.group(3))
      return ["%s in %s %s" % (addr, function_name, file_name)]
    else:
      return ["%s in %s" % (addr, atos_line)]
197
198
199# Chain several symbolizers so that if one symbolizer fails, we fall back
200# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
  """Tries a list of symbolizers in order and returns the first answer."""

  def __init__(self, symbolizer_list):
    """Store the initial list of symbolizers.

    Args:
      symbolizer_list: symbolizers to consult, in priority order. Falsy
        entries (such as None) are allowed and are skipped.
    """
    super(ChainSymbolizer, self).__init__()
    self.symbolizer_list = symbolizer_list

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns:
      the first truthy result produced by a symbolizer in the list, or None
      if none of them produced one.
    """
    for candidate in self.symbolizer_list:
      if not candidate:
        continue
      frames = candidate.symbolize(addr, binary, offset)
      if frames:
        return frames
    return None

  def append_symbolizer(self, symbolizer):
    """Add a symbolizer to the end of the chain."""
    self.symbolizer_list.append(symbolizer)
215
216
def BreakpadSymbolizerFactory(addr, binary):
  """Create a BreakpadSymbolizer if a symbol file exists for the binary.

  The symbol file path is the binary path followed by the value of the
  BREAKPAD_SUFFIX environment variable.

  Args:
    addr: unused.
    binary: path of the binary from the stack trace.
  Returns:
    a BreakpadSymbolizer, or None when BREAKPAD_SUFFIX is unset/empty or the
    symbol file does not exist.
  """
  suffix = os.getenv("BREAKPAD_SUFFIX")
  if not suffix:
    return None
  symbol_file = binary + suffix
  if not os.access(symbol_file, os.F_OK):
    return None
  return BreakpadSymbolizer(symbol_file)
224
225
def SystemSymbolizerFactory(system, addr, binary):
  """Create the platform's native symbolizer.

  Args:
    system: operating system name.
    addr: textual address, forwarded to DarwinSymbolizer.
    binary: path of the binary to symbolize.
  Returns:
    an Addr2LineSymbolizer on 'Linux', a DarwinSymbolizer on 'Darwin',
    None for any other system.
  """
  if system == 'Linux':
    return Addr2LineSymbolizer(binary)
  if system == 'Darwin':
    return DarwinSymbolizer(addr, binary)
  return None
231
232
class BreakpadSymbolizer(Symbolizer):
  """Symbolizer that resolves offsets from a Breakpad text symbol file."""

  def __init__(self, filename):
    """Load and index a Breakpad symbol file.

    Args:
      filename: path of the symbol file. Its first line must be a MODULE
        record, for example:
          MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    """
    super(BreakpadSymbolizer, self).__init__()
    self.filename = filename
    # The context manager closes the file as soon as it has been read.
    with open(filename) as symbol_file:
      lines = symbol_file.readlines()
    self.files = []         # FILE records, indexed by file number.
    self.symbols = {}       # Symbol start address -> symbol name.
    self.address_list = []  # Sorted start addresses of all line records.
    self.addresses = {}     # Line record address -> (symbol address, size,
                            #                         line, file number).
    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    fragments = lines[0].rstrip().split()
    self.arch = fragments[2]
    self.debug_id = fragments[3]
    self.binary = ' '.join(fragments[4:])
    self.parse_lines(lines[1:])

  def parse_lines(self, lines):
    """Index the FILE, PUBLIC, FUNC and line records found in lines.

    Blank lines and CFI/STACK/INFO records are ignored.
    """
    cur_function_addr = ''
    for line in lines:
      fragments = line.split()
      if not fragments:
        # Blank line: nothing to index.
        continue
      record_type = fragments[0]
      if record_type == 'FILE':
        assert int(fragments[1]) == len(self.files)
        self.files.append(' '.join(fragments[2:]))
      elif record_type == 'PUBLIC':
        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
      elif record_type in ('CFI', 'STACK', 'INFO'):
        # Not needed for symbolization.
        pass
      elif record_type == 'FUNC':
        cur_function_addr = int(fragments[1], 16)
        # A name already registered for this address takes precedence.
        if cur_function_addr not in self.symbols:
          self.symbols[cur_function_addr] = ' '.join(fragments[4:])
      else:
        # Line starting with an address.
        addr = int(fragments[0], 16)
        self.address_list.append(addr)
        # Tuple of symbol address, size, line, file number.
        self.addresses[addr] = (cur_function_addr,
                                int(fragments[1], 16),
                                int(fragments[2]),
                                int(fragments[3]))
    self.address_list.sort()

  def get_sym_file_line(self, addr):
    """Look up the line record that covers addr.

    Args:
      addr: integer offset inside the module.
    Returns:
      (symbol name, file name, line number), or None if no line record
      covers addr.
    """
    if addr in self.addresses:
      key = addr
    else:
      # Closest line record that starts below addr.
      index = bisect.bisect_left(self.address_list, addr)
      if index == 0:
        return None
      key = self.address_list[index - 1]
    sym_id, size, line_no, file_no = self.addresses[key]
    symbol = self.symbols[sym_id]
    filename = self.files[file_no]
    if addr < key + size:
      return symbol, filename, line_no
    return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns:
      a one-element list "<addr> in <function> <file>:<line>", or None when
      binary differs from the module name in the symbol file or the offset
      is not covered by any line record.
    """
    if self.binary != binary:
      return None
    res = self.get_sym_file_line(int(offset, 16))
    if not res:
      return None
    function_name, file_name, line_no = res
    # Only return the frame; printing it here would inject a raw list repr
    # into the symbolized output.
    return ["%s in %s %s:%d" % (addr, function_name, file_name, line_no)]
302
303
def symbolize_address(system, addr, binary, offset):
  """Symbolize one stack frame using a per-binary chain of symbolizers.

  The chain is: Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
  (fall back to the next symbolizer if the previous one fails). Chains are
  kept in the module-level `symbolizers` dict, keyed by binary.

  Args:
    system: operating system name, used to pick the system symbolizer.
    addr: virtual address of the instruction, as text.
    binary: path of the binary containing the instruction.
    offset: offset of the instruction inside binary, as text.
  Returns:
    non-empty list of symbolized frame strings.
  """
  if binary not in symbolizers:
    symbolizers[binary] = ChainSymbolizer(
        [BreakpadSymbolizerFactory(addr, binary), llvm_symbolizer])
  chain = symbolizers[binary]
  frames = chain.symbolize(addr, binary, offset)
  if frames is None:
    # Initialize system symbolizer only if other symbolizers failed.
    chain.append_symbolizer(SystemSymbolizerFactory(system, addr, binary))
    frames = chain.symbolize(addr, binary, offset)
  # The system symbolizer must produce some result.
  assert frames
  return frames
320
def main():
  """Read a report from stdin and print it with stack frames symbolized.

  Lines that do not look like stack frames are echoed with trailing
  whitespace stripped. Each frame line is replaced by one output line per
  symbolized (possibly inlined) frame, renumbered with a running counter
  that restarts at every frame #0.
  """
  global llvm_symbolizer
  system = os.uname()[0]
  llvm_symbolizer = LLVMSymbolizerFactory(system)
  if system not in ['Linux', 'Darwin']:
    print(' '.join(['Unknown system: ', system]))
    return
  # Example of a frame line:
  #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
  stack_trace_line_format = (
      r"^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)")
  frame_no = 0
  for line in sys.stdin:
    match = re.match(stack_trace_line_format, line)
    if not match:
      print(line.rstrip())
      continue
    if DEBUG:
      print(line)
    _prefix, frameno_str, addr, binary, offset = match.groups()
    if frameno_str == "0":
      # Assume that frame #0 is the first frame of new stack trace.
      frame_no = 0
    symbolized_line = symbolize_address(system, addr, binary, offset)
    if not symbolized_line:
      print(line.rstrip())
      continue
    for symbolized_frame in symbolized_line:
      print("    #" + str(frame_no) + " " + symbolized_frame.rstrip())
      frame_no += 1
349    print 'Unknown system: ', system
350
351
# Run the symbolizer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()
354