1#!/usr/bin/env python
2#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
3#
4#                     The LLVM Compiler Infrastructure
5#
6# This file is distributed under the University of Illinois Open Source
7# License. See LICENSE.TXT for details.
8#
9#===------------------------------------------------------------------------===#
10import bisect
11import getopt
12import os
13import pty
14import re
15import subprocess
16import sys
17import termios
18
# Cache of per-binary symbolizer chains, keyed by binary path.
symbolizers = dict()
# When True, the commands being run and the lines being processed are echoed.
DEBUG = False
# When True, the external symbolizers are asked to demangle symbol names.
demangle = False
22
23
24# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
  """Shorten a source location string for display.

  Each non-option command-line argument is used as a regular expression
  (it is not escaped); everything in file_name up to and including a match
  of that argument is removed.  Locations inside the ASan runtime are then
  collapsed to '_asan_rtl_' and 'crtstuff.c:0' is replaced by '???:0'.

  Args:
      file_name: location string, e.g. '/path/to/file.cc:12'.
  Returns:
      the shortened location string.
  """
  for path_to_cut in sys.argv[1:]:
    # Option flags such as -d / --demangle are not path prefixes.  Using
    # them as patterns would mangle any path that happens to contain the
    # flag text (e.g. '-d' inside '/build-dir/').
    if path_to_cut.startswith('-'):
      continue
    file_name = re.sub('.*' + path_to_cut, '', file_name)
  file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
  file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
  return file_name
31
def GuessArch(addr):
  """Guess the architecture from the textual width of an address.

  Args:
      addr: address string such as '0x7f6e35cf2e45'.
  Returns:
      'x86_64' if the string is longer than 10 characters, else 'i386'.
  """
  # '0x' plus 8 hex digits is the widest a 32-bit address can be.
  max_32bit_len = len('0x') + 8
  return 'x86_64' if len(addr) > max_32bit_len else 'i386'
38
class Symbolizer(object):
  """Base class for symbolizers; this implementation resolves nothing."""

  def __init__(self):
    """Nothing to set up in the base class."""

  def symbolize(self, addr, binary, offset):
    """Symbolize the given address (pair of binary and offset).

    Overridden in subclasses.
    Args:
        addr: virtual address of an instruction.
        binary: path to executable/shared object containing this instruction.
        offset: instruction offset in the @binary.
    Returns:
        list of strings (one string for each inlined frame) describing
        the code locations for this instruction (that is, function name, file
        name, line and column numbers).  The base class always returns None.
    """
    return None
57
58
class LLVMSymbolizer(Symbolizer):
  """Symbolizer that talks to an llvm-symbolizer child process over pipes."""

  def __init__(self, symbolizer_path, addr):
    """Remember the tool path, guess the arch from addr and start the tool."""
    super(LLVMSymbolizer, self).__init__()
    self.symbolizer_path = symbolizer_path
    self.default_arch = GuessArch(addr)
    self.pipe = self.open_llvm_symbolizer()

  def open_llvm_symbolizer(self):
    """Start the symbolizer process.

    Returns:
        the subprocess.Popen object, or None if starting it raised OSError.
    """
    argv = [self.symbolizer_path]
    argv.append('--use-symbol-table=true')
    argv.append('--demangle=%s' % demangle)
    argv.append('--functions=short')
    argv.append('--inlining=true')
    argv.append('--default-arch=%s' % self.default_arch)
    if DEBUG:
      print(' '.join(argv))
    try:
      return subprocess.Popen(argv, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE)
    except OSError:
      return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns None when the process is not running, when talking to it
    raises, or when it reports no non-trivial frames.
    """
    if not self.pipe:
      return None
    frames = []
    try:
      request = '%s %s' % (binary, offset)
      if DEBUG:
        print(request)
      self.pipe.stdin.write(request + '\n')
      # The reply is read as (function name, file location) line pairs and
      # ends at the first empty function-name line.
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        if not function_name:
          break
        file_name = fix_filename(self.pipe.stdout.readline().rstrip())
        if function_name.startswith('??') and file_name.startswith('??'):
          # Skip trivial frames: neither function nor location is known.
          continue
        frames.append('%s in %s %s' % (addr, function_name, file_name))
    except Exception:
      frames = []
    return frames or None
108
109
def LLVMSymbolizerFactory(system, addr):
  """Create an LLVMSymbolizer, locating the tool via the environment.

  LLVM_SYMBOLIZER_PATH is consulted first, then ASAN_SYMBOLIZER_PATH; if
  neither is set (or both are empty) 'llvm-symbolizer' is assumed to be in
  PATH.  The system argument is not used.
  """
  symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                     os.getenv('ASAN_SYMBOLIZER_PATH') or
                     'llvm-symbolizer')
  return LLVMSymbolizer(symbolizer_path, addr)
118
119
class Addr2LineSymbolizer(Symbolizer):
  """Symbolizer that feeds offsets to an addr2line process for one binary."""

  def __init__(self, binary):
    """Start addr2line for the given binary."""
    super(Addr2LineSymbolizer, self).__init__()
    self.binary = binary
    self.pipe = self.open_addr2line()

  def open_addr2line(self):
    """Spawn 'addr2line -f [--demangle] -e <binary>' and return the Popen."""
    demangle_flags = ['--demangle'] if demangle else []
    argv = ['addr2line', '-f'] + demangle_flags + ['-e', self.binary]
    if DEBUG:
      print(' '.join(argv))
    return subprocess.Popen(argv,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns None for a binary other than the one this instance was created
    for; otherwise always returns a one-element list, with empty function
    and file names if communicating with addr2line raised.
    """
    if binary != self.binary:
      return None
    try:
      self.pipe.stdin.write('%s\n' % offset)
      function_name = self.pipe.stdout.readline().rstrip()
      file_name = self.pipe.stdout.readline().rstrip()
    except Exception:
      function_name = file_name = ''
    location = fix_filename(file_name)
    return ['%s in %s %s' % (addr, function_name, location)]
149
150
class UnbufferedLineConverter(object):
  """
  Wrap a child process that responds to each line of input with one line of
  output.  Uses pty to trick the child into providing unbuffered output.
  """
  def __init__(self, args, close_stderr=False):
    """Fork a child on a pty and exec the given command in it.

    Args:
        args: command line as a list; args[0] is looked up via PATH.
        close_stderr: if True, the child's stderr is redirected to /dev/null.
    """
    pid, fd = pty.fork()
    if pid == 0:
      # We're the child. Transfer control to command.
      try:
        if close_stderr:
          dev_null = os.open('/dev/null', os.O_WRONLY)
          os.dup2(dev_null, 2)
        os.execvp(args[0], args)
      finally:
        # execvp does not return on success, so this is reached only if the
        # redirection or the exec failed.  Exit immediately: otherwise the
        # forked child would keep running the parent's Python code.
        os._exit(127)
    else:
      # Disable echoing.
      attr = termios.tcgetattr(fd)
      attr[3] = attr[3] & ~termios.ECHO
      termios.tcsetattr(fd, termios.TCSANOW, attr)
      # Set up a file()-like interface to the child process
      self.r = os.fdopen(fd, "r", 1)
      self.w = os.fdopen(os.dup(fd), "w", 1)

  def convert(self, line):
    """Send one line to the child and return its one-line reply."""
    self.w.write(line + "\n")
    return self.readline()

  def readline(self):
    """Read one line from the child, with trailing whitespace stripped."""
    return self.r.readline().rstrip()
179
180
class DarwinSymbolizer(Symbolizer):
  """Symbolizer that queries an 'atos' process for a single binary."""

  def __init__(self, addr, binary):
    """Guess the arch from addr and start atos for the given binary."""
    super(DarwinSymbolizer, self).__init__()
    self.binary = binary
    self.arch = GuessArch(addr)
    self.open_atos()

  def open_atos(self):
    """Start atos behind an UnbufferedLineConverter, stderr discarded."""
    if DEBUG:
      print('atos -o %s -arch %s' % (self.binary, self.arch))
    self.atos = UnbufferedLineConverter(
        ['atos', '-o', self.binary, '-arch', self.arch], close_stderr=True)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns None for a binary other than the one this instance was created
    for; otherwise a one-element list built from the atos reply.
    """
    if binary != self.binary:
      return None
    reply = self.atos.convert('0x%x' % int(offset, 16))
    # Skip over "got symbolicator for" lines until the actual answer.
    while "got symbolicator for" in reply:
      reply = self.atos.readline()
    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    parsed = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', reply)
    if DEBUG:
      print(' '.join(['atos_line: ', reply]))
    if not parsed:
      # Unrecognized reply: pass it through verbatim.
      return ['%s in %s' % (addr, reply)]
    # Drop parenthesized parts (e.g. the argument list) from the name.
    function_name = re.sub('\(.*?\)', '', parsed.group(1))
    file_name = fix_filename(parsed.group(3))
    return ['%s in %s %s' % (addr, function_name, file_name)]
213
214
215# Chain several symbolizers so that if one symbolizer fails, we fall back
216# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
  """Tries a list of symbolizers in order and returns the first result."""

  def __init__(self, symbolizer_list):
    """Keep a reference to symbolizer_list (entries may be None)."""
    super(ChainSymbolizer, self).__init__()
    self.symbolizer_list = symbolizer_list

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns the first truthy result produced by a symbolizer in the list,
    or None if every symbolizer fails.  Falsy list entries are skipped.
    """
    for candidate in self.symbolizer_list:
      if not candidate:
        continue
      frames = candidate.symbolize(addr, binary, offset)
      if frames:
        return frames
    return None

  def append_symbolizer(self, symbolizer):
    """Add a symbolizer to the end of the chain."""
    self.symbolizer_list.append(symbolizer)
233
234
def BreakpadSymbolizerFactory(binary):
  """Create a BreakpadSymbolizer for binary if a symbol file exists.

  The symbol file name is binary plus the value of the BREAKPAD_SUFFIX
  environment variable.  Returns None when the variable is unset or empty,
  or when that file does not exist.
  """
  suffix = os.getenv('BREAKPAD_SUFFIX')
  if not suffix:
    return None
  filename = binary + suffix
  if not os.access(filename, os.F_OK):
    return None
  return BreakpadSymbolizer(filename)
242
243
def SystemSymbolizerFactory(system, addr, binary):
  """Create the platform's native symbolizer for binary.

  Returns a DarwinSymbolizer on 'Darwin', an Addr2LineSymbolizer on 'Linux'
  and None for any other system name.
  """
  if system == 'Darwin':
    return DarwinSymbolizer(addr, binary)
  if system == 'Linux':
    return Addr2LineSymbolizer(binary)
  return None
249
250
class BreakpadSymbolizer(Symbolizer):
  """Symbolizer backed by a Breakpad text symbol file parsed up front."""

  def __init__(self, filename):
    """Read and index the symbol file.

    Args:
        filename: path to the Breakpad symbol file; its first line must be
            the MODULE record.
    """
    super(BreakpadSymbolizer, self).__init__()
    self.filename = filename
    # Close the file explicitly instead of relying on garbage collection.
    symbol_file = open(filename)
    try:
      lines = symbol_file.readlines()
    finally:
      symbol_file.close()
    self.files = []
    self.symbols = {}
    self.address_list = []
    self.addresses = {}
    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    fragments = lines[0].rstrip().split()
    self.arch = fragments[2]
    self.debug_id = fragments[3]
    self.binary = ' '.join(fragments[4:])
    self.parse_lines(lines[1:])

  def parse_lines(self, lines):
    """Populate files, symbols, addresses and address_list from records.

    Args:
        lines: the symbol file lines following the MODULE record.
    """
    cur_function_addr = ''
    for line in lines:
      fragments = line.split()
      if not fragments:
        # Blank line: nothing to parse.
        continue
      record = fragments[0]
      if record == 'FILE':
        assert int(fragments[1]) == len(self.files)
        self.files.append(' '.join(fragments[2:]))
      elif record == 'PUBLIC':
        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
      elif record in ('CFI', 'STACK', 'INFO'):
        # Records that carry no symbol or line information we use.
        pass
      elif record == 'FUNC':
        cur_function_addr = int(fragments[1], 16)
        # A PUBLIC name already recorded for this address is kept.
        # Test membership on the dict itself: .keys() is a list in
        # Python 2, which made this check linear per FUNC record.
        if cur_function_addr not in self.symbols:
          self.symbols[cur_function_addr] = ' '.join(fragments[4:])
      else:
        # Line starting with an address.
        addr = int(record, 16)
        self.address_list.append(addr)
        # Tuple of symbol address, size, line, file number.
        self.addresses[addr] = (cur_function_addr,
                                int(fragments[1], 16),
                                int(fragments[2]),
                                int(fragments[3]))
    self.address_list.sort()

  def get_sym_file_line(self, addr):
    """Look up the line record covering addr.

    Args:
        addr: integer offset inside the binary.
    Returns:
        (symbol name, file name, line number), or None if no line record
        covers addr.
    """
    if addr in self.addresses:
      key = addr
    else:
      # Closest line record that starts below addr.
      index = bisect.bisect_left(self.address_list, addr)
      if index == 0:
        return None
      key = self.address_list[index - 1]
    sym_id, size, line_no, file_no = self.addresses[key]
    if addr >= key + size:
      # addr lies past the end of the closest preceding record.
      return None
    return self.symbols[sym_id], self.files[file_no], line_no

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns None for a binary other than the one named in the MODULE
    record, or when the offset is not covered by any line record.
    """
    if self.binary != binary:
      return None
    res = self.get_sym_file_line(int(offset, 16))
    if not res:
      return None
    function_name, file_name, line_no = res
    result = ['%s in %s %s:%d' % (
        addr, function_name, file_name, line_no)]
    # Only echo the result in debug mode; the caller prints the frame, so an
    # unconditional print here would pollute the symbolized output.
    if DEBUG:
      print(result)
    return result
323
324
325class SymbolizationLoop(object):
326  def __init__(self, binary_name_filter=None):
327    # Used by clients who may want to supply a different binary name.
328    # E.g. in Chrome several binaries may share a single .dSYM.
329    self.binary_name_filter = binary_name_filter
330    self.system = os.uname()[0]
331    if self.system not in ['Linux', 'Darwin']:
332      raise Exception('Unknown system')
333    self.llvm_symbolizer = None
334
335  def symbolize_address(self, addr, binary, offset):
336    # Initialize llvm-symbolizer lazily.
337    if not self.llvm_symbolizer:
338      self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr)
339    # Use the chain of symbolizers:
340    # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
341    # (fall back to next symbolizer if the previous one fails).
342    if not binary in symbolizers:
343      symbolizers[binary] = ChainSymbolizer(
344          [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
345    result = symbolizers[binary].symbolize(addr, binary, offset)
346    if result is None:
347      # Initialize system symbolizer only if other symbolizers failed.
348      symbolizers[binary].append_symbolizer(
349          SystemSymbolizerFactory(self.system, addr, binary))
350      result = symbolizers[binary].symbolize(addr, binary, offset)
351    # The system symbolizer must produce some result.
352    assert result
353    return result
354
355  def print_symbolized_lines(self, symbolized_lines):
356    if not symbolized_lines:
357      print self.current_line
358    else:
359      for symbolized_frame in symbolized_lines:
360        print '    #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip()
361        self.frame_no += 1
362
363  def process_stdin(self):
364    self.frame_no = 0
365    while True:
366      line = sys.stdin.readline()
367      if not line:
368        break
369      self.current_line = line.rstrip()
370      #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
371      stack_trace_line_format = (
372          '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
373      match = re.match(stack_trace_line_format, line)
374      if not match:
375        print self.current_line
376        continue
377      if DEBUG:
378        print line
379      _, frameno_str, addr, binary, offset = match.groups()
380      if frameno_str == '0':
381        # Assume that frame #0 is the first frame of new stack trace.
382        self.frame_no = 0
383      original_binary = binary
384      if self.binary_name_filter:
385        binary = self.binary_name_filter(binary)
386      symbolized_line = self.symbolize_address(addr, binary, offset)
387      if not symbolized_line:
388        if original_binary != binary:
389          symbolized_line = self.symbolize_address(addr, binary, offset)
390      self.print_symbolized_lines(symbolized_line)
391
392
if __name__ == '__main__':
  # -d / --demangle turns on symbol demangling; remaining arguments are
  # left in args.
  opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"])
  if any(flag in ("-d", "--demangle") for flag, _ in opts):
    demangle = True
  loop = SymbolizationLoop()
  loop.process_stdin()
400