11e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryany#!/usr/bin/env python
21e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryany#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
31e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryany#
41e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryany#                     The LLVM Compiler Infrastructure
51e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryany#
61e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryany# This file is distributed under the University of Illinois Open Source
71e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryany# License. See LICENSE.TXT for details.
81e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryany#
91e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryany#===------------------------------------------------------------------------===#
108e39869fdc1c56eeaedb6b793e5ada67f41925f3Alexander Potapenkoimport bisect
118e32db4efc90a4d136786b4bdda62363df99e12cKostya Serebryanyimport getopt
121e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryanyimport os
131e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryanyimport re
141e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryanyimport subprocess
15ce31aa700275ab17aa42db0d1d175abddc89eb2eAlexander Potapenkoimport sys
161e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryany
# Module-level state shared by the code below.

# NOTE(review): no use of this name is visible in this file -- confirm
# whether anything still reads it.
llvm_symbolizer = None
# Cache of ChainSymbolizer objects keyed by binary path; populated lazily by
# SymbolizationLoop.symbolize_address().
symbolizers = dict()
# NOTE(review): filetypes and vmaddrs are not referenced in this file --
# presumably kept for external users; verify before removing.
filetypes = dict()
vmaddrs = dict()
# When true, the symbolizers echo the commands and inputs they hand to the
# external tools.
DEBUG = False
# Whether the external tools are asked to demangle names; switched on by the
# -d/--demangle command-line option.
demangle = False
231e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryany
24970a9b9c6c6bb894557fe2d1779118ee8c3070d8Alexander Potapenko
25b2546c44c828a546a98c091c714b71b1c9966673Alexander Potapenko# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
  """Shorten a source location string reported by an external symbolizer.

  Every non-option command-line argument is treated as a regular expression
  for a path prefix: everything up to and including its last match is cut
  from file_name.  Afterwards locations inside asan_*.cc files are collapsed
  to '_asan_rtl_' and 'crtstuff.c:0' is rewritten to '???:0'.

  Args:
      file_name: location string, typically 'path/to/file.cc:line'.
  Returns:
      the rewritten location string.
  """
  for path_to_cut in sys.argv[1:]:
    # Option switches such as -d/--demangle live in sys.argv next to the
    # prefixes to cut.  Using them as patterns would mangle any path that
    # happens to contain e.g. '-d' ('/src/my-dir/f.cc' -> 'ir/f.cc').
    if path_to_cut.startswith('-'):
      continue
    file_name = re.sub('.*' + path_to_cut, '', file_name)
  # The dots before the extensions are literal; left unescaped they would
  # match any character.
  file_name = re.sub(r'.*asan_[a-z_]*\.cc:[0-9]*', '_asan_rtl_', file_name)
  file_name = re.sub(r'.*crtstuff\.c:0', '???:0', file_name)
  return file_name
3200424110b4ae3d29cef6e635de9b32232b8d7270Alexander Potapenko
3300424110b4ae3d29cef6e635de9b32232b8d7270Alexander Potapenko
class Symbolizer(object):
  """Interface shared by all symbolizers in this file."""

  def __init__(self):
    pass

  def symbolize(self, addr, binary, offset):
    """Describe the code location of one instruction.

    This base implementation resolves nothing and always returns None;
    subclasses override it.

    Args:
        addr: virtual address of an instruction.
        binary: path to executable/shared object containing this instruction.
        offset: instruction offset in the @binary.
    Returns:
        In subclasses: a list of strings, one per inlined frame, each giving
        the function name, file name, line and column numbers for this
        instruction.  None when the location cannot be resolved.
    """
    return None
5263e4df4356c9949d95ad0bb6b8fd5f56de2efd00Alexey Samsonov
5363e4df4356c9949d95ad0bb6b8fd5f56de2efd00Alexey Samsonov
class LLVMSymbolizer(Symbolizer):
  """Symbolizer that talks to one long-running llvm-symbolizer process."""

  def __init__(self, symbolizer_path):
    """Start the symbolizer found at (or named by) symbolizer_path."""
    super(LLVMSymbolizer, self).__init__()
    self.symbolizer_path = symbolizer_path
    # None when the process could not be started; symbolize() then declines.
    self.pipe = self.open_llvm_symbolizer()

  def open_llvm_symbolizer(self):
    """Launch llvm-symbolizer.

    Returns:
        the subprocess.Popen object, or None if the program could not be
        started (missing, not executable, ...).
    """
    cmd = [self.symbolizer_path,
           '--use-symbol-table=true',
           '--demangle=%s' % demangle,
           '--functions=true',
           '--inlining=true']
    if DEBUG:
      print(' '.join(cmd))
    # Let Popen do the lookup instead of testing os.path.exists(): a bare
    # program name such as 'llvm-symbolizer' has to be resolved through PATH,
    # whereas os.path.exists() would only look in the current directory.
    try:
      return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE)
    except OSError:
      return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns:
        list of '<addr> in <function> <file>' strings, one per (inlined)
        frame, or None if the symbolizer is unavailable, failed, or produced
        only unknown ('??') frames.
    """
    if not self.pipe:
      return None
    result = []
    try:
      symbolizer_input = '%s %s' % (binary, offset)
      if DEBUG:
        print(symbolizer_input)
      self.pipe.stdin.write(symbolizer_input + '\n')
      # Make sure the request reaches the child before we block on its reply.
      self.pipe.stdin.flush()
      # The reply is a sequence of (function name, file name) line pairs
      # terminated by an empty line.
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        if not function_name:
          break
        file_name = fix_filename(self.pipe.stdout.readline().rstrip())
        if (not function_name.startswith('??') and
            not file_name.startswith('??')):
          # Append only valid frames.
          result.append('%s in %s %s' % (addr, function_name, file_name))
    except Exception:
      # Best effort: on any failure report "nothing found" so that the next
      # symbolizer in the chain gets a chance.
      result = []
    return result or None
9963e4df4356c9949d95ad0bb6b8fd5f56de2efd00Alexey Samsonov
10063e4df4356c9949d95ad0bb6b8fd5f56de2efd00Alexey Samsonov
def LLVMSymbolizerFactory(system):
  """Create an LLVMSymbolizer.

  The program is taken from the LLVM_SYMBOLIZER_PATH environment variable;
  when that is unset or empty, 'llvm-symbolizer' is assumed to be in PATH.
  The system argument is accepted but not used here.
  """
  path_from_env = os.getenv('LLVM_SYMBOLIZER_PATH')
  return LLVMSymbolizer(path_from_env if path_from_env else 'llvm-symbolizer')
107897e89f84c42bd166521684c1aedb7d5bec6954bAlexander Potapenko
108897e89f84c42bd166521684c1aedb7d5bec6954bAlexander Potapenko
class Addr2LineSymbolizer(Symbolizer):
  """Symbolizer that feeds offsets to an 'addr2line -f' child process."""

  def __init__(self, binary):
    """Start addr2line for the given binary."""
    super(Addr2LineSymbolizer, self).__init__()
    self.binary = binary
    self.pipe = self.open_addr2line()

  def open_addr2line(self):
    """Launch addr2line on self.binary and return the Popen object."""
    demangle_flag = ['--demangle'] if demangle else []
    cmd = ['addr2line', '-f'] + demangle_flag + ['-e', self.binary]
    if DEBUG:
      print(' '.join(cmd))
    return subprocess.Popen(cmd,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns None for a binary other than the one this instance was created
    for; otherwise always returns a one-element list, with empty function
    and file names if talking to addr2line failed.
    """
    if binary != self.binary:
      return None
    try:
      self.pipe.stdin.write('%s\n' % offset)
      # addr2line -f answers with two lines: function name, then file:line.
      function_name = self.pipe.stdout.readline().rstrip()
      file_name = self.pipe.stdout.readline().rstrip()
    except Exception:
      function_name, file_name = '', ''
    location = fix_filename(file_name)
    return ['%s in %s %s' % (addr, function_name, location)]
13800424110b4ae3d29cef6e635de9b32232b8d7270Alexander Potapenko
13900424110b4ae3d29cef6e635de9b32232b8d7270Alexander Potapenko
class DarwinSymbolizer(Symbolizer):
  """Symbolizer that runs one atos process per symbolized address."""

  def __init__(self, addr, binary):
    """Remember the binary and guess its architecture from addr's width."""
    super(DarwinSymbolizer, self).__init__()
    self.binary = binary
    # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
    if len(addr) > 10:
      self.arch = 'x86_64'
    else:
      self.arch = 'i386'
    self.vmaddr = None
    self.pipe = None

  def write_addr_to_pipe(self, offset):
    """Send offset (a hex string) to atos, normalised to '0x...' form."""
    self.pipe.stdin.write('0x%x\n' % int(offset, 16))

  def open_atos(self):
    """Start a fresh atos process and store it in self.pipe."""
    if DEBUG:
      print('atos -o %s -arch %s' % (self.binary, self.arch))
    cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
    self.pipe = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns None for a binary other than the one this instance was created
    for; otherwise a one-element list.  If the atos reply is not in the
    expected format it is passed through verbatim after '<addr> in '.
    """
    if self.binary != binary:
      return None
    self.open_atos()
    self.write_addr_to_pipe(offset)
    # communicate() closes stdin, drains stdout and stderr and waits for the
    # child.  Reading a single line and abandoning the process would leave a
    # zombie and three open pipes behind for every symbolized address.
    output, _ = self.pipe.communicate()
    atos_line = output.split('\n', 1)[0].rstrip()
    if DEBUG:
      print('atos_line:  %s' % atos_line)
    return self._format_atos_line(addr, atos_line)

  def _format_atos_line(self, addr, atos_line):
    """Convert one line of atos output into the list returned by symbolize."""
    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    if not match:
      return ['%s in %s' % (addr, atos_line)]
    # Drop the parenthesised argument list from the function name.
    function_name = re.sub(r'\(.*?\)', '', match.group(1))
    file_name = fix_filename(match.group(3))
    return ['%s in %s %s' % (addr, function_name, file_name)]
1848e39869fdc1c56eeaedb6b793e5ada67f41925f3Alexander Potapenko
1858e39869fdc1c56eeaedb6b793e5ada67f41925f3Alexander Potapenko
18663e4df4356c9949d95ad0bb6b8fd5f56de2efd00Alexey Samsonov# Chain several symbolizers so that if one symbolizer fails, we fall back
18763e4df4356c9949d95ad0bb6b8fd5f56de2efd00Alexey Samsonov# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
  """Asks a list of symbolizers in order and returns the first real answer."""

  def __init__(self, symbolizer_list):
    """Store symbolizer_list (not copied); falsy entries are tolerated."""
    super(ChainSymbolizer, self).__init__()
    self.symbolizer_list = symbolizer_list

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns the first truthy result produced by a member of the chain, or
    None if every member declined.
    """
    for candidate in self.symbolizer_list:
      if not candidate:
        # E.g. a factory that returned None.
        continue
      frames = candidate.symbolize(addr, binary, offset)
      if frames:
        return frames
    return None

  def append_symbolizer(self, symbolizer):
    """Add symbolizer to the end of the chain."""
    self.symbolizer_list.append(symbolizer)
2048e39869fdc1c56eeaedb6b793e5ada67f41925f3Alexander Potapenko
2051e172b4bdec57329bf904f063a29f99cddf2d85fKostya Serebryany
def BreakpadSymbolizerFactory(binary):
  """Create a BreakpadSymbolizer for binary, if a symbol file is available.

  The symbol file name is binary plus the value of the BREAKPAD_SUFFIX
  environment variable.  Returns None when the variable is unset or empty,
  or when that file does not exist.
  """
  suffix = os.getenv('BREAKPAD_SUFFIX')
  if not suffix:
    return None
  filename = binary + suffix
  if not os.access(filename, os.F_OK):
    return None
  return BreakpadSymbolizer(filename)
2138e39869fdc1c56eeaedb6b793e5ada67f41925f3Alexander Potapenko
2148e39869fdc1c56eeaedb6b793e5ada67f41925f3Alexander Potapenko
def SystemSymbolizerFactory(system, addr, binary):
  """Create the platform's native symbolizer for binary.

  Returns a DarwinSymbolizer on 'Darwin', an Addr2LineSymbolizer on 'Linux'
  and None for any other system name.
  """
  if system == 'Darwin':
    return DarwinSymbolizer(addr, binary)
  if system == 'Linux':
    return Addr2LineSymbolizer(binary)
  return None
220897e89f84c42bd166521684c1aedb7d5bec6954bAlexander Potapenko
221897e89f84c42bd166521684c1aedb7d5bec6954bAlexander Potapenko
class BreakpadSymbolizer(Symbolizer):
  """Symbolizer that answers from a Breakpad text symbol file."""

  def __init__(self, filename):
    """Load and index the symbol file.

    Args:
        filename: path to the symbol file; its first line must be the
            MODULE record.
    """
    super(BreakpadSymbolizer, self).__init__()
    self.filename = filename
    # Close the file as soon as it has been read.
    with open(filename) as symbol_file:
      lines = symbol_file.readlines()
    # File names, indexed by the number given in their FILE record.
    self.files = []
    # Symbol start address -> symbol name.
    self.symbols = {}
    # Sorted start addresses of all line records.
    self.address_list = []
    # Line record start address ->
    #   (function address, size, line number, file number).
    self.addresses = {}
    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    fragments = lines[0].rstrip().split()
    self.arch = fragments[2]
    self.debug_id = fragments[3]
    self.binary = ' '.join(fragments[4:])
    self.parse_lines(lines[1:])

  def parse_lines(self, lines):
    """Index the records that follow the MODULE line."""
    cur_function_addr = ''
    for line in lines:
      fragments = line.split()
      if not fragments:
        # Blank line: nothing to index.
        continue
      record_type = fragments[0]
      if record_type == 'FILE':
        assert int(fragments[1]) == len(self.files)
        self.files.append(' '.join(fragments[2:]))
      elif record_type == 'PUBLIC':
        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
      elif record_type in ['CFI', 'STACK']:
        pass
      elif record_type == 'FUNC':
        cur_function_addr = int(fragments[1], 16)
        # A name already recorded for this address wins.
        if cur_function_addr not in self.symbols:
          self.symbols[cur_function_addr] = ' '.join(fragments[4:])
      else:
        # Line starting with an address.
        addr = int(record_type, 16)
        self.address_list.append(addr)
        # Tuple of symbol address, size, line, file number.
        self.addresses[addr] = (cur_function_addr,
                                int(fragments[1], 16),
                                int(fragments[2]),
                                int(fragments[3]))
    self.address_list.sort()

  def get_sym_file_line(self, addr):
    """Look up the line record covering addr.

    Args:
        addr: integer offset inside the module.
    Returns:
        (symbol name, file name, line number), or None if no line record
        covers addr.
    """
    # Index of the first start address greater than addr; the record just
    # before it is the only one that can contain addr.
    index = bisect.bisect_right(self.address_list, addr)
    if index == 0:
      return None
    key = self.address_list[index - 1]
    sym_id, size, line_no, file_no = self.addresses[key]
    if addr >= key + size:
      return None
    return self.symbols[sym_id], self.files[file_no], line_no

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns a one-element list '<addr> in <function> <file>:<line>', or None
    if binary is not the module named in the symbol file or the offset is
    not covered by any line record.
    """
    if self.binary != binary:
      return None
    res = self.get_sym_file_line(int(offset, 16))
    if not res:
      return None
    function_name, file_name, line_no = res
    result = ['%s in %s %s:%d' % (
        addr, function_name, file_name, line_no)]
    # Diagnostic only: printing unconditionally would interleave Python list
    # reprs with the symbolized report.
    if DEBUG:
      print(result)
    return result
2948e39869fdc1c56eeaedb6b793e5ada67f41925f3Alexander Potapenko
2958e39869fdc1c56eeaedb6b793e5ada67f41925f3Alexander Potapenko
class SymbolizationLoop(object):
  """Copies a report from stdin to stdout, symbolizing its stack frames."""

  # Matches a raw stack frame line such as
  #   #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
  # Groups: frame prefix, frame number, address, binary path, offset.
  # Compiled once instead of on every input line.
  _STACK_TRACE_LINE_RE = re.compile(
      r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')

  def __init__(self, binary_name_filter=None):
    """Set up the loop.

    Args:
        binary_name_filter: optional callable mapping the binary path found
            in a frame to the path that should be symbolized instead.
    Raises:
        Exception: if the system is neither Linux nor Darwin.
    """
    # Used by clients who may want to supply a different binary name.
    # E.g. in Chrome several binaries may share a single .dSYM.
    self.binary_name_filter = binary_name_filter
    # Initialised here so that print_symbolized_lines() is usable even
    # before process_stdin() has run.
    self.frame_no = 0
    self.current_line = ''
    self.system = os.uname()[0]
    if self.system in ['Linux', 'Darwin']:
      self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
    else:
      raise Exception('Unknown system')

  def symbolize_address(self, addr, binary, offset):
    """Symbolize one frame; returns a non-empty list of frame strings."""
    # Use the chain of symbolizers:
    # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
    # (fall back to next symbolizer if the previous one fails).
    if binary not in symbolizers:
      symbolizers[binary] = ChainSymbolizer(
          [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
    result = symbolizers[binary].symbolize(addr, binary, offset)
    if result is None:
      # Initialize system symbolizer only if other symbolizers failed.
      symbolizers[binary].append_symbolizer(
          SystemSymbolizerFactory(self.system, addr, binary))
      result = symbolizers[binary].symbolize(addr, binary, offset)
    # The system symbolizer must produce some result.
    assert result
    return result

  def print_symbolized_lines(self, symbolized_lines):
    """Print the frames, or the unmodified input line if there are none."""
    if not symbolized_lines:
      print(self.current_line)
      return
    # One input frame may expand to several (inlined) frames; renumber.
    for symbolized_frame in symbolized_lines:
      print('    #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip())
      self.frame_no += 1

  def process_stdin(self):
    """Read stdin line by line, echoing it with stack frames symbolized."""
    self.frame_no = 0
    for line in sys.stdin:
      self.current_line = line.rstrip()
      match = self._STACK_TRACE_LINE_RE.match(line)
      if not match:
        print(self.current_line)
        continue
      if DEBUG:
        print(line)
      _, frameno_str, addr, binary, offset = match.groups()
      if frameno_str == '0':
        # Assume that frame #0 is the first frame of new stack trace.
        self.frame_no = 0
      original_binary = binary
      if self.binary_name_filter:
        binary = self.binary_name_filter(binary)
      symbolized_line = self.symbolize_address(addr, binary, offset)
      if not symbolized_line and original_binary != binary:
        # The filtered name gave nothing: retry with the name that actually
        # appeared in the report, not with the filtered one again.
        symbolized_line = self.symbolize_address(addr, original_binary, offset)
      self.print_symbolized_lines(symbolized_line)
357970a9b9c6c6bb894557fe2d1779118ee8c3070d8Alexander Potapenko
358970a9b9c6c6bb894557fe2d1779118ee8c3070d8Alexander Potapenko
if __name__ == '__main__':
  # Only -d/--demangle is recognised as an option.  Report an unknown option
  # with a short usage message instead of a traceback.
  try:
    opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"])
  except getopt.GetoptError as err:
    sys.stderr.write('%s\nusage: %s [-d|--demangle] [path_to_cut ...]\n' %
                     (err, sys.argv[0]))
    sys.exit(2)
  for o, a in opts:
    if o in ("-d", "--demangle"):
      demangle = True
  loop = SymbolizationLoop()
  loop.process_stdin()
366