symbol.py revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1#!/usr/bin/python
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Module for looking up symbolic debugging information.
18
19The information can include symbol names, offsets, and source locations.
20"""
21
22import os
23import re
24import subprocess
25
# Checkout root: the (realpath-resolved) directory holding this file, followed
# by four parent-directory components. os.path.join does not collapse the
# ".." components, so they remain visible in the resulting string.
CHROME_SRC = os.path.join(
    os.path.realpath(os.path.dirname(__file__)),
    *([os.pardir] * 4))

# The Android build top and both symbol roots all default to the checkout root.
ANDROID_BUILD_TOP = SYMBOLS_DIR = CHROME_SYMBOLS_DIR = CHROME_SRC

# Target architecture. Read by ToolPath() and FindToolchain() to choose a
# toolchain, and by StripPC() to decide whether to clear the Thumb bit.
ARCH = "arm"

# Result cache for FindToolchain(); stays None until a toolchain is found.
TOOLCHAIN_INFO = None
35
def Uname():
  """Return the host tag used in prebuilt/<...> and out/host/<...> paths.

  Returns:
    "linux-x86" on Linux; "darwin-x86" on Darwin when the machine field is
    "i386" or "x86_64", otherwise "darwin-ppc"; on any other system the raw
    system name reported by os.uname().
  """
  host = os.uname()
  system = host[0]
  if system == "Linux":
    return "linux-x86"
  if system != "Darwin":
    return system
  # On Darwin the last uname field (machine) tells Intel apart from PowerPC.
  if host[-1] in ("i386", "x86_64"):
    return "darwin-x86"
  return "darwin-ppc"
47
def ToolPath(tool, toolchain_info=None):
  """Return the fully qualified path to the specified toolchain tool.

  The tool is located inside the android_tools NDK that is checked in under
  CHROME_SRC, always using the linux-x86_64 prebuilt binaries.

  Args:
    tool: bare tool name such as "addr2line" or "objdump".
    toolchain_info: accepted for interface compatibility; this
      implementation does not consult it and selects the toolchain from the
      module-level ARCH instead.

  Returns:
    The path <CHROME_SRC>/<toolchain bin directory>/<target prefix>-<tool>.
  """
  # Anything other than "arm" falls back to the x86 toolchain.
  if ARCH == "arm":
    source, prefix = "arm-linux-androideabi-4.6", "arm-linux-androideabi"
  else:
    source, prefix = "x86-4.6", "i686-android-linux"

  bin_dir = (
      "third_party/android_tools/ndk/toolchains/%s/prebuilt/linux-x86_64/bin" %
      source)
  executable = "-".join((prefix, tool))
  return os.path.join(CHROME_SRC, bin_dir, executable)
66
def FindToolchain():
  """Find the newest known toolchain whose addr2line binary exists.

  The first successful lookup is stored in the module-level TOOLCHAIN_INFO
  and returned directly by later calls.

  Args:
    None

  Returns:
    A (label, platform, target prefix) tuple of strings describing the
    toolchain.

  Raises:
    Exception: if ARCH has no known toolchains, or none of them has an
      addr2line at the path reported by ToolPath().
  """
  global TOOLCHAIN_INFO
  if TOOLCHAIN_INFO is not None:
    return TOOLCHAIN_INFO

  # Known toolchains per architecture, newer ones first.
  toolchains_by_arch = {
      "arm": [
          ("arm-linux-androideabi-4.6", "arm", "arm-linux-androideabi"),
      ],
      "x86": [
          ("i686-android-linux-4.4.3", "x86", "i686-android-linux"),
      ],
  }

  # The presence of addr2line is used as the marker of a valid toolchain.
  for candidate in toolchains_by_arch.get(ARCH, []):
    if os.path.exists(ToolPath("addr2line", candidate)):
      TOOLCHAIN_INFO = candidate
      return candidate

  raise Exception("Could not find tool chain")
100
def TranslateLibPath(lib):
  """Map a symbols-root-relative library path to the matching built .so.

  Only the basename of lib is used for the search. It is looked up in
  CHROME_SYMBOLS_DIR itself and in the Debug/Release "lib" and "lib.target"
  directories of the output directory (environment variable
  CHROMIUM_OUT_DIR, default "out"). When several copies exist, the most
  recently modified one is chosen.

  Args:
    lib: path from the symbols root to the symbols file.

  Returns:
    "/" followed by the chosen library's path relative to SYMBOLS_DIR, or
    lib unchanged when no candidate exists (it may then be an Android
    symbol that lives directly in SYMBOLS_DIR).
  """
  library_name = os.path.basename(lib)
  out_dir = os.environ.get('CHROMIUM_OUT_DIR', 'out')

  search_dirs = ['.']
  for build_type in ('Debug', 'Release'):
    for lib_subdir in ('lib', 'lib.target'):
      search_dirs.append(os.path.join(out_dir, build_type, lib_subdir))

  existing = [
      path for path in (
          '%s/%s/%s' % (CHROME_SYMBOLS_DIR, directory, library_name)
          for directory in search_dirs)
      if os.path.exists(path)]
  if not existing:
    return lib

  # Newest file first.
  existing.sort(key=os.path.getmtime, reverse=True)
  return '/' + os.path.relpath(existing[0], SYMBOLS_DIR)
129
def SymbolInformation(lib, addr, get_detailed_info):
  """Look up symbol information about a single address.

  The library path is first passed through TranslateLibPath(), then the
  lookup is delegated to SymbolInformationForSet() with a one-element set.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address
    get_detailed_info: passed through to SymbolInformationForSet(); when
      true the object symbol and offset are looked up as well.

  Returns:
    A list of the form [(source_symbol, source_location,
    object_symbol_with_offset)].

    If the function has been inlined then the list may contain
    more than one element, with the symbols for the most deeply
    nested inlined location appearing first.  The list is always
    non-empty: when nothing is known it is [(None, None, None)].

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  translated = TranslateLibPath(lib)
  by_addr = SymbolInformationForSet(translated, set([addr]), get_detailed_info)
  if by_addr:
    records = by_addr.get(addr)
    if records:
      return records
  return [(None, None, None)]
152
153
def SymbolInformationForSet(lib, unique_addrs, get_detailed_info):
  """Look up symbol information for a set of addresses in one library.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal address strings
    get_detailed_info: when true, CallObjdumpForSet() is used to find the
      containing object symbol and offset for each address; when false that
      step is skipped and object_symbol_with_offset is "" for every address.

  Returns:
    None when lib is empty, when addr2line yields nothing, or when detailed
    info was requested and objdump yields nothing.  Otherwise a dictionary
    of the form {addr: [(source_symbol, source_location,
    object_symbol_with_offset)]} with a non-empty list for every address.

    If the function has been inlined then a list may contain more than
    one element, with the symbols for the most deeply nested inlined
    location appearing first.  Addresses without source information map
    to a single record whose source fields are None.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  if not lib:
    return None

  addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
  if not addr_to_line:
    return None

  if not get_detailed_info:
    # Placeholder entries; they format to an empty object symbol.
    addr_to_objdump = dict((addr, ("", 0)) for addr in unique_addrs)
  else:
    addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    if not addr_to_objdump:
      return None

  result = {}
  for addr in unique_addrs:
    source_info = addr_to_line.get(addr) or [(None, None)]

    object_symbol_with_offset = None
    if addr in addr_to_objdump:
      object_symbol, object_offset = addr_to_objdump[addr]
      object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                         object_offset)

    records = []
    for (source_symbol, source_location) in source_info:
      records.append(
          (source_symbol, source_location, object_symbol_with_offset))
    result[addr] = records

  return result
203
204
class MemoizedForSet(object):
  """Decorator that caches the results of a per-library, per-address lookup.

  The wrapped function must take (lib, addrs) and return either a falsy
  value or a dictionary keyed by address.  Results are remembered per
  library; addresses for which the function returned nothing are remembered
  as misses (None) and are not looked up again.
  """

  def __init__(self, fn):
    """Wrap fn and start with an empty cache."""
    self.fn = fn
    # {lib: {addr: result or None}}
    self.cache = {}

  def __call__(self, lib, unique_addrs):
    """Return {addr: result} for the addresses that have a truthy result.

    Args:
      lib: library pathname; used as the first-level cache key.
      unique_addrs: re-iterable collection of addresses to look up.

    Returns:
      A dictionary containing only the requested addresses whose cached
      result is truthy.  The wrapped function is called at most once, with
      a list of just the addresses not yet cached for lib.
    """
    lib_cache = self.cache.setdefault(lib, {})

    # This must be a real list. A lazy filter() object (what Python 3 returns)
    # is always truthy and would be exhausted by the update() below, leaving
    # nothing to pass on to self.fn.
    no_cache = [addr for addr in unique_addrs if addr not in lib_cache]
    if no_cache:
      # Record every new address as a miss first; real results overwrite it.
      lib_cache.update((k, None) for k in no_cache)
      result = self.fn(lib, no_cache)
      if result:
        lib_cache.update(result)

    return dict((k, lib_cache[k]) for k in unique_addrs if lib_cache[k])
221
222
@MemoizedForSet
def CallAddr2LineForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  A single addr2line process is started for the library and the addresses
  are fed to it one at a time, in sorted order.

  Args:
    lib: library (or executable) pathname containing symbols; it is
      appended to SYMBOLS_DIR to form the file handed to addr2line.
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    None if lib is empty or the symbols file does not exist.  Otherwise a
    dictionary of the form {addr: [(symbol, file:line)]} where
    each address has a list of associated symbols and locations
    or an empty list if no symbol information was found.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.

  Raises:
    Exception: propagated from FindToolchain() when no toolchain is found.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.isfile(symbols):
    return None

  # Only the check matters here: FindToolchain() raises when no usable
  # toolchain is available. Its return value is not needed.
  FindToolchain()
  cmd = [ToolPath("addr2line"), "--functions", "--inlines",
      "--demangle", "--exe=" + symbols]
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  result = {}
  try:
    for addr in sorted(unique_addrs):
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = []
      first = True
      while True:
        # addr2line prints two lines per frame: the function, then file:line.
        symbol = child.stdout.readline().strip()
        if symbol == "??":
          symbol = None
        location = child.stdout.readline().strip()
        if location == "??:0":
          location = None
        if symbol is None and location is None:
          break
        records.append((symbol, location))
        if first:
          # Write a blank line as a sentinel so we know when to stop
          # reading inlines from the output.
          # The blank line will cause addr2line to emit "??\n??:0\n".
          child.stdin.write("\n")
          first = False
      result[addr] = records
  finally:
    # Close both pipes and reap the child even if the exchange above failed,
    # so no file descriptors or zombie processes are left behind. Closing
    # stdin first gives addr2line the end-of-input it needs to exit.
    child.stdin.close()
    child.stdout.close()
    child.wait()
  return result
280
281
def StripPC(addr):
  """Strip the Thumb bit from a program counter address when appropriate.

  Args:
    addr: the program counter address, as an integer.

  Returns:
    addr with bit 0 cleared when ARCH is "arm"; addr unchanged otherwise.
  """
  return addr & ~1 if ARCH == "arm" else addr
296
@MemoizedForSet
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  objdump is run once per address, disassembling only the 8 bytes of .text
  that start at that address (after StripPC() has been applied).

  Args:
    lib: library (or executable) pathname containing symbols; it is
      appended to SYMBOLS_DIR to form the file handed to objdump.
    unique_addrs: set of string hexadecimal addresses to find the
      functions for.

  Returns:
    None if lib is empty or the symbols file does not exist.  Otherwise a
    dictionary of the form {addr: (string symbol, offset)}, containing only
    the addresses for which objdump printed a matching instruction line.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  result = {}

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) \<(.*)\>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:  b510        push  {r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  for target_addr in unique_addrs:
    # Parse and strip the address once; it is both the start of the
    # disassembly window and the address searched for in the output.
    i_target = StripPC(int(target_addr, 16))
    cmd = [ToolPath("objdump"),
           "--section=.text",
           "--demangle",
           "--disassemble",
           "--start-address=" + str(i_target),
           "--stop-address=" + str(i_target + 8),
           symbols]

    current_symbol = None    # The current function symbol in the disassembly.
    current_symbol_addr = 0  # The address of the current function.

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    try:
      for line in proc.stdout:
        # Is it a function line like:
        #   000177b0 <android::IBinder::~IBinder()>:
        components = func_regexp.match(line)
        if components:
          # This is a new function, so record the current function and its
          # address.
          current_symbol_addr = int(components.group(1), 16)
          current_symbol = components.group(2)

          # Does it have an optional offset like: "foo(..)+0x2c"?
          components = offset_regexp.match(current_symbol)
          if components:
            current_symbol = components.group(1)
            offset = components.group(2)
            if offset:
              # The header address is inside the function; step back to its
              # start.
              current_symbol_addr -= int(offset, 16)

        # Is it a disassembly line like:
        #   177b2:  b510        push  {r4, lr}
        components = asm_regexp.match(line)
        if components:
          i_addr = int(components.group(1), 16)
          if i_addr == i_target:
            result[target_addr] = (current_symbol,
                                   i_target - current_symbol_addr)
    finally:
      # Close the pipe and reap objdump so that no zombie process is left
      # behind for each address.
      proc.stdout.close()
      proc.wait()

  return result
376
377
def CallCppFilt(mangled_symbol):
  """Demangle a symbol name by piping it through the toolchain's c++filt.

  Args:
    mangled_symbol: the symbol name to hand to c++filt.

  Returns:
    The first line of c++filt's output with surrounding whitespace stripped.
  """
  cmd = [ToolPath("c++filt")]
  process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  try:
    process.stdin.write(mangled_symbol)
    process.stdin.write("\n")
    # Closing stdin signals end of input before the answer is read.
    process.stdin.close()
    demangled_symbol = process.stdout.readline().strip()
  finally:
    # Always release both pipes and reap the child, even when a write or read
    # above failed. Closing an already closed pipe is harmless.
    process.stdin.close()
    process.stdout.close()
    process.wait()
  return demangled_symbol
387
def FormatSymbolWithOffset(symbol, offset):
  """Format a symbol together with its offset for display.

  Args:
    symbol: the symbol name (may be None).
    offset: integer offset from the start of the symbol.

  Returns:
    symbol itself when offset is 0, otherwise "<symbol>+<offset>" with the
    offset rendered in decimal.
  """
  if offset != 0:
    return "%s+%d" % (symbol, offset)
  return symbol
392