symbol.py revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
#!/usr/bin/python
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""

import os
import re
import subprocess

# Root of the source checkout: four directory levels above this file.
CHROME_SRC = os.path.join(os.path.realpath(os.path.dirname(__file__)),
                          os.pardir, os.pardir, os.pardir, os.pardir)
ANDROID_BUILD_TOP = CHROME_SRC
SYMBOLS_DIR = CHROME_SRC
CHROME_SYMBOLS_DIR = CHROME_SRC

ARCH = "arm"

# Cache for FindToolchain(); filled in on the first successful lookup.
TOOLCHAIN_INFO = None


def Uname():
    """'uname' for constructing prebuilt/<...> and out/host/<...> paths.

    Returns:
      "darwin-x86" or "darwin-ppc" on Darwin, "linux-x86" on Linux, and
      the raw system name reported by os.uname() on anything else.
    """
    uname = os.uname()[0]
    if uname == "Darwin":
        proc = os.uname()[-1]
        if proc == "i386" or proc == "x86_64":
            return "darwin-x86"
        return "darwin-ppc"
    if uname == "Linux":
        return "linux-x86"
    return uname


def ToolPath(tool, toolchain_info=None):
    """Return a full qualified path to the specified tool.

    Args:
      tool: bare tool name, e.g. "addr2line" or "objdump".
      toolchain_info: accepted for compatibility with callers; it is not
        consulted, the toolchain is chosen from the module-level ARCH.

    Returns:
      The path of the tool inside the checked in android_tools NDK.
    """
    # ToolPath looks for the tools in the completely incorrect directory.
    # This looks in the checked in android_tools.
    if ARCH == "arm":
        toolchain_source = "arm-linux-androideabi-4.6"
        toolchain_prefix = "arm-linux-androideabi"
    else:
        toolchain_source = "x86-4.6"
        toolchain_prefix = "i686-android-linux"

    toolchain_subdir = (
        "third_party/android_tools/ndk/toolchains/%s/prebuilt/linux-x86_64/bin" %
        toolchain_source)

    return os.path.join(CHROME_SRC,
                        toolchain_subdir,
                        toolchain_prefix + "-" + tool)


def FindToolchain():
    """Look for the latest available toolchain.

    The result is cached in the module-level TOOLCHAIN_INFO.

    Args:
      None

    Returns:
      A (label, platform, target prefix) tuple of strings describing the
      toolchain.

    Raises:
      Exception: if addr2line cannot be found for any known toolchain.
    """
    global TOOLCHAIN_INFO
    if TOOLCHAIN_INFO is not None:
        return TOOLCHAIN_INFO

    ## Known toolchains, newer ones in the front.
    if ARCH == "arm":
        known_toolchains = [
            ("arm-linux-androideabi-4.6", "arm", "arm-linux-androideabi"),
        ]
    elif ARCH == "x86":
        known_toolchains = [
            ("i686-android-linux-4.4.3", "x86", "i686-android-linux")
        ]
    else:
        known_toolchains = []

    # Look for addr2line to check for valid toolchain path.
    for toolchain_info in known_toolchains:
        if os.path.exists(ToolPath("addr2line", toolchain_info)):
            TOOLCHAIN_INFO = toolchain_info
            return toolchain_info

    raise Exception("Could not find tool chain")


def TranslateLibPath(lib):
    """Map a symbols-root-relative library path onto a built .so file.

    Args:
      lib: the path from the symbols root to the symbols file.

    Returns:
      "/" followed by the path, relative to SYMBOLS_DIR, of the most
      recently modified matching library found in the known output
      directories, or lib unchanged when no candidate exists.
    """
    # SymbolInformation(lib, addr) receives lib as the path from symbols
    # root to the symbols file. This needs to be translated to point to the
    # correct .so path. If the user doesn't explicitly specify which directory to
    # use, then use the most recently updated one in one of the known directories.
    # If the .so is not found somewhere in CHROME_SYMBOLS_DIR, leave it
    # untranslated in case it is an Android symbol in SYMBOLS_DIR.
    library_name = os.path.basename(lib)
    out_dir = os.environ.get('CHROMIUM_OUT_DIR', 'out')
    candidate_dirs = ['.',
                      os.path.join(out_dir, 'Debug', 'lib'),
                      os.path.join(out_dir, 'Debug', 'lib.target'),
                      os.path.join(out_dir, 'Release', 'lib'),
                      os.path.join(out_dir, 'Release', 'lib.target'),
                      ]

    candidate_libraries = [
        '%s/%s/%s' % (CHROME_SYMBOLS_DIR, d, library_name)
        for d in candidate_dirs]
    candidate_libraries = [path for path in candidate_libraries
                           if os.path.exists(path)]
    if not candidate_libraries:
        return lib

    # Newest build output wins.
    candidate_libraries.sort(key=os.path.getmtime, reverse=True)

    # The leading slash lets callers rebuild the full path as SYMBOLS_DIR + lib.
    library_path = os.path.relpath(candidate_libraries[0], SYMBOLS_DIR)
    return '/' + library_path


def SymbolInformation(lib, addr, get_detailed_info):
    """Look up symbol information about an address.

    Args:
      lib: library (or executable) pathname containing symbols
      addr: string hexadecimal address
      get_detailed_info: when true, objdump is also run to obtain the
        containing object symbol and the offset into it.

    Returns:
      A list of the form [(source_symbol, source_location,
      object_symbol_with_offset)].

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    lib = TranslateLibPath(lib)
    info = SymbolInformationForSet(lib, set([addr]), get_detailed_info)
    return (info and info.get(addr)) or [(None, None, None)]


def SymbolInformationForSet(lib, unique_addrs, get_detailed_info):
    """Look up symbol information for a set of addresses from the given library.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of hexadecimal addresses
      get_detailed_info: when true, objdump is also run to obtain the
        containing object symbol and the offset into it.

    Returns:
      A dictionary of the form {addr: [(source_symbol, source_location,
      object_symbol_with_offset)]} where each address has a list of
      associated symbols and locations.  The list is always non-empty.

      None is returned when lib is empty, when addr2line yields nothing,
      or when detailed info was requested and objdump yields nothing.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    if not lib:
        return None

    addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
    if not addr_to_line:
        return None

    if get_detailed_info:
        addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
        if not addr_to_objdump:
            return None
    else:
        # Placeholder entries: an empty symbol at offset zero for every address.
        addr_to_objdump = dict((addr, ("", 0)) for addr in unique_addrs)

    result = {}
    for addr in unique_addrs:
        source_info = addr_to_line.get(addr)
        if not source_info:
            source_info = [(None, None)]
        if addr in addr_to_objdump:
            (object_symbol, object_offset) = addr_to_objdump.get(addr)
            object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                               object_offset)
        else:
            object_symbol_with_offset = None
        result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
                        for (source_symbol, source_location) in source_info]

    return result


class MemoizedForSet(object):
    """Decorator caching the per-address results of fn(lib, unique_addrs).

    fn is only invoked with the addresses that have not been seen before
    for the given lib.  Addresses for which fn produced no (or a falsy)
    result are remembered too, so they are not looked up again, and are
    left out of the returned dictionary.
    """

    def __init__(self, fn):
        self.fn = fn
        self.cache = {}  # {lib: {addr: result or None}}
        # Keep the wrapped function's documentation visible on the wrapper.
        self.__doc__ = getattr(fn, "__doc__", None)

    def __call__(self, lib, unique_addrs):
        lib_cache = self.cache.setdefault(lib, {})

        # Must be a real list: it is tested for emptiness and then iterated
        # twice (a lazy filter() object would be exhausted after one pass).
        no_cache = [addr for addr in unique_addrs if addr not in lib_cache]
        if no_cache:
            # Mark as looked up before calling fn so failures are cached too.
            lib_cache.update((k, None) for k in no_cache)
            result = self.fn(lib, no_cache)
            if result:
                lib_cache.update(result)

        return dict((k, lib_cache[k]) for k in unique_addrs if lib_cache[k])


def _CloseProcess(child):
    """Close the pipes of a finished conversation with child and reap it."""
    for stream in (child.stdin, child.stdout):
        if stream is None:
            continue
        try:
            stream.close()
        except EnvironmentError:
            # The child may already have exited; a failed flush on close of a
            # broken pipe carries no information we could act upon.
            pass
    child.wait()


def _ReadAddr2LineRecords(child, addr):
    """Query a running addr2line process for one address.

    Returns:
      A (records, alive) pair: records is the list of (symbol, location)
      tuples read for addr, alive is False when addr2line closed its
      output before the answer was complete.
    """
    child.stdin.write("0x%s\n" % addr)
    child.stdin.flush()
    records = []
    while True:
        symbol_line = child.stdout.readline()
        location_line = child.stdout.readline()
        if not symbol_line or not location_line:
            # End of file: addr2line has gone away, stop instead of spinning.
            return records, False
        symbol = symbol_line.strip()
        if symbol == "??":
            symbol = None
        location = location_line.strip()
        if location == "??:0":
            location = None
        if symbol is None and location is None:
            return records, True
        records.append((symbol, location))
        if len(records) == 1:
            # Write a blank line as a sentinel so we know when to stop
            # reading inlines from the output.
            # The blank line will cause addr2line to emit "??\n??:0\n".
            child.stdin.write("\n")
            child.stdin.flush()


@MemoizedForSet
def CallAddr2LineForSet(lib, unique_addrs):
    """Look up line and symbol information for a set of addresses.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to look up.

    Returns:
      A dictionary of the form {addr: [(symbol, file:line)]} where
      each address has a list of associated symbols and locations
      or an empty list if no symbol information was found.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.

      None is returned when lib is empty or the symbols file is missing.

    Raises:
      Exception: if no usable toolchain is found (see FindToolchain).
    """
    if not lib:
        return None

    symbols = SYMBOLS_DIR + lib
    if not os.path.isfile(symbols):
        return None

    FindToolchain()  # Only called for its check; raises without a toolchain.
    cmd = [ToolPath("addr2line"), "--functions", "--inlines",
           "--demangle", "--exe=" + symbols]
    # Text mode pipes so that str can be written and read on every Python.
    child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             universal_newlines=True)

    result = {}
    try:
        for addr in sorted(unique_addrs):
            records, alive = _ReadAddr2LineRecords(child, addr)
            result[addr] = records
            if not alive:
                break
    finally:
        _CloseProcess(child)
    return result


def StripPC(addr):
    """Strips the Thumb bit a program counter address when appropriate.

    Args:
      addr: the program counter address

    Returns:
      The stripped program counter address.
    """
    if ARCH == "arm":
        return addr & ~1
    return addr


@MemoizedForSet
def CallObjdumpForSet(lib, unique_addrs):
    """Use objdump to find out the names of the containing functions.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to find the
        functions for.

    Returns:
      A dictionary of the form {addr: (string symbol, offset)}, or None
      when lib is empty or the symbols file is missing.
    """
    if not lib:
        return None

    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        return None

    result = {}

    # Function lines look like:
    #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    # We pull out the address and function first. Then we check for an optional
    # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
    func_regexp = re.compile(r"(^[a-f0-9]*) \<(.*)\>:$")
    offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

    # A disassembly line looks like:
    #   177b2:  b510        push  {r4, lr}
    asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

    for target_addr in unique_addrs:
        i_target = StripPC(int(target_addr, 16))
        # Disassemble a small window starting at the address of interest.
        cmd = [ToolPath("objdump"),
               "--section=.text",
               "--demangle",
               "--disassemble",
               "--start-address=" + str(i_target),
               "--stop-address=" + str(i_target + 8),
               symbols]

        current_symbol = None  # The current function symbol in the disassembly.
        current_symbol_addr = 0  # The address of the current function.

        process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                   universal_newlines=True)
        try:
            for line in process.stdout:
                # Is it a function line like:
                #   000177b0 <android::IBinder::~IBinder()>:
                components = func_regexp.match(line)
                if components:
                    # This is a new function, so record the current function and
                    # its address.
                    current_symbol_addr = int(components.group(1), 16)
                    current_symbol = components.group(2)

                    # Does it have an optional offset like: "foo(..)+0x2c"?
                    components = offset_regexp.match(current_symbol)
                    if components:
                        current_symbol = components.group(1)
                        offset = components.group(2)
                        if offset:
                            current_symbol_addr -= int(offset, 16)

                # Is it an disassembly line like:
                #   177b2:  b510        push  {r4, lr}
                components = asm_regexp.match(line)
                if components:
                    i_addr = int(components.group(1), 16)
                    if i_addr == i_target:
                        result[target_addr] = (current_symbol,
                                               i_target - current_symbol_addr)
        finally:
            process.stdout.close()
            process.wait()

    return result


def CallCppFilt(mangled_symbol):
    """Demangle a C++ symbol with the toolchain's c++filt.

    Args:
      mangled_symbol: the mangled symbol name.

    Returns:
      The first line printed by c++filt, stripped of surrounding whitespace.
    """
    cmd = [ToolPath("c++filt")]
    process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                               universal_newlines=True)
    # communicate() feeds the input, closes the pipes and reaps the process.
    output, _ = process.communicate(mangled_symbol + "\n")
    return output.partition("\n")[0].strip()


def FormatSymbolWithOffset(symbol, offset):
    """Return symbol, followed by "+<decimal offset>" when offset is non-zero."""
    if offset == 0:
        return symbol
    return "%s+%d" % (symbol, offset)