Package lldb :: Package utils :: Module symbolication
[hide private]
[frames | no frames]

Source Code for Module lldb.utils.symbolication

  1  #!/usr/bin/python 
  2   
  3  #---------------------------------------------------------------------- 
  4  # Be sure to add the python path that points to the LLDB shared library. 
  5  # 
  6  # To use this in the embedded python interpreter using "lldb": 
  7  # 
  8  #   cd /path/containing/crashlog.py 
  9  #   lldb 
 10  #   (lldb) script import crashlog 
 11  #   "crashlog" command installed, type "crashlog --help" for detailed help 
 12  #   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash 
 13  # 
 14  # The benefit of running the crashlog command inside lldb in the  
 15  # embedded python interpreter is when the command completes, there  
 16  # will be a target with all of the files loaded at the locations 
 17  # described in the crash log. Only the files that have stack frames 
 18  # in the backtrace will be loaded unless the "--load-all" option 
 19  # has been specified. This allows users to explore the program in the 
 20  # state it was in right at crash time.  
 21  # 
 22  # On MacOSX csh, tcsh: 
 23  #   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash ) 
 24  # 
 25  # On MacOSX sh, bash: 
 26  #   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash 
 27  #---------------------------------------------------------------------- 
 28   
 29  import lldb 
 30  import commands 
 31  import optparse 
 32  import os 
 33  import plistlib 
 34  import re 
 35  import shlex 
 36  import sys 
 37  import time 
 38  import uuid 
 39   
class Address:
    """Class that represents an address that will be symbolicated.

    Everything is resolved lazily and cached on the instance: the
    section/offset address, the symbol context and the final symbolicated
    string are each computed at most once.
    """

    def __init__(self, target, load_addr):
        """Remember *target* and *load_addr*; nothing is resolved yet."""
        self.target = target
        # The load address that this object represents
        self.load_addr = load_addr
        # The resolved lldb.SBAddress (if any), named so_addr for
        # section/offset address
        self.so_addr = None
        # The cached symbol context for this address
        self.sym_ctx = None
        # Any original textual description of this address to be used as a
        # backup in case symbolication fails
        self.description = None
        # The cached symbolicated string that describes this address
        self.symbolication = None
        # True once symbolicate() found a containing inlined block
        self.inlined = False

    def __str__(self):
        """Return the hex load address followed by the best text available.

        Preference order: symbolication, then description, then so_addr.
        """
        s = "%#16.16x" % (self.load_addr)
        for extra in (self.symbolication, self.description, self.so_addr):
            if extra:
                s += " %s" % (extra)
                break
        return s

    def resolve_addr(self):
        """Return the section/offset address, resolving it on first use."""
        # Identity test: so_addr may be an SB object that overloads "==".
        if self.so_addr is None:
            self.so_addr = self.target.ResolveLoadAddress(self.load_addr)
        return self.so_addr

    def is_inlined(self):
        """Return the inlined flag set by symbolicate()."""
        return self.inlined

    def get_symbol_context(self):
        """Return the symbol context for this address, resolving it once.

        When the address cannot be resolved a default-constructed
        lldb.SBSymbolContext is cached and returned instead.
        """
        if self.sym_ctx is None:
            sb_addr = self.resolve_addr()
            if sb_addr:
                self.sym_ctx = self.target.ResolveSymbolContextForAddress(
                    sb_addr, lldb.eSymbolContextEverything)
            else:
                self.sym_ctx = lldb.SBSymbolContext()
        return self.sym_ctx

    def get_instructions(self):
        """Return the instructions of the function (or, failing that, the
        symbol) in this address's symbol context, or None if the symbol
        context is not valid."""
        sym_ctx = self.get_symbol_context()
        if not sym_ctx:
            return None
        function = sym_ctx.GetFunction()
        if function:
            return function.GetInstructions(self.target)
        return sym_ctx.GetSymbol().GetInstructions(self.target)

    def symbolicate(self, verbose=False):
        """Build and cache the symbolicated description of this address.

        The text has the form "<module>`<name>[ [inlined] <name>][ + <offset>]
        [ at <file>:<line>[:<column>]]". In verbose mode full file specs are
        used instead of bare file names.

        Returns True when a description was produced, False otherwise.
        The work is done only once; later calls report the cached outcome
        (previously they always returned False, even after a success).
        """
        if self.symbolication is not None:
            return bool(self.symbolication)

        self.symbolication = ''
        self.inlined = False
        sym_ctx = self.get_symbol_context()
        if not sym_ctx:
            return False
        module = sym_ctx.GetModule()
        if not module:
            return False

        # Print full source file path in verbose mode
        if verbose:
            text = str(module.GetFileSpec()) + '`'
        else:
            text = module.GetFileSpec().GetFilename() + '`'

        function_start_load_addr = -1
        function = sym_ctx.GetFunction()
        block = sym_ctx.GetBlock()
        line_entry = sym_ctx.GetLineEntry()
        symbol = sym_ctx.GetSymbol()
        inlined_block = block.GetContainingInlinedBlock()

        if function:
            text += function.GetName()
            if inlined_block:
                self.inlined = True
                text += ' [inlined] ' + inlined_block.GetInlinedName()
                # Offsets inside an inlined block are reported relative to
                # the start of the block range containing the address.
                block_range_idx = inlined_block.GetRangeIndexForBlockAddress(self.so_addr)
                if block_range_idx < lldb.UINT32_MAX:
                    block_range_start_addr = inlined_block.GetRangeStartAddress(block_range_idx)
                    function_start_load_addr = block_range_start_addr.GetLoadAddress(self.target)
            if function_start_load_addr == -1:
                function_start_load_addr = function.GetStartAddress().GetLoadAddress(self.target)
        elif symbol:
            text += symbol.GetName()
            function_start_load_addr = symbol.GetStartAddress().GetLoadAddress(self.target)
        else:
            # Neither a function nor a symbol: leave the cache empty.
            return False

        # Dump the offset from the current function or symbol if it is non zero
        function_offset = self.load_addr - function_start_load_addr
        if function_offset > 0:
            text += " + %u" % (function_offset)
        elif function_offset < 0:
            text += " %i (invalid negative offset, file a bug) " % function_offset

        # Print out any line information if any is available
        if line_entry.GetFileSpec():
            # Print full source file path in verbose mode
            if verbose:
                text += ' at %s' % line_entry.GetFileSpec()
            else:
                text += ' at %s' % line_entry.GetFileSpec().GetFilename()
            text += ':%u' % line_entry.GetLine()
            column = line_entry.GetColumn()
            if column > 0:
                text += ':%u' % column

        self.symbolication = text
        return True
144
class Section:
    """Class that represents a load address range.

    A section is described by a name, a start address and an optional end
    address (exclusive). It can be filled in from a string of the form
    "<name>=<base>", "<name>=<base>-<end>" or "<name>=<base>+<size>".
    """
    sect_info_regex = re.compile(r'(?P<name>[^=]+)=(?P<range>.*)')
    addr_regex = re.compile(r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$')
    range_regex = re.compile(r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$')

    def __init__(self, start_addr=None, end_addr=None, name=None):
        """Store the (possibly unknown) bounds and name of the section."""
        self.start_addr = start_addr
        self.end_addr = end_addr
        self.name = name

    def contains(self, addr):
        """Return True if start_addr <= addr < end_addr.

        A section whose start or end is unknown (None) contains nothing;
        this avoids comparing an integer against None.
        """
        if self.start_addr is None or self.end_addr is None:
            return False
        return self.start_addr <= addr < self.end_addr

    def set_from_string(self, s):
        """Parse a section info string into this object.

        Returns True on success. On failure an explanation of the accepted
        formats is printed and False is returned.
        """
        match = self.sect_info_regex.match(s)
        if match:
            self.name = match.group('name')
            range_str = match.group('range')

            # "<name>=<base>": only the start is known.
            addr_match = self.addr_regex.match(range_str)
            if addr_match:
                self.start_addr = int(addr_match.group('start'), 16)
                self.end_addr = None
                return True

            # "<name>=<base>-<end>" or "<name>=<base>+<size>"
            range_match = self.range_regex.match(range_str)
            if range_match:
                self.start_addr = int(range_match.group('start'), 16)
                self.end_addr = int(range_match.group('end'), 16)
                if range_match.group('op') == '+':
                    # The second number was a size, not an end address.
                    self.end_addr += self.start_addr
                return True

        print('error: invalid section info string "%s"' % s)
        print('Valid section info formats are:')
        row_format = '%-21s %-25s %s'
        rows = (
            ('Format', 'Example', 'Description'),
            ('-' * 21, '-' * 25, '-' * 47),
            ('<name>=<base>', '__TEXT=0x123000', 'Section from base address only'),
            ('<name>=<base>-<end>', '__TEXT=0x123000-0x124000', 'Section from base address and end address'),
            ('<name>=<base>+<size>', '__TEXT=0x123000+0x1000', 'Section from base address and size'),
        )
        for row in rows:
            print(row_format % row)
        return False

    def __str__(self):
        """Return "<name>=[start - end)", "<name>=start", the bare name, or
        "<invalid>" when the section has no name."""
        if not self.name:
            return "<invalid>"
        if self.start_addr is not None:
            if self.end_addr is not None:
                return "%s=[0x%16.16x - 0x%16.16x)" % (self.name, self.start_addr, self.end_addr)
            return "%s=0x%16.16x" % (self.name, self.start_addr)
        return self.name
197
class Image:
    """A class that represents an executable image and any associated data"""

    def __init__(self, path, uuid=None):
        """Create an image for the file at *path*, optionally with a known
        *uuid*. All other attributes start out unset."""
        self.path = path
        self.resolved_path = None
        self.resolved = False
        self.unavailable = False
        self.uuid = uuid
        self.section_infos = list()
        self.identifier = None
        self.version = None
        self.arch = None
        self.module = None
        self.symfile = None
        self.slide = None

    def dump(self, prefix):
        """Print this image's string form preceded by *prefix*."""
        print("%s%s" % (prefix, self))

    def debug_dump(self):
        """Print every attribute of this image, one per line."""
        print('path = "%s"' % (self.path))
        print('resolved_path = "%s"' % (self.resolved_path))
        print('resolved = %i' % (self.resolved))
        print('unavailable = %i' % (self.unavailable))
        print('uuid = %s' % (self.uuid))
        print('section_infos = %s' % (self.section_infos))
        print('identifier = "%s"' % (self.identifier))
        print('version = %s' % (self.version))
        print('arch = %s' % (self.arch))
        print('module = %s' % (self.module))
        print('symfile = "%s"' % (self.symfile))
        # slide defaults to None, which the integer formats cannot render.
        if self.slide is None:
            print('slide = None')
        else:
            print('slide = %i (0x%x)' % (self.slide, self.slide))

    def __str__(self):
        """Return "<uuid> <version> <path>" followed by each section info and,
        when set, the slide."""
        s = "%s %s %s" % (self.get_uuid(), self.version, self.get_resolved_path())
        for section_info in self.section_infos:
            s += ", %s" % (section_info)
        if self.slide is not None:
            s += ', slide = 0x%16.16x' % self.slide
        return s

    def add_section(self, section):
        """Append *section* to this image's section infos."""
        self.section_infos.append(section)

    def get_section_containing_load_addr(self, load_addr):
        """Return the first section info containing *load_addr*, or None."""
        for section_info in self.section_infos:
            if section_info.contains(load_addr):
                return section_info
        return None

    def get_resolved_path(self):
        """Return resolved_path if set, else path, else None."""
        return self.resolved_path or self.path or None

    # NOTE(review): the "def" line of this method is missing from the source
    # this was recovered from; the name mirrors symfile_basename() below.
    # Confirm against callers.
    def get_resolved_path_basename(self):
        """Return the file name part of the resolved path, or None."""
        path = self.get_resolved_path()
        if path:
            return os.path.basename(path)
        return None

    def symfile_basename(self):
        """Return the file name part of symfile, or None if it is unset."""
        if self.symfile:
            return os.path.basename(self.symfile)
        return None

    def has_section_load_info(self):
        """Return True if there is at least one section info or a slide."""
        return bool(self.section_infos) or self.slide is not None

    def load_module(self, target):
        """Set the load addresses of this image's module in *target*.

        Uses the section infos when there are any, otherwise slides the whole
        module by self.slide.

        Returns None on success (or when the image is already known to be
        unavailable) and an error string otherwise.
        """
        if self.unavailable:
            # We already warned that we couldn't find this module, so don't
            # return an error string
            return None
        if not self.has_section_load_info():
            return 'error: no section infos'
        if not target:
            return 'error: invalid target'
        if not self.module:
            return 'error: invalid module'

        if self.section_infos:
            for section_info in self.section_infos:
                if not section_info.name:
                    # Was "range.name" (the builtin), which raised
                    # AttributeError instead of reporting the problem.
                    return 'error: unable to find "%s" section in "%s"' % (section_info.name, self.get_resolved_path())
                section = self.module.FindSection(section_info.name)
                if not section:
                    return 'error: unable to find the section named "%s"' % section_info.name
                error = target.SetSectionLoadAddress(section, section_info.start_addr)
                if not error.Success():
                    return 'error: %s' % error.GetCString()
        else:
            err = target.SetModuleLoadAddress(self.module, self.slide)
            if err.Fail():
                return err.GetCString()
        return None

    def add_module(self, target):
        '''Add the Image described in this object to "target" and, when section
        load info is available, load its sections.

        Returns None on success (or when the image is unavailable) and an
        error string otherwise.'''
        if not target:
            return 'error: invalid target'

        # Try and find using UUID only first so that paths need not match up
        uuid_str = self.get_normalized_uuid_string()
        if uuid_str:
            self.module = target.AddModule(None, None, uuid_str)
        if not self.module:
            self.locate_module_and_debug_symbols()
            if self.unavailable:
                return None
            resolved_path = self.get_resolved_path()
            self.module = target.AddModule(resolved_path, self.arch, uuid_str, self.symfile)
        if not self.module:
            return 'error: unable to get module for (%s) "%s"' % (self.arch, self.get_resolved_path())
        if self.has_section_load_info():
            return self.load_module(target)
        # No sections, the module was added to the target, so success
        return None

    def locate_module_and_debug_symbols(self):
        """Mark the image as resolved and return True.

        By default, just use the paths that were supplied in:
            self.path
            self.resolved_path
            self.module
            self.symfile
        Subclasses can inherit from this class and override this function.
        """
        self.resolved = True
        return True

    def get_uuid(self):
        """Return the UUID, asking the module for it if none was supplied."""
        if not self.uuid and self.module:
            uuid_string = self.module.GetUUIDString()
            # uuid.UUID() raises when given no string, so only convert a
            # non-empty value.
            if uuid_string:
                self.uuid = uuid.UUID(uuid_string)
        return self.uuid

    def get_normalized_uuid_string(self):
        """Return the UUID as an upper case string, or None if there is none."""
        if self.uuid:
            return str(self.uuid).upper()
        return None

    def create_target(self):
        '''Create a target using the information in this Image object.

        Returns the new target, or None (after printing an error) when the
        image is unavailable, cannot be located, or no valid target could be
        created.'''
        if self.unavailable:
            return None

        if not self.locate_module_and_debug_symbols():
            print('error: unable to locate main executable (%s) "%s"' % (self.arch, self.path))
            return None

        resolved_path = self.get_resolved_path()
        path_spec = lldb.SBFileSpec(resolved_path)
        error = lldb.SBError()
        target = lldb.debugger.CreateTarget(resolved_path, self.arch, None, False, error)
        if not target:
            print('error: unable to create a valid target for (%s) "%s"' % (self.arch, self.path))
            return None

        self.module = target.FindModule(path_spec)
        if self.has_section_load_info():
            err = self.load_module(target)
            if err:
                # A load failure is reported but the target is still returned.
                print('ERROR:  %s' % err)
        return target
374
class Symbolicator:
    """A class that represents the information needed to symbolicate
    addresses in a program"""

    def __init__(self):
        """Start with no target, no images and an all-ones address mask."""
        self.target = None
        # A list of images to be used when symbolicating
        self.images = list()
        # Set to 0xfffffffffffffffe by create_target() for 4-byte-address
        # targets whose architecture name contains "arm".
        self.addr_mask = 0xffffffffffffffff

    def __str__(self):
        """Return a multi-line description of the target, its modules and the
        images (modules used to be printed to stdout instead of returned)."""
        s = "Symbolicator:\n"
        if self.target:
            s += "Target = '%s'\n" % (self.target)
            s += "Target modules:\n"
            for m in self.target.modules:
                s += ' %s\n' % (m)
        s += "Images:\n"
        for image in self.images:
            s += ' %s\n' % (image)
        return s

    def find_images_with_identifier(self, identifier):
        """Return the list of images whose identifier equals *identifier*."""
        return [image for image in self.images if image.identifier == identifier]

    def find_image_containing_load_addr(self, load_addr):
        """Return the first image with a section containing *load_addr*, or
        None if there is none."""
        for image in self.images:
            if image.get_section_containing_load_addr(load_addr):
                return image
        return None

    def create_target(self):
        """Return the target, creating it from the first image that yields one.

        Returns None when no image can produce a target.
        """
        if self.target:
            return self.target

        for image in self.images:
            self.target = image.create_target()
            if not self.target:
                continue
            if self.target.GetAddressByteSize() == 4:
                triple = self.target.triple
                if triple:
                    arch = triple.split('-')[0]
                    if "arm" in arch:
                        # Mask that clears the low address bit.
                        self.addr_mask = 0xfffffffffffffffe
            return self.target
        return None

    def symbolicate(self, load_addr, verbose=False):
        """Symbolicate *load_addr*.

        Returns a list of Address objects: the address itself followed by one
        entry per enclosing inlined scope. Returns None when there is no
        target (an error is printed) or the address cannot be symbolicated.
        """
        if not self.target:
            self.create_target()
        if not self.target:
            print('error: no target in Symbolicator')
            return None

        live_process = False
        process = self.target.process
        if process:
            state = process.state
            if state > lldb.eStateUnloaded and state < lldb.eStateDetached:
                live_process = True

        # If we don't have a live process, we can attempt to find the image
        # that a load address belongs to and lazily load its module in the
        # target, but we shouldn't do any of this if we have a live process
        if not live_process:
            image = self.find_image_containing_load_addr(load_addr)
            if image:
                image.add_module(self.target)

        symbolicated_address = Address(self.target, load_addr)
        if not symbolicated_address.symbolicate(verbose):
            return None
        if not symbolicated_address.so_addr:
            return None

        symbolicated_addresses = [symbolicated_address]
        # Walk outwards through the inlined scopes until there is no parent.
        while True:
            inlined_parent_so_addr = lldb.SBAddress()
            inlined_parent_sym_ctx = symbolicated_address.sym_ctx.GetParentOfInlinedScope(
                symbolicated_address.so_addr, inlined_parent_so_addr)
            if not inlined_parent_sym_ctx:
                break
            if not inlined_parent_so_addr:
                break

            symbolicated_address = Address(self.target, inlined_parent_so_addr.GetLoadAddress(self.target))
            # Seed the caches so the parent frame is not resolved again.
            symbolicated_address.sym_ctx = inlined_parent_sym_ctx
            symbolicated_address.so_addr = inlined_parent_so_addr
            symbolicated_address.symbolicate(verbose)

            # push the new frame onto the new frame stack
            symbolicated_addresses.append(symbolicated_address)

        return symbolicated_addresses
469 470
def disassemble_instructions(target, instructions, pc, insts_before_pc, insts_after_pc, non_zeroeth_frame):
    """Print the disassembly around *pc*, marking the current line with "->".

    target            -- passed through to the instruction accessors
    instructions      -- iterable of instruction objects; None prints nothing
    pc                -- load address of the instruction to mark
    insts_before_pc   -- number of instructions to show before the marked
                         one, or -1 to start at the first instruction
    insts_after_pc    -- number of instructions to show after the marked
                         one, or -1 to continue to the last instruction
    non_zeroeth_frame -- when true the marked instruction is the one before
                         *pc*

    Nothing is printed if no instruction is located at *pc*.
    """
    if instructions is None:
        return

    comment_column = 50
    lines = list()
    pc_index = -1
    for inst_idx, inst in enumerate(instructions):
        inst_pc = inst.GetAddress().GetLoadAddress(target)
        if pc == inst_pc:
            pc_index = inst_idx
        mnemonic = inst.GetMnemonic(target)
        operands = inst.GetOperands(target)
        comment = inst.GetComment(target)
        line = "%#16.16x: %8s %s" % (inst_pc, mnemonic, operands)
        if comment:
            # Pad short lines so the comments start in the same column.
            line = line.ljust(comment_column) + "; %s" % comment
        lines.append(line)

    if pc_index < 0:
        return

    # If we are disassembling the non-zeroeth frame, we need to backup the PC by 1
    if non_zeroeth_frame and pc_index > 0:
        pc_index = pc_index - 1

    last_idx = len(lines) - 1
    if insts_before_pc == -1:
        start_idx = 0
    else:
        start_idx = max(pc_index - insts_before_pc, 0)
    # This used to test insts_before_pc, so "-1" for insts_after_pc was
    # treated as a count and cut the listing short.
    if insts_after_pc == -1:
        end_idx = last_idx
    else:
        end_idx = min(pc_index + insts_after_pc, last_idx)

    for i in range(start_idx, end_idx + 1):
        marker = ' -> ' if i == pc_index else '    '
        print('%s %s' % (marker, lines[i]))
511 519 526 530 534
def Symbolicate(command_args):
    """Parse *command_args* and print the symbolication of each address.

    The options describe an optional file (with architecture, slide and/or
    section load addresses) used to build the target; the remaining
    arguments are the addresses, in any base accepted by int(x, 0).

    Returns None. Returns early, without symbolicating, when option parsing
    exits (bad options or --help). An invalid --section string terminates
    the process with exit status 1.
    """
    usage = "usage: %prog [options] <addr1> [addr2 ...]"
    description = '''Symbolicate one or more addresses using LLDB's python scripting API..'''
    parser = optparse.OptionParser(description=description, prog='crashlog.py', usage=usage)
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='display verbose debug info', default=False)
    # NOTE(review): the platform option is parsed but not read anywhere in
    # this function.
    parser.add_option('-p', '--platform', type='string', metavar='platform', dest='platform', help='Specify the platform to use when creating the debug target. Valid values include "localhost", "darwin-kernel", "ios-simulator", "remote-freebsd", "remote-macosx", "remote-ios", "remote-linux".')
    parser.add_option('-f', '--file', type='string', metavar='file', dest='file', help='Specify a file to use when symbolicating')
    parser.add_option('-a', '--arch', type='string', metavar='arch', dest='arch', help='Specify a architecture to use when symbolicating')
    parser.add_option('-s', '--slide', type='int', metavar='slide', dest='slide', help='Specify the slide to use on the file specified with the --file option', default=None)
    parser.add_option('--section', type='string', action='append', dest='section_strings', help='specify <sect-name>=<start-addr> or <sect-name>=<start-addr>-<end-addr>')
    try:
        (options, args) = parser.parse_args(command_args)
    except SystemExit:
        # optparse reports bad options and --help by exiting; swallow that so
        # the caller's interpreter is not terminated.
        return

    symbolicator = Symbolicator()
    if options.file:
        image = Image(options.file)
        image.arch = options.arch
        # Add any sections that were specified with one or more --section options
        if options.section_strings:
            for section_str in options.section_strings:
                section = Section()
                if section.set_from_string(section_str):
                    image.add_section(section)
                else:
                    sys.exit(1)
        if options.slide is not None:
            image.slide = options.slide
        symbolicator.images.append(image)

    target = symbolicator.create_target()
    if options.verbose:
        print(symbolicator)
    if not target:
        print('error: no target for %s' % (symbolicator))
        return

    for addr_str in args:
        try:
            addr = int(addr_str, 0)
        except ValueError:
            print('error: invalid address "%s"' % addr_str)
            continue
        # symbolicate() returns None when the address cannot be symbolicated.
        symbolicated_addrs = symbolicator.symbolicate(addr, options.verbose)
        if symbolicated_addrs:
            for symbolicated_addr in symbolicated_addrs:
                print(symbolicated_addr)
        else:
            print('error: unable to symbolicate address %s' % addr_str)
        print('')
if __name__ == '__main__':
    # Run as a script: symbolicate the addresses given on the command line.
    command_line_args = sys.argv[1:]
    # Create a new debugger instance and publish it as lldb.debugger
    lldb.debugger = lldb.SBDebugger.Create()
    Symbolicate(command_line_args)