#===- object.py - Python Object Bindings --------------------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
Object File Interface
=====================

This module provides an interface for reading information from object files
(e.g. binary executables and libraries).

Using this module, you can obtain information about an object file's sections,
symbols, and relocations. These are represented by the classes ObjectFile,
Section, Symbol, and Relocation, respectively.

Usage
-----

The only way to use this module is to start by creating an ObjectFile. You can
create an ObjectFile by loading a file (specified by its path) or by creating a
llvm.core.MemoryBuffer and loading that.

Once you have an object file, you can inspect its sections and symbols directly
by calling get_sections() and get_symbols() respectively. To inspect
relocations, call get_relocations() on a Section instance.

Iterator Interface
------------------

The LLVM bindings expose iteration over sections, symbols, and relocations in a
way that only allows one instance to be operated on at a single time. This is
slightly annoying from a Python perspective, as it isn't very Pythonic to have
objects that "expire" but are still active from a dynamic language.

To aid working around this limitation, each Section, Symbol, and Relocation
instance caches its properties after first access. So, if the underlying
iterator is advanced, the properties can still be obtained provided they have
already been retrieved.

In addition, we also provide a "cache" method on each class to cache all
available data. You can call this on each obtained instance. Or, you can pass
cache=True to the appropriate get_XXX() method to have this done for you.

Here are some examples on how to perform iteration:

    obj = ObjectFile(filename='/bin/ls')

    # This is OK. Each Section is only accessed inside its own iteration slot.
    section_names = []
    for section in obj.get_sections():
        section_names.append(section.name)

    # This is NOT OK. You perform a lookup after the object has expired.
    symbols = list(obj.get_symbols())
    for symbol in symbols:
        print symbol.name # This raises because the object has expired.

    # In this example, we mix a working and failing scenario.
    symbols = []
    for symbol in obj.get_symbols():
        symbols.append(symbol)
        print symbol.name

    for symbol in symbols:
        print symbol.name # OK
        print symbol.address # NOT OK. We didn't look up this property before.

    # Cache everything up front.
    symbols = list(obj.get_symbols(cache=True))
    for symbol in symbols:
        print symbol.name # OK

"""

from ctypes import c_char_p
from ctypes import c_uint64

from .common import CachedProperty
from .common import LLVMObject
from .common import c_object_p
from .common import get_library
from .core import MemoryBuffer

__all__ = [
    "lib",
    "ObjectFile",
    "Relocation",
    "Section",
    "Symbol",
]

class ObjectFile(LLVMObject):
    """Represents an object/binary file."""

    def __init__(self, filename=None, contents=None):
        """Construct an instance from a filename or a memory buffer.

        filename, when given, is passed to llvm.core.MemoryBuffer to load the
        file. contents, when given, must be a llvm.core.MemoryBuffer instance.

        If both arguments are supplied, filename wins and contents is ignored.

        Raises Exception if neither filename nor contents is supplied.
        """
        # Compare against None instead of relying on truthiness so that any
        # non-MemoryBuffer value (including an empty str) is rejected here
        # rather than deep inside the ctypes call below.
        if contents is not None:
            assert isinstance(contents, MemoryBuffer)

        if filename is not None:
            contents = MemoryBuffer(filename=filename)

        if contents is None:
            raise Exception('No input found.')

        # NOTE(review): the pointer returned here is not checked for NULL;
        # presumably LLVM returns NULL for unparseable input -- confirm and
        # decide how buffer ownership should be handled in that case.
        ptr = lib.LLVMCreateObjectFile(contents)
        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
        self.take_ownership(contents)

    def get_sections(self, cache=False):
        """Obtain the sections in this object file.

        This is a generator for llvm.object.Section instances. If cache is
        true, Section.cache() is called on each instance before it is yielded.

        Sections are exposed as limited-use objects. See the module's
        documentation on iterators for more.
        """
        sections = lib.LLVMGetSections(self)
        last = None
        try:
            while not lib.LLVMIsSectionIteratorAtEnd(self, sections):
                last = Section(sections)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextSection(sections)
                last.expire()
        finally:
            # Runs on normal exhaustion, on an exception, and when the
            # generator is closed early, so the native iterator is always
            # released and the last yielded Section can no longer touch it.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSectionIterator(sections)

    def get_symbols(self, cache=False):
        """Obtain the symbols in this object file.

        This is a generator for llvm.object.Symbol instances. If cache is
        true, Symbol.cache() is called on each instance before it is yielded.

        Each Symbol instance is a limited-use object. See this module's
        documentation on iterators for more.
        """
        symbols = lib.LLVMGetSymbols(self)
        last = None
        try:
            while not lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
                last = Symbol(symbols, self)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextSymbol(symbols)
                last.expire()
        finally:
            # Always release the native iterator, even if the consumer stops
            # iterating early or an exception propagates.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSymbolIterator(symbols)

class Section(LLVMObject):
    """Represents a section in an object file."""

    def __init__(self, ptr):
        """Construct a new section instance.

        Section instances can currently only be created from an ObjectFile
        instance. Therefore, this constructor should not be used outside of
        this module.
        """
        LLVMObject.__init__(self, ptr)

        self.expired = False

    @CachedProperty
    def name(self):
        """Obtain the string name of the section.

        This is typically something like '.dynsym' or '.rodata'.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionName(self)

    @CachedProperty
    def size(self):
        """The size of the section, in bytes."""
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionSize(self)

    @CachedProperty
    def contents(self):
        """The contents of the section, as returned by LLVMGetSectionContents.

        NOTE(review): the prototype for LLVMGetSectionContents is registered
        with a c_char_p return type, so the value presumably stops at the
        first NUL byte -- confirm whether full binary contents are needed.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionContents(self)

    @CachedProperty
    def address(self):
        """The address of this section."""
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionAddress(self)

    def has_symbol(self, symbol):
        """Returns whether a Symbol instance is present in this Section."""
        if self.expired:
            raise Exception('Section instance has expired.')

        assert isinstance(symbol, Symbol)
        return lib.LLVMGetSectionContainsSymbol(self, symbol)

    def get_relocations(self, cache=False):
        """Obtain the relocations in this Section.

        This is a generator for llvm.object.Relocation instances. If cache is
        true, Relocation.cache() is called on each instance before it is
        yielded.

        Each instance is a limited-use object. See this module's documentation
        on iterators for more.

        Raises Exception (on first iteration) if this Section has expired.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        relocations = lib.LLVMGetRelocations(self)
        last = None
        try:
            while not lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
                last = Relocation(relocations)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextRelocation(relocations)
                last.expire()
        finally:
            # Always release the native iterator, even if the consumer stops
            # iterating early or an exception propagates.
            if last is not None:
                last.expire()

            lib.LLVMDisposeRelocationIterator(relocations)

    def cache(self):
        """Cache properties of this Section.

        This can be called as a workaround to the single active Section
        limitation. When called, the properties of the Section are fetched so
        they are still available after the Section has been marked inactive.
        """
        getattr(self, 'name')
        getattr(self, 'size')
        getattr(self, 'contents')
        getattr(self, 'address')

    def expire(self):
        """Expire the section.

        This is called internally by the section iterator.
        """
        self.expired = True

class Symbol(LLVMObject):
    """Represents a symbol in an object file."""
    def __init__(self, ptr, object_file=None):
        """Create a new symbol instance.

        ptr is the native handle for the symbol. object_file is the
        ObjectFile the symbol came from; it may be omitted (e.g. for symbols
        obtained through Relocation.symbol), in which case the section
        property is unavailable.
        """
        assert isinstance(ptr, c_object_p)
        assert object_file is None or isinstance(object_file, ObjectFile)

        LLVMObject.__init__(self, ptr)

        self.expired = False
        self._object_file = object_file

    @CachedProperty
    def name(self):
        """The str name of the symbol.

        This is often a function or variable name. Keep in mind that name
        mangling could be in effect.
        """
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolName(self)

    @CachedProperty
    def address(self):
        """The address of this symbol."""
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolAddress(self)

    @CachedProperty
    def file_offset(self):
        """The offset of this symbol in the file."""
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolFileOffset(self)

    @CachedProperty
    def size(self):
        """The size of the symbol, in bytes."""
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolSize(self)

    @CachedProperty
    def section(self):
        """The Section to which this Symbol belongs.

        The returned Section instance does not expire, unlike Sections that are
        commonly obtained through iteration.

        Because this obtains a new section iterator each time it is accessed,
        calling this on a number of Symbol instances could be expensive.

        Raises Exception if this Symbol has expired or was created without an
        ObjectFile.
        """
        # Like every other property, refuse to hand an expired handle to LLVM.
        if self.expired:
            raise Exception('Symbol instance has expired.')

        if self._object_file is None:
            raise Exception('Symbol is not associated with an ObjectFile.')

        sections = lib.LLVMGetSections(self._object_file)
        lib.LLVMMoveToContainingSection(sections, self)

        return Section(sections)

    def cache(self):
        """Cache the name, address, file_offset and size properties."""
        getattr(self, 'name')
        getattr(self, 'address')
        getattr(self, 'file_offset')
        getattr(self, 'size')

    def expire(self):
        """Mark the object as expired to prevent future API accesses.

        This is called internally by this module and it is unlikely that
        external callers have a legitimate reason for using it.
        """
        self.expired = True

class Relocation(LLVMObject):
    """Represents a relocation definition."""
    def __init__(self, ptr):
        """Create a new relocation instance.

        Relocations are created from objects derived from Section instances.
        Therefore, this constructor should not be called outside of this
        module. See Section.get_relocations() for the proper method to obtain
        a Relocation instance.
        """
        assert isinstance(ptr, c_object_p)

        LLVMObject.__init__(self, ptr)

        self.expired = False

    @CachedProperty
    def address(self):
        """The address of this relocation."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationAddress(self)

    @CachedProperty
    def offset(self):
        """The offset of this relocation."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationOffset(self)

    @CachedProperty
    def symbol(self):
        """The Symbol corresponding to this Relocation.

        The returned Symbol is created without an ObjectFile, so its section
        property cannot be used.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        ptr = lib.LLVMGetRelocationSymbol(self)
        return Symbol(ptr)

    @CachedProperty
    def type_number(self):
        """The relocation type, as a number."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationType(self)

    @CachedProperty
    def type_name(self):
        """The relocation type's name, as a str."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationTypeName(self)

    @CachedProperty
    def value_string(self):
        """The value returned by LLVMGetRelocationValueString."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationValueString(self)

    def expire(self):
        """Expire this instance, making future API accesses fail."""
        self.expired = True

    def cache(self):
        """Cache all cacheable properties on this instance."""
        getattr(self, 'address')
        getattr(self, 'offset')
        getattr(self, 'symbol')
        # The property is named type_number; there is no 'type' attribute.
        getattr(self, 'type_number')
        getattr(self, 'type_name')
        getattr(self, 'value_string')

def register_library(library):
    """Declare ctypes prototypes for the Object.h functions on *library*.

    For every function listed below, argtypes is assigned; restype is
    assigned only for the entries that specify one, so the remaining
    functions keep whatever restype the library object already gives them.
    """
    # Sentinel meaning "do not assign restype for this function". None is not
    # used because it is itself a value that could be assigned to restype.
    keep_restype = object()

    # (function name, argtypes, restype) for the Object.h functions.
    prototypes = (
        ('LLVMCreateObjectFile', [MemoryBuffer], c_object_p),
        ('LLVMDisposeObjectFile', [ObjectFile], keep_restype),
        ('LLVMGetSections', [ObjectFile], c_object_p),
        ('LLVMDisposeSectionIterator', [c_object_p], keep_restype),
        ('LLVMIsSectionIteratorAtEnd', [ObjectFile, c_object_p], bool),
        ('LLVMMoveToNextSection', [c_object_p], keep_restype),
        ('LLVMMoveToContainingSection', [c_object_p, c_object_p],
         keep_restype),
        ('LLVMGetSymbols', [ObjectFile], c_object_p),
        ('LLVMDisposeSymbolIterator', [c_object_p], keep_restype),
        ('LLVMIsSymbolIteratorAtEnd', [ObjectFile, c_object_p], bool),
        ('LLVMMoveToNextSymbol', [c_object_p], keep_restype),
        ('LLVMGetSectionName', [c_object_p], c_char_p),
        ('LLVMGetSectionSize', [c_object_p], c_uint64),
        ('LLVMGetSectionContents', [c_object_p], c_char_p),
        ('LLVMGetSectionAddress', [c_object_p], c_uint64),
        ('LLVMGetSectionContainsSymbol', [c_object_p, c_object_p], bool),
        ('LLVMGetRelocations', [c_object_p], c_object_p),
        ('LLVMDisposeRelocationIterator', [c_object_p], keep_restype),
        ('LLVMIsRelocationIteratorAtEnd', [c_object_p, c_object_p], bool),
        ('LLVMMoveToNextRelocation', [c_object_p], keep_restype),
        ('LLVMGetSymbolName', [Symbol], c_char_p),
        ('LLVMGetSymbolAddress', [Symbol], c_uint64),
        ('LLVMGetSymbolFileOffset', [Symbol], c_uint64),
        ('LLVMGetSymbolSize', [Symbol], c_uint64),
        ('LLVMGetRelocationAddress', [c_object_p], c_uint64),
        ('LLVMGetRelocationOffset', [c_object_p], c_uint64),
        ('LLVMGetRelocationSymbol', [c_object_p], c_object_p),
        ('LLVMGetRelocationType', [c_object_p], c_uint64),
        ('LLVMGetRelocationTypeName', [c_object_p], c_char_p),
        ('LLVMGetRelocationValueString', [c_object_p], c_char_p),
    )

    for symbol_name, arg_types, result_type in prototypes:
        function = getattr(library, symbol_name)
        function.argtypes = arg_types
        if result_type is not keep_restype:
            function.restype = result_type

lib = get_library()
register_library(lib)