1#===- object.py - Python Object Bindings --------------------*- python -*--===#
2#
3#                     The LLVM Compiler Infrastructure
4#
5# This file is distributed under the University of Illinois Open Source
6# License. See LICENSE.TXT for details.
7#
8#===------------------------------------------------------------------------===#
9
10r"""
11Object File Interface
12=====================
13
14This module provides an interface for reading information from object files
15(e.g. binary executables and libraries).
16
Using this module, you can obtain information about an object file's sections,
symbols, and relocations. The file itself is represented by the ObjectFile
class; its sections, symbols, and relocations are represented by the Section,
Symbol, and Relocation classes, respectively.
20
21Usage
22-----
23
24The only way to use this module is to start by creating an ObjectFile. You can
25create an ObjectFile by loading a file (specified by its path) or by creating a
26llvm.core.MemoryBuffer and loading that.
27
28Once you have an object file, you can inspect its sections and symbols directly
29by calling get_sections() and get_symbols() respectively. To inspect
30relocations, call get_relocations() on a Section instance.
31
32Iterator Interface
33------------------
34
35The LLVM bindings expose iteration over sections, symbols, and relocations in a
36way that only allows one instance to be operated on at a single time. This is
37slightly annoying from a Python perspective, as it isn't very Pythonic to have
38objects that "expire" but are still active from a dynamic language.
39
40To aid working around this limitation, each Section, Symbol, and Relocation
41instance caches its properties after first access. So, if the underlying
42iterator is advanced, the properties can still be obtained provided they have
43already been retrieved.
44
45In addition, we also provide a "cache" method on each class to cache all
46available data. You can call this on each obtained instance. Or, you can pass
47cache=True to the appropriate get_XXX() method to have this done for you.
48
49Here are some examples on how to perform iteration:
50
51    obj = ObjectFile(filename='/bin/ls')
52
53    # This is OK. Each Section is only accessed inside its own iteration slot.
54    section_names = []
55    for section in obj.get_sections():
56        section_names.append(section.name)
57
58    # This is NOT OK. You perform a lookup after the object has expired.
59    symbols = list(obj.get_symbols())
60    for symbol in symbols:
61        print symbol.name # This raises because the object has expired.
62
63    # In this example, we mix a working and failing scenario.
64    symbols = []
65    for symbol in obj.get_symbols():
66        symbols.append(symbol)
67        print symbol.name
68
69    for symbol in symbols:
70        print symbol.name # OK
71        print symbol.address # NOT OK. We didn't look up this property before.
72
73    # Cache everything up front.
74    symbols = list(obj.get_symbols(cache=True))
75    for symbol in symbols:
76        print symbol.name # OK
77
78"""
79
80from ctypes import c_char_p
81from ctypes import c_char
82from ctypes import POINTER
83from ctypes import c_uint64
84from ctypes import string_at
85
86from .common import CachedProperty
87from .common import LLVMObject
88from .common import c_object_p
89from .common import get_library
90from .core import MemoryBuffer
91
# Public names of this module. "lib" is the library handle assigned near the
# end of this file from get_library(); the rest are the wrapper classes
# defined below.
__all__ = [
    "lib",
    "ObjectFile",
    "Relocation",
    "Section",
    "Symbol",
]
99
class ObjectFile(LLVMObject):
    """Represents an object/binary file.

    An ObjectFile is the entry point of this module: sections and symbols are
    obtained from it through get_sections() and get_symbols().
    """

    def __init__(self, filename=None, contents=None):
        """Construct an instance from a filename or a MemoryBuffer.

        filename is a path to the file to load; it is passed to
        llvm.core.MemoryBuffer(filename=...).
        contents, when given, must be a llvm.core.MemoryBuffer instance.

        If both are supplied, filename wins and contents is ignored.

        Raises Exception if neither filename nor contents is provided.
        """
        if contents:
            assert isinstance(contents, MemoryBuffer)

        if filename is not None:
            contents = MemoryBuffer(filename=filename)

        if contents is None:
            raise Exception('No input found.')

        ptr = lib.LLVMCreateObjectFile(contents)
        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
        # NOTE(review): presumably this ties the buffer's lifetime to this
        # object; the exact semantics live in LLVMObject.take_ownership().
        self.take_ownership(contents)

    def get_sections(self, cache=False):
        """Obtain the sections in this object file.

        This is a generator for llvm.object.Section instances. If cache is
        true, cache() is called on every Section before it is yielded.

        Sections are exposed as limited-use objects. See the module's
        documentation on iterators for more. A Section is expired once the
        iteration moves past it; the most recently yielded Section is expired
        when the generator finishes, fails, or is closed before exhaustion.
        """
        sections = lib.LLVMGetSections(self)
        last = None
        try:
            while not lib.LLVMIsSectionIteratorAtEnd(self, sections):
                last = Section(sections)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextSection(sections)
                last.expire()
        finally:
            # The finally clause also runs when the consumer abandons the
            # generator early or when cache() raises, so the native iterator
            # is released in every case instead of only after a full pass.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSectionIterator(sections)

    def get_symbols(self, cache=False):
        """Obtain the symbols in this object file.

        This is a generator for llvm.object.Symbol instances. If cache is
        true, cache() is called on every Symbol before it is yielded.

        Each Symbol instance is a limited-use object. See this module's
        documentation on iterators for more. A Symbol is expired once the
        iteration moves past it; the most recently yielded Symbol is expired
        when the generator finishes, fails, or is closed before exhaustion.
        """
        symbols = lib.LLVMGetSymbols(self)
        last = None
        try:
            while not lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
                last = Symbol(symbols, self)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextSymbol(symbols)
                last.expire()
        finally:
            # See get_sections(): guarantees the native iterator is disposed
            # even if iteration stops before the end.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSymbolIterator(symbols)
178
class Section(LLVMObject):
    """Represents a section in an object file."""

    def __init__(self, ptr):
        """Construct a new section instance.

        Section instances can currently only be created from an ObjectFile
        instance. Therefore, this constructor should not be used outside of
        this module.
        """
        LLVMObject.__init__(self, ptr)

        self.expired = False

    def _check_active(self):
        """Raise Exception if this Section has been expired."""
        if self.expired:
            raise Exception('Section instance has expired.')

    @CachedProperty
    def name(self):
        """Obtain the string name of the section.

        This is typically something like '.dynsym' or '.rodata'.

        Raises Exception if the Section has expired.
        """
        self._check_active()

        return lib.LLVMGetSectionName(self)

    @CachedProperty
    def size(self):
        """The size of the section in bytes, as an integer.

        Raises Exception if the Section has expired.
        """
        self._check_active()

        return lib.LLVMGetSectionSize(self)

    @CachedProperty
    def contents(self):
        """The raw contents of the section.

        Returns a string of exactly `size` bytes copied from the section, or
        None if the library returns a NULL contents pointer.

        Raises Exception if the Section has expired.
        """
        self._check_active()

        # The contents are not NUL-terminated, so the length has to come from
        # the section size rather than from the data itself.
        siz = self.size

        r = lib.LLVMGetSectionContents(self)
        if r:
            return string_at(r, siz)
        return None

    @CachedProperty
    def address(self):
        """The address of this section, as an integer.

        Raises Exception if the Section has expired.
        """
        self._check_active()

        return lib.LLVMGetSectionAddress(self)

    def has_symbol(self, symbol):
        """Returns whether a Symbol instance is present in this Section.

        Raises Exception if either this Section or the given Symbol has
        expired.
        """
        self._check_active()

        assert isinstance(symbol, Symbol)
        # An expired Symbol wraps an iterator that has been advanced or
        # disposed; handing it to the library would query the wrong symbol or
        # touch released memory.
        if symbol.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSectionContainsSymbol(self, symbol)

    def get_relocations(self, cache=False):
        """Obtain the relocations in this Section.

        This is a generator for llvm.object.Relocation instances. If cache is
        true, cache() is called on every Relocation before it is yielded.

        Each instance is a limited-use object. See this module's documentation
        on iterators for more. A Relocation is expired once the iteration
        moves past it; the most recently yielded Relocation is expired when
        the generator finishes, fails, or is closed before exhaustion.

        Raises Exception (on first iteration) if the Section has expired.
        """
        self._check_active()

        relocations = lib.LLVMGetRelocations(self)
        last = None
        try:
            while not lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
                last = Relocation(relocations)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextRelocation(relocations)
                last.expire()
        finally:
            # Also reached when the consumer stops early or cache() raises,
            # so the native iterator is always released.
            if last is not None:
                last.expire()

            lib.LLVMDisposeRelocationIterator(relocations)

    def cache(self):
        """Cache properties of this Section.

        This can be called as a workaround to the single active Section
        limitation. When called, the properties of the Section are fetched so
        they are still available after the Section has been marked inactive.
        """
        getattr(self, 'name')
        getattr(self, 'size')
        getattr(self, 'contents')
        getattr(self, 'address')

    def expire(self):
        """Expire the section.

        This is called internally by the section iterator.
        """
        self.expired = True
289
class Symbol(LLVMObject):
    """Represents a symbol in an object file."""

    def __init__(self, ptr, object_file=None):
        """Create a new symbol instance.

        ptr is the c_object_p handle wrapped by this Symbol.

        object_file is the ObjectFile the symbol belongs to. It is only used
        by the section property and may be omitted when no ObjectFile is at
        hand, which is how Relocation.symbol creates its Symbol.
        """
        assert isinstance(ptr, c_object_p)
        assert object_file is None or isinstance(object_file, ObjectFile)

        LLVMObject.__init__(self, ptr)

        self.expired = False
        self._object_file = object_file

    def _check_active(self):
        """Raise Exception if this Symbol has been expired."""
        if self.expired:
            raise Exception('Symbol instance has expired.')

    @CachedProperty
    def name(self):
        """The str name of the symbol.

        This is often a function or variable name. Keep in mind that name
        mangling could be in effect.

        Raises Exception if the Symbol has expired.
        """
        self._check_active()

        return lib.LLVMGetSymbolName(self)

    @CachedProperty
    def address(self):
        """The address of this symbol, as an integer.

        Raises Exception if the Symbol has expired.
        """
        self._check_active()

        return lib.LLVMGetSymbolAddress(self)

    @CachedProperty
    def size(self):
        """The size of the symbol in bytes, as an integer.

        Raises Exception if the Symbol has expired.
        """
        self._check_active()

        return lib.LLVMGetSymbolSize(self)

    @CachedProperty
    def section(self):
        """The Section to which this Symbol belongs.

        The returned Section instance does not expire, unlike Sections that are
        commonly obtained through iteration.

        Because this obtains a new section iterator each time it is accessed,
        calling this on a number of Symbol instances could be expensive. The
        iterator created here is not disposed by this property.

        Raises Exception if the Symbol has expired or was created without an
        ObjectFile.
        """
        # The lookup hands this Symbol's handle to the library, so it must
        # still refer to the symbol this instance was created for.
        self._check_active()

        if self._object_file is None:
            raise Exception('Symbol instance has no associated ObjectFile.')

        sections = lib.LLVMGetSections(self._object_file)
        lib.LLVMMoveToContainingSection(sections, self)

        return Section(sections)

    def cache(self):
        """Cache all cacheable properties.

        The section property is not fetched here.
        """
        getattr(self, 'name')
        getattr(self, 'address')
        getattr(self, 'size')

    def expire(self):
        """Mark the object as expired to prevent future API accesses.

        This is called internally by this module and it is unlikely that
        external callers have a legitimate reason for using it.
        """
        self.expired = True

class Relocation(LLVMObject):
    """Represents a relocation definition."""

    def __init__(self, ptr):
        """Create a new relocation instance.

        Relocations are created from objects derived from Section instances.
        Therefore, this constructor should not be called outside of this
        module. See Section.get_relocations() for the proper method to obtain
        a Relocation instance.
        """
        assert isinstance(ptr, c_object_p)

        LLVMObject.__init__(self, ptr)

        self.expired = False

    def _check_active(self):
        """Raise Exception if this Relocation has been expired."""
        if self.expired:
            raise Exception('Relocation instance has expired.')

    @CachedProperty
    def address(self):
        """The address of this relocation, as an integer.

        Raises Exception if the Relocation has expired.
        """
        self._check_active()

        return lib.LLVMGetRelocationAddress(self)

    @CachedProperty
    def offset(self):
        """The offset of this relocation, as an integer.

        Raises Exception if the Relocation has expired.
        """
        self._check_active()

        return lib.LLVMGetRelocationOffset(self)

    @CachedProperty
    def symbol(self):
        """The Symbol corresponding to this Relocation.

        The returned Symbol has no associated ObjectFile, so its section
        property is not available.

        Raises Exception if the Relocation has expired.
        """
        self._check_active()

        ptr = lib.LLVMGetRelocationSymbol(self)
        return Symbol(ptr)

    @CachedProperty
    def type_number(self):
        """The relocation type, as an integer.

        Raises Exception if the Relocation has expired.
        """
        self._check_active()

        return lib.LLVMGetRelocationType(self)

    @CachedProperty
    def type_name(self):
        """The relocation type's name, as a str.

        Raises Exception if the Relocation has expired.
        """
        self._check_active()

        return lib.LLVMGetRelocationTypeName(self)

    @CachedProperty
    def value_string(self):
        """The string returned by LLVMGetRelocationValueString.

        Raises Exception if the Relocation has expired.
        """
        self._check_active()

        return lib.LLVMGetRelocationValueString(self)

    def expire(self):
        """Expire this instance, making future API accesses fail."""
        self.expired = True

    def cache(self):
        """Cache all cacheable properties on this instance."""
        getattr(self, 'address')
        getattr(self, 'offset')
        getattr(self, 'symbol')
        # The property is named type_number; there is no 'type' attribute.
        getattr(self, 'type_number')
        getattr(self, 'type_name')
        getattr(self, 'value_string')
434
def register_library(library):
    """Register function prototypes with LLVM library instance.

    Each entry of the table below names a function of the library together
    with its argument types and, where one is declared, its result type.
    The argument types are assigned to the function's argtypes attribute.
    restype is assigned only for entries that declare a result type; for the
    others it is left untouched.
    """
    # Marks entries whose restype must not be assigned.
    untouched = object()

    # Object.h functions
    prototypes = (
        ('LLVMCreateObjectFile', (MemoryBuffer,), c_object_p),
        ('LLVMDisposeObjectFile', (ObjectFile,), untouched),
        ('LLVMGetSections', (ObjectFile,), c_object_p),
        ('LLVMDisposeSectionIterator', (c_object_p,), untouched),
        ('LLVMIsSectionIteratorAtEnd', (ObjectFile, c_object_p), bool),
        ('LLVMMoveToNextSection', (c_object_p,), untouched),
        ('LLVMMoveToContainingSection', (c_object_p, c_object_p), untouched),
        ('LLVMGetSymbols', (ObjectFile,), c_object_p),
        ('LLVMDisposeSymbolIterator', (c_object_p,), untouched),
        ('LLVMIsSymbolIteratorAtEnd', (ObjectFile, c_object_p), bool),
        ('LLVMMoveToNextSymbol', (c_object_p,), untouched),
        ('LLVMGetSectionName', (c_object_p,), c_char_p),
        ('LLVMGetSectionSize', (c_object_p,), c_uint64),
        # Can't use c_char_p here as it isn't a NUL-terminated string.
        ('LLVMGetSectionContents', (c_object_p,), POINTER(c_char)),
        ('LLVMGetSectionAddress', (c_object_p,), c_uint64),
        ('LLVMGetSectionContainsSymbol', (c_object_p, c_object_p), bool),
        ('LLVMGetRelocations', (c_object_p,), c_object_p),
        ('LLVMDisposeRelocationIterator', (c_object_p,), untouched),
        ('LLVMIsRelocationIteratorAtEnd', (c_object_p, c_object_p), bool),
        ('LLVMMoveToNextRelocation', (c_object_p,), untouched),
        ('LLVMGetSymbolName', (Symbol,), c_char_p),
        ('LLVMGetSymbolAddress', (Symbol,), c_uint64),
        ('LLVMGetSymbolSize', (Symbol,), c_uint64),
        ('LLVMGetRelocationAddress', (c_object_p,), c_uint64),
        ('LLVMGetRelocationOffset', (c_object_p,), c_uint64),
        ('LLVMGetRelocationSymbol', (c_object_p,), c_object_p),
        ('LLVMGetRelocationType', (c_object_p,), c_uint64),
        ('LLVMGetRelocationTypeName', (c_object_p,), c_char_p),
        ('LLVMGetRelocationValueString', (c_object_p,), c_char_p),
    )

    for function_name, argument_types, result_type in prototypes:
        function = getattr(library, function_name)
        # A fresh list per function, as with the individual list literals.
        function.argtypes = list(argument_types)
        if result_type is not untouched:
            function.restype = result_type
518
# Obtain the library instance from get_library() and declare the object-file
# function prototypes on it. This runs when the module is imported; the
# classes above refer to the module-level name "lib" when their methods run.
lib = get_library()
register_library(lib)
521