disassembler.py revision fdddf771716a48857a1044abc7917886bf0bf719
1#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
2#
3#                     The LLVM Compiler Infrastructure
4#
5# This file is distributed under the University of Illinois Open Source
6# License. See LICENSE.TXT for details.
7#
8#===------------------------------------------------------------------------===#
9
10from ctypes import CFUNCTYPE
11from ctypes import POINTER
12from ctypes import addressof
13from ctypes import byref
14from ctypes import c_byte
15from ctypes import c_char_p
16from ctypes import c_int
17from ctypes import c_size_t
18from ctypes import c_ubyte
19from ctypes import c_uint64
20from ctypes import c_void_p
21from ctypes import cast
22
23from .common import LLVMObject
24from .common import c_object_p
25from .common import get_library
26
# Names exported by a star-import of this module.
__all__ = ['Disassembler']

# Handle to the native library, as returned by .common.get_library().
lib = get_library()

# Registry of ctypes callback prototypes, keyed by name. The entries are
# filled in near the bottom of this module, before register_library() runs.
callbacks = {}

# Constants for set_options
# NOTE(review): presumably mirrors LLVMDisassembler_Option_UseMarkup from the
# LLVM-C disassembler header -- confirm against the C API.
Option_UseMarkup = 1
36
class Disassembler(LLVMObject):
    """Represents a disassembler instance.

    Disassembler instances are tied to a specific "triple," which must be
    defined at creation time.

    Disassembler instances can disassemble instructions from multiple sources.
    """

    # Size, in bytes, of the buffer handed to LLVMDisasmInstruction to receive
    # the textual form of a single instruction.
    _OUT_STR_SIZE = 255

    def __init__(self, triple):
        """Create a new disassembler instance.

        The triple argument is the triple to create the disassembler for. This
        is something like 'i386-apple-darwin9'.

        Raises Exception if the library returns no disassembler for the
        triple.
        """
        # Calling a CFUNCTYPE prototype with the integer 0 builds a function
        # pointer at address 0, i.e. no callback is supplied.
        ptr = lib.LLVMCreateDisasm(c_char_p(triple), c_void_p(None), c_int(0),
                callbacks['op_info'](0), callbacks['symbol_lookup'](0))

        # Test the pointer itself: a NULL ctypes pointer is falsy, whereas
        # dereferencing it through .contents raises ValueError and would hide
        # the error below. For a non-NULL pointer, .contents would inspect the
        # memory of the opaque context, which says nothing about success.
        if not ptr:
            raise Exception('Could not obtain disassembler for triple: %s' %
                            triple)

        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)

    @staticmethod
    def _as_byte_string(source):
        """Return source in a form that c_char_p() accepts.

        c_char_p() rejects bytearray objects, so those are copied into an
        immutable byte string. Every other value is returned unchanged.
        """
        if isinstance(source, bytearray):
            return bytes(source)
        return source

    def get_instruction(self, source, pc=0):
        """Obtain the next instruction from an input source.

        The input source should be a str or bytearray or something that
        represents a sequence of bytes.

        This function will start reading bytes from the beginning of the
        source.

        The pc argument specifies the address that the first byte is at.

        This returns a 2-tuple of:

          long number of bytes read. 0 if no instruction was read.
          str representation of instruction. This will be the assembly that
            represents the instruction.
        """
        source = self._as_byte_string(source)
        buf = cast(c_char_p(source), POINTER(c_ubyte))
        out_str = cast((c_byte * self._OUT_STR_SIZE)(), c_char_p)

        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(source)),
                                           c_uint64(pc), out_str,
                                           self._OUT_STR_SIZE)

        return (result, out_str.value)

    def get_instructions(self, source, pc=0):
        """Obtain multiple instructions from an input source.

        This is like get_instruction() except it is a generator for all
        instructions within the source. It starts at the beginning of the
        source and reads instructions until no more can be read.

        This generator returns 3-tuple of:

          long address of instruction.
          long size of instruction, in bytes.
          str representation of instruction.
        """
        source = self._as_byte_string(source)
        # Held in a local so the underlying bytes stay alive while raw
        # addresses into them are in use below.
        source_bytes = c_char_p(source)
        # One output buffer is reused for every instruction; its value is read
        # before the next call overwrites it.
        out_str = cast((c_byte * self._OUT_STR_SIZE)(), c_char_p)

        # View the source as a fixed-size array so that addressof() yields the
        # address of its first byte.
        buf = cast(source_bytes, POINTER(c_ubyte * len(source))).contents
        offset = 0
        address = pc
        end_address = pc + len(source)
        while address < end_address:
            # Pointer to the first byte that has not been decoded yet.
            b = cast(addressof(buf) + offset, POINTER(c_ubyte))
            result = lib.LLVMDisasmInstruction(self, b,
                    c_uint64(len(source) - offset), c_uint64(address),
                    out_str, self._OUT_STR_SIZE)

            # A result of 0 means no instruction was read; stop here.
            if result == 0:
                break

            yield (address, result, out_str.value)

            address += result
            offset += result

    def set_options(self, options):
        """Set options on this disassembler.

        The options argument is an integer passed straight through to
        LLVMSetDisasmOptions (see the Option_* constants in this module).

        Raises Exception if the library call returns a false value.
        """
        if not lib.LLVMSetDisasmOptions(self, options):
            raise Exception('Unable to set all disassembler options in %i' % options)
122
123
def register_library(library):
    """Declare ctypes prototypes for the disassembler functions of library.

    Sets argtypes (and restype where needed) on the four native entry points
    this module calls, so ctypes converts arguments and return values
    explicitly rather than falling back to its default int handling.
    """
    library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int,
        callbacks['op_info'], callbacks['symbol_lookup']]
    library.LLVMCreateDisasm.restype = c_object_p

    library.LLVMDisasmDispose.argtypes = [Disassembler]
    # The C function returns void. Without an explicit restype ctypes assumes
    # c_int and would read an undefined return value.
    library.LLVMDisasmDispose.restype = None

    library.LLVMDisasmInstruction.argtypes = [Disassembler, POINTER(c_ubyte),
            c_uint64, c_uint64, c_char_p, c_size_t]
    library.LLVMDisasmInstruction.restype = c_size_t

    library.LLVMSetDisasmOptions.argtypes = [Disassembler, c_uint64]
    library.LLVMSetDisasmOptions.restype = c_int
137
138
# Function-pointer prototypes for the two callbacks LLVMCreateDisasm accepts.
# They must be registered before register_library() runs, because that
# function reads them out of the callbacks dict.
callbacks.update({
    'op_info': CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
                         c_int, c_void_p),
    'symbol_lookup': CFUNCTYPE(c_char_p, c_void_p, c_uint64,
                               POINTER(c_uint64), c_uint64,
                               POINTER(c_char_p)),
})

# Apply the prototypes to the library handle loaded at import time.
register_library(lib)
146