#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
992a3e9d63a4fd408fce76c1b2ba71b5a7fb04b8aGregory Szorc
from ctypes import CFUNCTYPE
from ctypes import POINTER
from ctypes import addressof
from ctypes import c_byte
from ctypes import c_char_p
from ctypes import c_int
from ctypes import c_size_t
from ctypes import c_ubyte
from ctypes import c_uint64
from ctypes import c_void_p
from ctypes import cast

from .common import LLVMObject
from .common import c_object_p
from .common import get_library
2592a3e9d63a4fd408fce76c1b2ba71b5a7fb04b8aGregory Szorc
# Public names exported by this module.
__all__ = [
    'Disassembler',
]

# Library handle returned by get_library(); every LLVM-C call in this module
# goes through it.
lib = get_library()

# ctypes callback prototypes keyed by name. The entries ('op_info' and
# 'symbol_lookup') are filled in near the end of the module, before
# register_library() is invoked.
callbacks = {}
3292a3e9d63a4fd408fce76c1b2ba71b5a7fb04b8aGregory Szorc
# Constants for Disassembler.set_options().
Option_UseMarkup = 1
35fdddf771716a48857a1044abc7917886bf0bf719Gregory Szorc
3660e4d7f618ffc8b91c6fa2638e3c25a085c2fb1eAnders Waldenborg
3760e4d7f618ffc8b91c6fa2638e3c25a085c2fb1eAnders Waldenborg
# Set to True once the per-target initializers have been attempted.
_initialized = False

# Target names used to build the LLVMInitialize<Target><Component> symbol
# names looked up by _ensure_initialized().
_targets = ['AArch64', 'ARM', 'Hexagon', 'MSP430', 'Mips', 'NVPTX', 'PowerPC',
            'R600', 'Sparc', 'SystemZ', 'X86', 'XCore']


def _ensure_initialized():
    """Run the LLVM target initializers once per process.

    For every name in _targets, the TargetInfo, TargetMC and Disassembler
    initializer functions are looked up on the library and called. Functions
    that the library does not export are skipped silently. Subsequent calls
    are no-ops.
    """
    global _initialized
    if _initialized:
        return

    # Here one would want to call the functions
    # LLVMInitializeAll{TargetInfo,TargetMC,Disassembler}s, but
    # unfortunately they are only defined as static inline
    # functions in the header files of llvm-c, so they don't exist
    # as symbols in the shared library.
    # So until that is fixed use this hack to initialize them all.
    for tgt in _targets:
        for initializer in ("TargetInfo", "TargetMC", "Disassembler"):
            try:
                f = getattr(lib, "LLVMInitialize" + tgt + initializer)
            except AttributeError:
                # This target/component is not present in the library.
                continue
            f()
    _initialized = True
5760e4d7f618ffc8b91c6fa2638e3c25a085c2fb1eAnders Waldenborg
5860e4d7f618ffc8b91c6fa2638e3c25a085c2fb1eAnders Waldenborg
class Disassembler(LLVMObject):
    """Represents a disassembler instance.

    A Disassembler instance is tied to a specific target "triple", which must
    be supplied at creation time.

    A single instance can disassemble instructions from multiple sources.
    """
    def __init__(self, triple):
        """Create a new disassembler instance.

        The triple argument is the triple to create the disassembler for. This
        is something like 'i386-apple-darwin9'. It may be a byte string or a
        text string; text is encoded as UTF-8 before being passed to the C
        API.

        Raises Exception if the library does not return a disassembler for
        the triple.
        """

        _ensure_initialized()

        # c_char_p only accepts byte strings, so encode text triples first.
        # The original value is kept for the error message below.
        raw_triple = triple
        if not isinstance(raw_triple, bytes):
            raw_triple = raw_triple.encode('utf-8')

        # Both callbacks are passed as null function pointers.
        ptr = lib.LLVMCreateDisasm(c_char_p(raw_triple), c_void_p(None),
                c_int(0), callbacks['op_info'](0),
                callbacks['symbol_lookup'](0))
        if not ptr:
            raise Exception('Could not obtain disassembler for triple: %s' %
                            triple)

        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)

    def get_instruction(self, source, pc=0):
        """Obtain the next instruction from an input source.

        The input source should be a byte string, a bytearray or another
        object exposing its bytes through the buffer protocol.

        This function will start reading bytes from the beginning of the
        source.

        The pc argument specifies the address that the first byte is at.

        This returns a 2-tuple of:

          long number of bytes read. 0 if no instruction was read.
          str representation of instruction. This will be the assembly that
            represents the instruction.
        """
        # c_char_p rejects bytearray and other buffer objects, so take an
        # immutable byte-string copy of anything that is not already bytes.
        if not isinstance(source, bytes):
            source = memoryview(source).tobytes()

        buf = cast(c_char_p(source), POINTER(c_ubyte))
        # Scratch buffer that receives the textual form of the instruction.
        out_str = cast((c_byte * 255)(), c_char_p)

        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(source)),
                                           c_uint64(pc), out_str, 255)

        return (result, out_str.value)

    def get_instructions(self, source, pc=0):
        """Obtain multiple instructions from an input source.

        This is like get_instruction() except it is a generator for all
        instructions within the source. It starts at the beginning of the
        source and reads instructions until no more can be read.

        The source accepts the same kinds of objects as get_instruction().

        This generator returns 3-tuple of:

          long address of instruction.
          long size of instruction, in bytes.
          str representation of instruction.
        """
        # See get_instruction(): normalise buffer objects to a byte string.
        if not isinstance(source, bytes):
            source = memoryview(source).tobytes()

        # Held in a local so the underlying bytes stay alive while the
        # generator is suspended between yields.
        source_bytes = c_char_p(source)
        out_str = cast((c_byte * 255)(), c_char_p)

        # This could probably be written cleaner. But, it does work.
        buf = cast(source_bytes, POINTER(c_ubyte * len(source))).contents
        offset = 0
        address = pc
        end_address = pc + len(source)
        while address < end_address:
            # Pointer to the first byte that has not been decoded yet.
            b = cast(addressof(buf) + offset, POINTER(c_ubyte))
            result = lib.LLVMDisasmInstruction(self, b,
                    c_uint64(len(source) - offset), c_uint64(address),
                    out_str, 255)

            # A zero size means nothing could be decoded at this position.
            if result == 0:
                break

            yield (address, result, out_str.value)

            address += result
            offset += result

    def set_options(self, options):
        """Set options on this disassembler.

        The options argument is an integer built from the Option_* constants
        defined in this module.

        Raises Exception if the library reports that the options could not
        all be set.
        """
        if not lib.LLVMSetDisasmOptions(self, options):
            raise Exception('Unable to set all disassembler options in %i' % options)
147fdddf771716a48857a1044abc7917886bf0bf719Gregory Szorc
14892a3e9d63a4fd408fce76c1b2ba71b5a7fb04b8aGregory Szorc
def register_library(library):
    """Declare ctypes prototypes for the disassembler functions.

    Sets argtypes (and restype where one is declared) on the
    LLVMCreateDisasm, LLVMDisasmDispose, LLVMDisasmInstruction and
    LLVMSetDisasmOptions attributes of the given library object.

    The 'op_info' and 'symbol_lookup' entries of the module-level callbacks
    dict must already be populated when this is called.
    """
    library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int,
        callbacks['op_info'], callbacks['symbol_lookup']]
    library.LLVMCreateDisasm.restype = c_object_p

    library.LLVMDisasmDispose.argtypes = [Disassembler]

    library.LLVMDisasmInstruction.argtypes = [Disassembler, POINTER(c_ubyte),
            c_uint64, c_uint64, c_char_p, c_size_t]
    library.LLVMDisasmInstruction.restype = c_size_t

    library.LLVMSetDisasmOptions.argtypes = [Disassembler, c_uint64]
    library.LLVMSetDisasmOptions.restype = c_int
162fdddf771716a48857a1044abc7917886bf0bf719Gregory Szorc
163fdddf771716a48857a1044abc7917886bf0bf719Gregory Szorc
# Function-pointer prototypes for the two callback parameters of
# LLVMCreateDisasm. They have to exist before register_library() runs,
# because it reads both entries when declaring argtypes.
callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
                                 c_int, c_void_p)
callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64,
                                       POINTER(c_uint64), c_uint64,
                                       POINTER(c_char_p))

# Apply the prototypes to the library handle loaded at import time.
register_library(lib)
171