#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

from ctypes import CFUNCTYPE
from ctypes import POINTER
from ctypes import addressof
from ctypes import c_byte
from ctypes import c_char_p
from ctypes import c_int
from ctypes import c_size_t
from ctypes import c_ubyte
from ctypes import c_uint64
from ctypes import c_void_p
from ctypes import cast

from .common import LLVMObject
from .common import c_object_p
from .common import get_library

__all__ = [
    'Disassembler',
]

lib = get_library()

# ctypes prototypes of the callbacks accepted by LLVMCreateDisasm. The entries
# are filled in near the bottom of this module, before register_library() runs.
callbacks = {}

# Constants for set_options
Option_UseMarkup = 1

# Size, in bytes, of the buffer that receives the textual form of a
# disassembled instruction.
_OUT_STR_SIZE = 255

_initialized = False
_targets = ['AArch64', 'ARM', 'Hexagon', 'MSP430', 'Mips', 'NVPTX', 'PowerPC',
            'R600', 'Sparc', 'SystemZ', 'X86', 'XCore']


def _ensure_initialized():
    """Initialize every target available in the loaded library, once.

    Subsequent calls are no-ops.
    """
    global _initialized
    if _initialized:
        return

    # Here one would want to call the functions
    # LLVMInitializeAll{TargetInfo,TargetMC,Disassembler}s, but
    # unfortunately they are only defined as static inline
    # functions in the header files of llvm-c, so they don't exist
    # as symbols in the shared library.
    # So until that is fixed use this hack to initialize them all
    for tgt in _targets:
        for initializer in ("TargetInfo", "TargetMC", "Disassembler"):
            try:
                f = getattr(lib, "LLVMInitialize" + tgt + initializer)
            except AttributeError:
                # The symbol is not present in this library; skip it.
                continue
            f()
    _initialized = True


def _encode_text(value):
    """Return text encoded as UTF-8 bytes; pass anything else through.

    c_char_p only accepts byte strings on Python 3, so text has to be encoded
    before it is handed to the library. Byte strings (and values without an
    encode() method, such as None) are returned unchanged.
    """
    if isinstance(value, bytes) or not hasattr(value, 'encode'):
        return value
    return value.encode('utf-8')


def _as_byte_string(source):
    """Return source in a form that c_char_p accepts.

    c_char_p rejects bytearray objects, so those are copied into an immutable
    byte string. Every other value is returned unchanged.
    """
    if isinstance(source, bytearray):
        return bytes(source)
    return source


def _new_out_str():
    """Allocate a zeroed _OUT_STR_SIZE-byte buffer viewed as a c_char_p."""
    return cast((c_byte * _OUT_STR_SIZE)(), c_char_p)


class Disassembler(LLVMObject):
    """Represents a disassembler instance.

    Disassembler instances are tied to a specific "triple", which must be
    defined at creation time.

    Disassembler instances can disassemble instructions from multiple sources.
    """
    def __init__(self, triple):
        """Create a new disassembler instance.

        The triple argument is the triple to create the disassembler for. This
        is something like 'i386-apple-darwin9'. Both text and byte strings are
        accepted; text is encoded as UTF-8 before being passed to the library.

        Raises Exception if the library does not return a disassembler for
        the triple.
        """
        _ensure_initialized()

        ptr = lib.LLVMCreateDisasm(c_char_p(_encode_text(triple)),
                c_void_p(None), c_int(0),
                callbacks['op_info'](0), callbacks['symbol_lookup'](0))
        if not ptr:
            raise Exception('Could not obtain disassembler for triple: %s' %
                            triple)

        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)

    def get_instruction(self, source, pc=0):
        """Obtain the next instruction from an input source.

        The input source should be a byte string (str on Python 2, bytes on
        Python 3) or a bytearray.

        This function will start reading bytes from the beginning of the
        source.

        The pc argument specifies the address that the first byte is at.

        This returns a 2-tuple of:

          long number of bytes read. 0 if no instruction was read.
          byte string representation of instruction. This will be the
          assembly that represents the instruction.
        """
        data = _as_byte_string(source)
        buf = cast(c_char_p(data), POINTER(c_ubyte))
        out_str = _new_out_str()

        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(data)),
                                           c_uint64(pc), out_str,
                                           _OUT_STR_SIZE)

        return (result, out_str.value)

    def get_instructions(self, source, pc=0):
        """Obtain multiple instructions from an input source.

        This is like get_instruction() except it is a generator for all
        instructions within the source. It starts at the beginning of the
        source and reads instructions until no more can be read.

        This generator returns 3-tuple of:

          long address of instruction.
          long size of instruction, in bytes.
          byte string representation of instruction.
        """
        data = _as_byte_string(source)
        size = len(data)
        source_bytes = c_char_p(data)
        out_str = _new_out_str()

        # View the input as a fixed-size array so that addressof() yields the
        # address of its first byte; each iteration then points the library at
        # that address plus the number of bytes already consumed.
        buf = cast(source_bytes, POINTER(c_ubyte * size)).contents
        offset = 0
        address = pc
        end_address = pc + size
        while address < end_address:
            b = cast(addressof(buf) + offset, POINTER(c_ubyte))
            result = lib.LLVMDisasmInstruction(self, b,
                    c_uint64(size - offset), c_uint64(address),
                    out_str, _OUT_STR_SIZE)

            # Zero bytes consumed: nothing more can be read from here.
            if result == 0:
                break

            yield (address, result, out_str.value)

            address += result
            offset += result

    def set_options(self, options):
        """Set disassembler options.

        The options argument is an integer passed to LLVMSetDisasmOptions,
        e.g. Option_UseMarkup.

        Raises Exception if the library call returns zero.
        """
        if not lib.LLVMSetDisasmOptions(self, options):
            raise Exception('Unable to set all disassembler options in %i' % options)


def register_library(library):
    """Declare the ctypes prototypes of the disassembler functions.

    The callbacks dictionary must already hold the 'op_info' and
    'symbol_lookup' function types when this is called.
    """
    library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int,
        callbacks['op_info'], callbacks['symbol_lookup']]
    library.LLVMCreateDisasm.restype = c_object_p

    library.LLVMDisasmDispose.argtypes = [Disassembler]

    library.LLVMDisasmInstruction.argtypes = [Disassembler, POINTER(c_ubyte),
        c_uint64, c_uint64, c_char_p, c_size_t]
    library.LLVMDisasmInstruction.restype = c_size_t

    library.LLVMSetDisasmOptions.argtypes = [Disassembler, c_uint64]
    library.LLVMSetDisasmOptions.restype = c_int


callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
                                 c_int, c_void_p)
callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64,
                                       POINTER(c_uint64), c_uint64,
                                       POINTER(c_char_p))

register_library(lib)