//===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the MCObjectDisassembler class, which
// can be used to construct an MCModule and an MC CFG from an ObjectFile.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H
#define LLVM_MC_MCOBJECTDISASSEMBLER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MemoryObject.h"
#include <memory>
#include <utility>
#include <vector>

namespace llvm {

namespace object {
  class ObjectFile;
  class MachOObjectFile;
}

class MCBasicBlock;
class MCDisassembler;
class MCFunction;
class MCInstrAnalysis;
class MCModule;
class MCObjectSymbolizer;

38ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha/// \brief Disassemble an ObjectFile to an MCModule and MCFunctions. 39ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha/// This class builds on MCDisassembler to disassemble whole sections, creating 40ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha/// MCAtom (MCTextAtom for disassembled sections and MCDataAtom for raw data). 41ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha/// It can also be used to create a control flow graph consisting of MCFunctions 42ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha/// and MCBasicBlocks. 43ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougachaclass MCObjectDisassembler { 44ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougachapublic: 45ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCObjectDisassembler(const object::ObjectFile &Obj, 46ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha const MCDisassembler &Dis, 47ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha const MCInstrAnalysis &MIA); 480a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha virtual ~MCObjectDisassembler() {} 49ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 50ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// \brief Build an MCModule, creating atoms and optionally functions. 51ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// \param withCFG Also build a CFG by adding MCFunctions to the Module. 52ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// If withCFG is false, the MCModule built only contains atoms, representing 53ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// what was found in the object file. If withCFG is true, MCFunctions are 54ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// created, containing MCBasicBlocks. All text atoms are split to form basic 55ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// block atoms, which then each back an MCBasicBlock. 
56ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCModule *buildModule(bool withCFG = false); 57ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 580a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha MCModule *buildEmptyModule(); 590a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha 60f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha typedef std::vector<uint64_t> AddressSetTy; 61f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha /// \name Create a new MCFunction. 62f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MCFunction *createFunction(MCModule *Module, uint64_t BeginAddr, 63f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha AddressSetTy &CallTargets, 64f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha AddressSetTy &TailCallTargets); 65f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 66f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha /// \brief Set the region on which to fallback if disassembly was requested 67f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha /// somewhere not accessible in the object file. 68f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha /// This is used for dynamic disassembly (see RawMemoryObject). 6936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void setFallbackRegion(std::unique_ptr<MemoryObject> &Region) { 7036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines FallbackRegion.reset(Region.release()); 71f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 72f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 730f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha /// \brief Set the symbolizer to use to get information on external functions. 740f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha /// Note that this isn't used to do instruction-level symbolization (that is, 750f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha /// plugged into MCDisassembler), but to symbolize function call targets. 
760f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha void setSymbolizer(MCObjectSymbolizer *ObjectSymbolizer) { 770f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha MOS = ObjectSymbolizer; 780f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha } 790f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha 800a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha /// \brief Get the effective address of the entrypoint, or 0 if there is none. 810a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha virtual uint64_t getEntrypoint(); 820a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha 830a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha /// \name Get the addresses of static constructors/destructors in the object. 840a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha /// The caller is expected to know how to interpret the addresses; 850a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha /// for example, Mach-O init functions expect 5 arguments, not for ELF. 860a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha /// The addresses are original object file load addresses, not effective. 870a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha /// @{ 880a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha virtual ArrayRef<uint64_t> getStaticInitFunctions(); 890a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha virtual ArrayRef<uint64_t> getStaticExitFunctions(); 900a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha /// @} 910a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha 92484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha /// \name Translation between effective and objectfile load address. 93484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha /// @{ 94484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha /// \brief Compute the effective load address, from an objectfile virtual 95484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha /// address. 
This is implemented in a format-specific way, to take into 96484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha /// account things like PIE/ASLR when doing dynamic disassembly. 97484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha /// For example, on Mach-O this would be done by adding the VM addr slide, 98484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha /// on glibc ELF by keeping a map between segment load addresses, filled 99484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha /// using dl_iterate_phdr, etc.. 100484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha /// In most static situations and in the default impl., this returns \p Addr. 101484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha virtual uint64_t getEffectiveLoadAddr(uint64_t Addr); 102484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha 103484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha /// \brief Compute the original load address, as specified in the objectfile. 104484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha /// This is the inverse of getEffectiveLoadAddr. 105484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha virtual uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr); 106484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha /// @} 107484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha 1080a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougachaprotected: 1090a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha const object::ObjectFile &Obj; 1100a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha const MCDisassembler &Dis; 1110a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha const MCInstrAnalysis &MIA; 1120f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha MCObjectSymbolizer *MOS; 1130a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha 114f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha /// \brief The fallback memory region, outside the object file. 
11536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines std::unique_ptr<MemoryObject> FallbackRegion; 116f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 117f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha /// \brief Return a memory region suitable for reading starting at \p Addr. 118f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha /// In most cases, this returns a StringRefMemoryObject backed by the 119f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha /// containing section. When no section was found, this returns the 120f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha /// FallbackRegion, if it is suitable. 121f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha /// If it is not, or if there is no fallback region, this returns 0. 122f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MemoryObject *getRegionFor(uint64_t Addr); 123f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 124ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougachaprivate: 125ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// \brief Fill \p Module by creating an atom for each section. 126ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// This could be made much smarter, using information like symbols, but also 127ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// format-specific features, like mach-o function_start or data_in_code LCs. 128ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha void buildSectionAtoms(MCModule *Module); 129ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 130ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// \brief Enrich \p Module with a CFG consisting of MCFunctions. 131ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// \param Module An MCModule returned by buildModule, with no CFG. 132ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom. 
133ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// When the CFG is built, contiguous instructions that were previously in a 134ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha /// single MCTextAtom will be split in multiple basic block atoms. 135ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha void buildCFG(MCModule *Module); 136f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 137f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MCBasicBlock *getBBAt(MCModule *Module, MCFunction *MCFN, uint64_t BeginAddr, 138f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha AddressSetTy &CallTargets, 139f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha AddressSetTy &TailCallTargets); 140ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha}; 141ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 1420e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougachaclass MCMachOObjectDisassembler : public MCObjectDisassembler { 1430e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha const object::MachOObjectFile &MOOF; 1440e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 1450e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha uint64_t VMAddrSlide; 1460e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha uint64_t HeaderLoadAddress; 1470e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 1480e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha // __DATA;__mod_init_func support. 1490e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha llvm::StringRef ModInitContents; 1500e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha // __DATA;__mod_exit_func support. 1510e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha llvm::StringRef ModExitContents; 1520e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 1530e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougachapublic: 1540e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha /// \brief Construct a Mach-O specific object disassembler. 
1550e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha /// \param VMAddrSlide The virtual address slide applied by dyld. 1560e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha /// \param HeaderLoadAddress The load address of the mach_header for this 1570e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha /// object. 1580e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha MCMachOObjectDisassembler(const object::MachOObjectFile &MOOF, 1590e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha const MCDisassembler &Dis, 1600e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha const MCInstrAnalysis &MIA, uint64_t VMAddrSlide, 1610e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha uint64_t HeaderLoadAddress); 1620e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 1630e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougachaprotected: 16436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines uint64_t getEffectiveLoadAddr(uint64_t Addr) override; 16536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr) override; 16636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines uint64_t getEntrypoint() override; 1670e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 16836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ArrayRef<uint64_t> getStaticInitFunctions() override; 16936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ArrayRef<uint64_t> getStaticExitFunctions() override; 1700e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha}; 1710e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 172ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha} 173ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 174ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#endif 175