//===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the MCObjectDisassembler class, which
// can be used to construct an MCModule and an MC CFG from an ObjectFile.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H
#define LLVM_MC_MCOBJECTDISASSEMBLER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MemoryObject.h"
#include <memory>
#include <vector>

namespace llvm {

// Forward declarations of the object-file classes. This header only refers to
// them by reference, so their full definitions are not needed here.
namespace object {
  class ObjectFile;
  class MachOObjectFile;
}

// Forward declarations of the MC classes. This header only refers to them by
// pointer or reference, so their full definitions are not needed here.
class MCBasicBlock;
class MCDisassembler;
class MCFunction;
class MCInstrAnalysis;
class MCModule;
class MCObjectSymbolizer;

38ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha/// \brief Disassemble an ObjectFile to an MCModule and MCFunctions.
39ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha/// This class builds on MCDisassembler to disassemble whole sections, creating
40ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha/// MCAtom (MCTextAtom for disassembled sections and MCDataAtom for raw data).
41ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha/// It can also be used to create a control flow graph consisting of MCFunctions
42ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha/// and MCBasicBlocks.
43ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougachaclass MCObjectDisassembler {
44ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougachapublic:
45ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  MCObjectDisassembler(const object::ObjectFile &Obj,
46ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha                       const MCDisassembler &Dis,
47ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha                       const MCInstrAnalysis &MIA);
480a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  virtual ~MCObjectDisassembler() {}
49ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha
50ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// \brief Build an MCModule, creating atoms and optionally functions.
51ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// \param withCFG Also build a CFG by adding MCFunctions to the Module.
52ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// If withCFG is false, the MCModule built only contains atoms, representing
53ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// what was found in the object file. If withCFG is true, MCFunctions are
54ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// created, containing MCBasicBlocks. All text atoms are split to form basic
55ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// block atoms, which then each back an MCBasicBlock.
56ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  MCModule *buildModule(bool withCFG = false);
57ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha
580a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  MCModule *buildEmptyModule();
590a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha
60f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  typedef std::vector<uint64_t> AddressSetTy;
61f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  /// \name Create a new MCFunction.
62f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  MCFunction *createFunction(MCModule *Module, uint64_t BeginAddr,
63f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha                             AddressSetTy &CallTargets,
64f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha                             AddressSetTy &TailCallTargets);
65f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha
66f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  /// \brief Set the region on which to fallback if disassembly was requested
67f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  /// somewhere not accessible in the object file.
68f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  /// This is used for dynamic disassembly (see RawMemoryObject).
6936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  void setFallbackRegion(std::unique_ptr<MemoryObject> &Region) {
7036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    FallbackRegion.reset(Region.release());
71f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  }
72f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha
730f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha  /// \brief Set the symbolizer to use to get information on external functions.
740f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha  /// Note that this isn't used to do instruction-level symbolization (that is,
750f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha  /// plugged into MCDisassembler), but to symbolize function call targets.
760f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha  void setSymbolizer(MCObjectSymbolizer *ObjectSymbolizer) {
770f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha    MOS = ObjectSymbolizer;
780f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha  }
790f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha
800a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  /// \brief Get the effective address of the entrypoint, or 0 if there is none.
810a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  virtual uint64_t getEntrypoint();
820a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha
830a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  /// \name Get the addresses of static constructors/destructors in the object.
840a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  /// The caller is expected to know how to interpret the addresses;
850a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  /// for example, Mach-O init functions expect 5 arguments, not for ELF.
860a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  /// The addresses are original object file load addresses, not effective.
870a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  /// @{
880a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  virtual ArrayRef<uint64_t> getStaticInitFunctions();
890a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  virtual ArrayRef<uint64_t> getStaticExitFunctions();
900a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  /// @}
910a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha
92484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  /// \name Translation between effective and objectfile load address.
93484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  /// @{
94484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  /// \brief Compute the effective load address, from an objectfile virtual
95484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  /// address. This is implemented in a format-specific way, to take into
96484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  /// account things like PIE/ASLR when doing dynamic disassembly.
97484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  /// For example, on Mach-O this would be done by adding the VM addr slide,
98484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  /// on glibc ELF by keeping a map between segment load addresses, filled
99484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  /// using dl_iterate_phdr, etc..
100484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  /// In most static situations and in the default impl., this returns \p Addr.
101484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  virtual uint64_t getEffectiveLoadAddr(uint64_t Addr);
102484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha
103484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  /// \brief Compute the original load address, as specified in the objectfile.
104484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  /// This is the inverse of getEffectiveLoadAddr.
105484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  virtual uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr);
106484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha  /// @}
107484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha
1080a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougachaprotected:
1090a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  const object::ObjectFile &Obj;
1100a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  const MCDisassembler &Dis;
1110a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha  const MCInstrAnalysis &MIA;
1120f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha  MCObjectSymbolizer *MOS;
1130a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha
114f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  /// \brief The fallback memory region, outside the object file.
11536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  std::unique_ptr<MemoryObject> FallbackRegion;
116f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha
117f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  /// \brief Return a memory region suitable for reading starting at \p Addr.
118f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  /// In most cases, this returns a StringRefMemoryObject backed by the
119f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  /// containing section. When no section was found, this returns the
120f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  /// FallbackRegion, if it is suitable.
121f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  /// If it is not, or if there is no fallback region, this returns 0.
122f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  MemoryObject *getRegionFor(uint64_t Addr);
123f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha
124ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougachaprivate:
125ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// \brief Fill \p Module by creating an atom for each section.
126ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// This could be made much smarter, using information like symbols, but also
127ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// format-specific features, like mach-o function_start or data_in_code LCs.
128ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  void buildSectionAtoms(MCModule *Module);
129ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha
130ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// \brief Enrich \p Module with a CFG consisting of MCFunctions.
131ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// \param Module An MCModule returned by buildModule, with no CFG.
132ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom.
133ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// When the CFG is built, contiguous instructions that were previously in a
134ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  /// single MCTextAtom will be split in multiple basic block atoms.
135ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha  void buildCFG(MCModule *Module);
136f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha
137f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha  MCBasicBlock *getBBAt(MCModule *Module, MCFunction *MCFN, uint64_t BeginAddr,
138f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha                        AddressSetTy &CallTargets,
139f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha                        AddressSetTy &TailCallTargets);
140ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha};
141ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha
1420e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougachaclass MCMachOObjectDisassembler : public MCObjectDisassembler {
1430e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha  const object::MachOObjectFile &MOOF;
1440e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha
1450e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha  uint64_t VMAddrSlide;
1460e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha  uint64_t HeaderLoadAddress;
1470e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha
1480e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha  // __DATA;__mod_init_func support.
1490e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha  llvm::StringRef ModInitContents;
1500e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha  // __DATA;__mod_exit_func support.
1510e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha  llvm::StringRef ModExitContents;
1520e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha
1530e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougachapublic:
1540e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha  /// \brief Construct a Mach-O specific object disassembler.
1550e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha  /// \param VMAddrSlide The virtual address slide applied by dyld.
1560e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha  /// \param HeaderLoadAddress The load address of the mach_header for this
1570e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha  /// object.
1580e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha  MCMachOObjectDisassembler(const object::MachOObjectFile &MOOF,
1590e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha                            const MCDisassembler &Dis,
1600e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha                            const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
1610e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha                            uint64_t HeaderLoadAddress);
1620e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha
1630e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougachaprotected:
16436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  uint64_t getEffectiveLoadAddr(uint64_t Addr) override;
16536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr) override;
16636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  uint64_t getEntrypoint() override;
1670e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha
16836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  ArrayRef<uint64_t> getStaticInitFunctions() override;
16936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  ArrayRef<uint64_t> getStaticExitFunctions() override;
1700e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha};
1710e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha
}

#endif