1//===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the declaration of the MCObjectDisassembler class, which
11// can be used to construct an MCModule and an MC CFG from an ObjectFile.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H
16#define LLVM_MC_MCOBJECTDISASSEMBLER_H
17
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MemoryObject.h"
#include <memory>
#include <utility>
#include <vector>
23
24namespace llvm {
25
// Forward declarations: this header refers to the types below only through
// pointers and references, so their full definitions are not needed here.
namespace object {
  class ObjectFile;
  class MachOObjectFile;
}

class MCBasicBlock;
class MCDisassembler;
class MCFunction;
class MCInstrAnalysis;
class MCModule;
class MCObjectSymbolizer;
37
38/// \brief Disassemble an ObjectFile to an MCModule and MCFunctions.
39/// This class builds on MCDisassembler to disassemble whole sections, creating
40/// MCAtom (MCTextAtom for disassembled sections and MCDataAtom for raw data).
41/// It can also be used to create a control flow graph consisting of MCFunctions
42/// and MCBasicBlocks.
43class MCObjectDisassembler {
44public:
45  MCObjectDisassembler(const object::ObjectFile &Obj,
46                       const MCDisassembler &Dis,
47                       const MCInstrAnalysis &MIA);
48  virtual ~MCObjectDisassembler() {}
49
50  /// \brief Build an MCModule, creating atoms and optionally functions.
51  /// \param withCFG Also build a CFG by adding MCFunctions to the Module.
52  /// If withCFG is false, the MCModule built only contains atoms, representing
53  /// what was found in the object file. If withCFG is true, MCFunctions are
54  /// created, containing MCBasicBlocks. All text atoms are split to form basic
55  /// block atoms, which then each back an MCBasicBlock.
56  MCModule *buildModule(bool withCFG = false);
57
58  MCModule *buildEmptyModule();
59
60  typedef std::vector<uint64_t> AddressSetTy;
61  /// \name Create a new MCFunction.
62  MCFunction *createFunction(MCModule *Module, uint64_t BeginAddr,
63                             AddressSetTy &CallTargets,
64                             AddressSetTy &TailCallTargets);
65
66  /// \brief Set the region on which to fallback if disassembly was requested
67  /// somewhere not accessible in the object file.
68  /// This is used for dynamic disassembly (see RawMemoryObject).
69  void setFallbackRegion(std::unique_ptr<MemoryObject> &Region) {
70    FallbackRegion.reset(Region.release());
71  }
72
73  /// \brief Set the symbolizer to use to get information on external functions.
74  /// Note that this isn't used to do instruction-level symbolization (that is,
75  /// plugged into MCDisassembler), but to symbolize function call targets.
76  void setSymbolizer(MCObjectSymbolizer *ObjectSymbolizer) {
77    MOS = ObjectSymbolizer;
78  }
79
80  /// \brief Get the effective address of the entrypoint, or 0 if there is none.
81  virtual uint64_t getEntrypoint();
82
83  /// \name Get the addresses of static constructors/destructors in the object.
84  /// The caller is expected to know how to interpret the addresses;
85  /// for example, Mach-O init functions expect 5 arguments, not for ELF.
86  /// The addresses are original object file load addresses, not effective.
87  /// @{
88  virtual ArrayRef<uint64_t> getStaticInitFunctions();
89  virtual ArrayRef<uint64_t> getStaticExitFunctions();
90  /// @}
91
92  /// \name Translation between effective and objectfile load address.
93  /// @{
94  /// \brief Compute the effective load address, from an objectfile virtual
95  /// address. This is implemented in a format-specific way, to take into
96  /// account things like PIE/ASLR when doing dynamic disassembly.
97  /// For example, on Mach-O this would be done by adding the VM addr slide,
98  /// on glibc ELF by keeping a map between segment load addresses, filled
99  /// using dl_iterate_phdr, etc..
100  /// In most static situations and in the default impl., this returns \p Addr.
101  virtual uint64_t getEffectiveLoadAddr(uint64_t Addr);
102
103  /// \brief Compute the original load address, as specified in the objectfile.
104  /// This is the inverse of getEffectiveLoadAddr.
105  virtual uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr);
106  /// @}
107
108protected:
109  const object::ObjectFile &Obj;
110  const MCDisassembler &Dis;
111  const MCInstrAnalysis &MIA;
112  MCObjectSymbolizer *MOS;
113
114  /// \brief The fallback memory region, outside the object file.
115  std::unique_ptr<MemoryObject> FallbackRegion;
116
117  /// \brief Return a memory region suitable for reading starting at \p Addr.
118  /// In most cases, this returns a StringRefMemoryObject backed by the
119  /// containing section. When no section was found, this returns the
120  /// FallbackRegion, if it is suitable.
121  /// If it is not, or if there is no fallback region, this returns 0.
122  MemoryObject *getRegionFor(uint64_t Addr);
123
124private:
125  /// \brief Fill \p Module by creating an atom for each section.
126  /// This could be made much smarter, using information like symbols, but also
127  /// format-specific features, like mach-o function_start or data_in_code LCs.
128  void buildSectionAtoms(MCModule *Module);
129
130  /// \brief Enrich \p Module with a CFG consisting of MCFunctions.
131  /// \param Module An MCModule returned by buildModule, with no CFG.
132  /// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom.
133  /// When the CFG is built, contiguous instructions that were previously in a
134  /// single MCTextAtom will be split in multiple basic block atoms.
135  void buildCFG(MCModule *Module);
136
137  MCBasicBlock *getBBAt(MCModule *Module, MCFunction *MCFN, uint64_t BeginAddr,
138                        AddressSetTy &CallTargets,
139                        AddressSetTy &TailCallTargets);
140};
141
142class MCMachOObjectDisassembler : public MCObjectDisassembler {
143  const object::MachOObjectFile &MOOF;
144
145  uint64_t VMAddrSlide;
146  uint64_t HeaderLoadAddress;
147
148  // __DATA;__mod_init_func support.
149  llvm::StringRef ModInitContents;
150  // __DATA;__mod_exit_func support.
151  llvm::StringRef ModExitContents;
152
153public:
154  /// \brief Construct a Mach-O specific object disassembler.
155  /// \param VMAddrSlide The virtual address slide applied by dyld.
156  /// \param HeaderLoadAddress The load address of the mach_header for this
157  /// object.
158  MCMachOObjectDisassembler(const object::MachOObjectFile &MOOF,
159                            const MCDisassembler &Dis,
160                            const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
161                            uint64_t HeaderLoadAddress);
162
163protected:
164  uint64_t getEffectiveLoadAddr(uint64_t Addr) override;
165  uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr) override;
166  uint64_t getEntrypoint() override;
167
168  ArrayRef<uint64_t> getStaticInitFunctions() override;
169  ArrayRef<uint64_t> getStaticExitFunctions() override;
170};
171
172}
173
174#endif
175