1//===- MachO.h - MachO object file implementation ---------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// This file declares the MachOObjectFile class, which implements the
// ObjectFile interface for MachO files.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_OBJECT_MACHO_H
16#define LLVM_OBJECT_MACHO_H
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/Triple.h"
21#include "llvm/Object/ObjectFile.h"
22#include "llvm/Support/MachO.h"
23
24namespace llvm {
25namespace object {
26
27/// DiceRef - This is a value type class that represents a single
28/// data in code entry in the table in a Mach-O object file.
29class DiceRef {
30  DataRefImpl DicePimpl;
31  const ObjectFile *OwningObject;
32
33public:
34  DiceRef() : OwningObject(nullptr) { }
35
36  DiceRef(DataRefImpl DiceP, const ObjectFile *Owner);
37
38  bool operator==(const DiceRef &Other) const;
39  bool operator<(const DiceRef &Other) const;
40
41  void moveNext();
42
43  std::error_code getOffset(uint32_t &Result) const;
44  std::error_code getLength(uint16_t &Result) const;
45  std::error_code getKind(uint16_t &Result) const;
46
47  DataRefImpl getRawDataRefImpl() const;
48  const ObjectFile *getObjectFile() const;
49};
50typedef content_iterator<DiceRef> dice_iterator;
51
52/// ExportEntry encapsulates the current-state-of-the-walk used when doing a
53/// non-recursive walk of the trie data structure.  This allows you to iterate
54/// across all exported symbols using:
55///      for (const llvm::object::ExportEntry &AnExport : Obj->exports()) {
56///      }
57class ExportEntry {
58public:
59  ExportEntry(ArrayRef<uint8_t> Trie);
60
61  StringRef name() const;
62  uint64_t flags() const;
63  uint64_t address() const;
64  uint64_t other() const;
65  StringRef otherName() const;
66  uint32_t nodeOffset() const;
67
68  bool operator==(const ExportEntry &) const;
69
70  void moveNext();
71
72private:
73  friend class MachOObjectFile;
74  void moveToFirst();
75  void moveToEnd();
76  uint64_t readULEB128(const uint8_t *&p);
77  void pushDownUntilBottom();
78  void pushNode(uint64_t Offset);
79
80  // Represents a node in the mach-o exports trie.
81  struct NodeState {
82    NodeState(const uint8_t *Ptr);
83    const uint8_t *Start;
84    const uint8_t *Current;
85    uint64_t Flags;
86    uint64_t Address;
87    uint64_t Other;
88    const char *ImportName;
89    unsigned ChildCount;
90    unsigned NextChildIndex;
91    unsigned ParentStringLength;
92    bool IsExportNode;
93  };
94
95  ArrayRef<uint8_t> Trie;
96  SmallString<256> CumulativeString;
97  SmallVector<NodeState, 16> Stack;
98  bool Malformed;
99  bool Done;
100};
101typedef content_iterator<ExportEntry> export_iterator;
102
103/// MachORebaseEntry encapsulates the current state in the decompression of
104/// rebasing opcodes. This allows you to iterate through the compressed table of
105/// rebasing using:
106///    for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable()) {
107///    }
108class MachORebaseEntry {
109public:
110  MachORebaseEntry(ArrayRef<uint8_t> opcodes, bool is64Bit);
111
112  uint32_t segmentIndex() const;
113  uint64_t segmentOffset() const;
114  StringRef typeName() const;
115
116  bool operator==(const MachORebaseEntry &) const;
117
118  void moveNext();
119
120private:
121  friend class MachOObjectFile;
122  void moveToFirst();
123  void moveToEnd();
124  uint64_t readULEB128();
125
126  ArrayRef<uint8_t> Opcodes;
127  const uint8_t *Ptr;
128  uint64_t SegmentOffset;
129  uint32_t SegmentIndex;
130  uint64_t RemainingLoopCount;
131  uint64_t AdvanceAmount;
132  uint8_t  RebaseType;
133  uint8_t  PointerSize;
134  bool     Malformed;
135  bool     Done;
136};
137typedef content_iterator<MachORebaseEntry> rebase_iterator;
138
139/// MachOBindEntry encapsulates the current state in the decompression of
140/// binding opcodes. This allows you to iterate through the compressed table of
141/// bindings using:
142///    for (const llvm::object::MachOBindEntry &Entry : Obj->bindTable()) {
143///    }
144class MachOBindEntry {
145public:
146  enum class Kind { Regular, Lazy, Weak };
147
148  MachOBindEntry(ArrayRef<uint8_t> Opcodes, bool is64Bit, MachOBindEntry::Kind);
149
150  uint32_t segmentIndex() const;
151  uint64_t segmentOffset() const;
152  StringRef typeName() const;
153  StringRef symbolName() const;
154  uint32_t flags() const;
155  int64_t addend() const;
156  int ordinal() const;
157
158  bool operator==(const MachOBindEntry &) const;
159
160  void moveNext();
161
162private:
163  friend class MachOObjectFile;
164  void moveToFirst();
165  void moveToEnd();
166  uint64_t readULEB128();
167  int64_t readSLEB128();
168
169  ArrayRef<uint8_t> Opcodes;
170  const uint8_t *Ptr;
171  uint64_t SegmentOffset;
172  uint32_t SegmentIndex;
173  StringRef SymbolName;
174  int      Ordinal;
175  uint32_t Flags;
176  int64_t  Addend;
177  uint64_t RemainingLoopCount;
178  uint64_t AdvanceAmount;
179  uint8_t  BindType;
180  uint8_t  PointerSize;
181  Kind     TableKind;
182  bool     Malformed;
183  bool     Done;
184};
185typedef content_iterator<MachOBindEntry> bind_iterator;
186
187class MachOObjectFile : public ObjectFile {
188public:
189  struct LoadCommandInfo {
190    const char *Ptr;      // Where in memory the load command is.
191    MachO::load_command C; // The command itself.
192  };
193  typedef SmallVector<LoadCommandInfo, 4> LoadCommandList;
194  typedef LoadCommandList::const_iterator load_command_iterator;
195
196  MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits,
197                  std::error_code &EC);
198
199  void moveSymbolNext(DataRefImpl &Symb) const override;
200
201  uint64_t getNValue(DataRefImpl Sym) const;
202  ErrorOr<StringRef> getSymbolName(DataRefImpl Symb) const override;
203
204  // MachO specific.
205  std::error_code getIndirectName(DataRefImpl Symb, StringRef &Res) const;
206  unsigned getSectionType(SectionRef Sec) const;
207
208  ErrorOr<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
209  uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
210  uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
211  SymbolRef::Type getSymbolType(DataRefImpl Symb) const override;
212  uint32_t getSymbolFlags(DataRefImpl Symb) const override;
213  ErrorOr<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
214  unsigned getSymbolSectionID(SymbolRef Symb) const;
215  unsigned getSectionID(SectionRef Sec) const;
216
217  void moveSectionNext(DataRefImpl &Sec) const override;
218  std::error_code getSectionName(DataRefImpl Sec,
219                                 StringRef &Res) const override;
220  uint64_t getSectionAddress(DataRefImpl Sec) const override;
221  uint64_t getSectionSize(DataRefImpl Sec) const override;
222  std::error_code getSectionContents(DataRefImpl Sec,
223                                     StringRef &Res) const override;
224  uint64_t getSectionAlignment(DataRefImpl Sec) const override;
225  bool isSectionText(DataRefImpl Sec) const override;
226  bool isSectionData(DataRefImpl Sec) const override;
227  bool isSectionBSS(DataRefImpl Sec) const override;
228  bool isSectionVirtual(DataRefImpl Sec) const override;
229  relocation_iterator section_rel_begin(DataRefImpl Sec) const override;
230  relocation_iterator section_rel_end(DataRefImpl Sec) const override;
231
232  void moveRelocationNext(DataRefImpl &Rel) const override;
233  uint64_t getRelocationOffset(DataRefImpl Rel) const override;
234  symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
235  section_iterator getRelocationSection(DataRefImpl Rel) const;
236  uint64_t getRelocationType(DataRefImpl Rel) const override;
237  void getRelocationTypeName(DataRefImpl Rel,
238                             SmallVectorImpl<char> &Result) const override;
239  uint8_t getRelocationLength(DataRefImpl Rel) const;
240
241  // MachO specific.
242  std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &) const;
243
244  section_iterator getRelocationRelocatedSection(relocation_iterator Rel) const;
245
246  // TODO: Would be useful to have an iterator based version
247  // of the load command interface too.
248
249  basic_symbol_iterator symbol_begin_impl() const override;
250  basic_symbol_iterator symbol_end_impl() const override;
251
252  // MachO specific.
253  basic_symbol_iterator getSymbolByIndex(unsigned Index) const;
254
255  section_iterator section_begin() const override;
256  section_iterator section_end() const override;
257
258  uint8_t getBytesInAddress() const override;
259
260  StringRef getFileFormatName() const override;
261  unsigned getArch() const override;
262  Triple getArch(const char **McpuDefault, Triple *ThumbTriple) const;
263
264  relocation_iterator section_rel_begin(unsigned Index) const;
265  relocation_iterator section_rel_end(unsigned Index) const;
266
267  dice_iterator begin_dices() const;
268  dice_iterator end_dices() const;
269
270  load_command_iterator begin_load_commands() const;
271  load_command_iterator end_load_commands() const;
272  iterator_range<load_command_iterator> load_commands() const;
273
274  /// For use iterating over all exported symbols.
275  iterator_range<export_iterator> exports() const;
276
277  /// For use examining a trie not in a MachOObjectFile.
278  static iterator_range<export_iterator> exports(ArrayRef<uint8_t> Trie);
279
280  /// For use iterating over all rebase table entries.
281  iterator_range<rebase_iterator> rebaseTable() const;
282
283  /// For use examining rebase opcodes not in a MachOObjectFile.
284  static iterator_range<rebase_iterator> rebaseTable(ArrayRef<uint8_t> Opcodes,
285                                                     bool is64);
286
287  /// For use iterating over all bind table entries.
288  iterator_range<bind_iterator> bindTable() const;
289
290  /// For use iterating over all lazy bind table entries.
291  iterator_range<bind_iterator> lazyBindTable() const;
292
293  /// For use iterating over all lazy bind table entries.
294  iterator_range<bind_iterator> weakBindTable() const;
295
296  /// For use examining bind opcodes not in a MachOObjectFile.
297  static iterator_range<bind_iterator> bindTable(ArrayRef<uint8_t> Opcodes,
298                                                 bool is64,
299                                                 MachOBindEntry::Kind);
300
301
302  // In a MachO file, sections have a segment name. This is used in the .o
303  // files. They have a single segment, but this field specifies which segment
304  // a section should be put in in the final object.
305  StringRef getSectionFinalSegmentName(DataRefImpl Sec) const;
306
307  // Names are stored as 16 bytes. These returns the raw 16 bytes without
308  // interpreting them as a C string.
309  ArrayRef<char> getSectionRawName(DataRefImpl Sec) const;
310  ArrayRef<char> getSectionRawFinalSegmentName(DataRefImpl Sec) const;
311
312  // MachO specific Info about relocations.
313  bool isRelocationScattered(const MachO::any_relocation_info &RE) const;
314  unsigned getPlainRelocationSymbolNum(
315                                    const MachO::any_relocation_info &RE) const;
316  bool getPlainRelocationExternal(const MachO::any_relocation_info &RE) const;
317  bool getScatteredRelocationScattered(
318                                    const MachO::any_relocation_info &RE) const;
319  uint32_t getScatteredRelocationValue(
320                                    const MachO::any_relocation_info &RE) const;
321  uint32_t getScatteredRelocationType(
322                                    const MachO::any_relocation_info &RE) const;
323  unsigned getAnyRelocationAddress(const MachO::any_relocation_info &RE) const;
324  unsigned getAnyRelocationPCRel(const MachO::any_relocation_info &RE) const;
325  unsigned getAnyRelocationLength(const MachO::any_relocation_info &RE) const;
326  unsigned getAnyRelocationType(const MachO::any_relocation_info &RE) const;
327  SectionRef getAnyRelocationSection(const MachO::any_relocation_info &RE) const;
328
329  // MachO specific structures.
330  MachO::section getSection(DataRefImpl DRI) const;
331  MachO::section_64 getSection64(DataRefImpl DRI) const;
332  MachO::section getSection(const LoadCommandInfo &L, unsigned Index) const;
333  MachO::section_64 getSection64(const LoadCommandInfo &L,unsigned Index) const;
334  MachO::nlist getSymbolTableEntry(DataRefImpl DRI) const;
335  MachO::nlist_64 getSymbol64TableEntry(DataRefImpl DRI) const;
336
337  MachO::linkedit_data_command
338  getLinkeditDataLoadCommand(const LoadCommandInfo &L) const;
339  MachO::segment_command
340  getSegmentLoadCommand(const LoadCommandInfo &L) const;
341  MachO::segment_command_64
342  getSegment64LoadCommand(const LoadCommandInfo &L) const;
343  MachO::linker_option_command
344  getLinkerOptionLoadCommand(const LoadCommandInfo &L) const;
345  MachO::version_min_command
346  getVersionMinLoadCommand(const LoadCommandInfo &L) const;
347  MachO::dylib_command
348  getDylibIDLoadCommand(const LoadCommandInfo &L) const;
349  MachO::dyld_info_command
350  getDyldInfoLoadCommand(const LoadCommandInfo &L) const;
351  MachO::dylinker_command
352  getDylinkerCommand(const LoadCommandInfo &L) const;
353  MachO::uuid_command
354  getUuidCommand(const LoadCommandInfo &L) const;
355  MachO::rpath_command
356  getRpathCommand(const LoadCommandInfo &L) const;
357  MachO::source_version_command
358  getSourceVersionCommand(const LoadCommandInfo &L) const;
359  MachO::entry_point_command
360  getEntryPointCommand(const LoadCommandInfo &L) const;
361  MachO::encryption_info_command
362  getEncryptionInfoCommand(const LoadCommandInfo &L) const;
363  MachO::encryption_info_command_64
364  getEncryptionInfoCommand64(const LoadCommandInfo &L) const;
365  MachO::sub_framework_command
366  getSubFrameworkCommand(const LoadCommandInfo &L) const;
367  MachO::sub_umbrella_command
368  getSubUmbrellaCommand(const LoadCommandInfo &L) const;
369  MachO::sub_library_command
370  getSubLibraryCommand(const LoadCommandInfo &L) const;
371  MachO::sub_client_command
372  getSubClientCommand(const LoadCommandInfo &L) const;
373  MachO::routines_command
374  getRoutinesCommand(const LoadCommandInfo &L) const;
375  MachO::routines_command_64
376  getRoutinesCommand64(const LoadCommandInfo &L) const;
377  MachO::thread_command
378  getThreadCommand(const LoadCommandInfo &L) const;
379
380  MachO::any_relocation_info getRelocation(DataRefImpl Rel) const;
381  MachO::data_in_code_entry getDice(DataRefImpl Rel) const;
382  const MachO::mach_header &getHeader() const;
383  const MachO::mach_header_64 &getHeader64() const;
384  uint32_t
385  getIndirectSymbolTableEntry(const MachO::dysymtab_command &DLC,
386                              unsigned Index) const;
387  MachO::data_in_code_entry getDataInCodeTableEntry(uint32_t DataOffset,
388                                                    unsigned Index) const;
389  MachO::symtab_command getSymtabLoadCommand() const;
390  MachO::dysymtab_command getDysymtabLoadCommand() const;
391  MachO::linkedit_data_command getDataInCodeLoadCommand() const;
392  MachO::linkedit_data_command getLinkOptHintsLoadCommand() const;
393  ArrayRef<uint8_t> getDyldInfoRebaseOpcodes() const;
394  ArrayRef<uint8_t> getDyldInfoBindOpcodes() const;
395  ArrayRef<uint8_t> getDyldInfoWeakBindOpcodes() const;
396  ArrayRef<uint8_t> getDyldInfoLazyBindOpcodes() const;
397  ArrayRef<uint8_t> getDyldInfoExportsTrie() const;
398  ArrayRef<uint8_t> getUuid() const;
399
400  StringRef getStringTableData() const;
401  bool is64Bit() const;
402  void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const;
403
404  static StringRef guessLibraryShortName(StringRef Name, bool &isFramework,
405                                         StringRef &Suffix);
406
407  static Triple::ArchType getArch(uint32_t CPUType);
408  static Triple getArch(uint32_t CPUType, uint32_t CPUSubType,
409                        const char **McpuDefault = nullptr);
410  static Triple getThumbArch(uint32_t CPUType, uint32_t CPUSubType,
411                             const char **McpuDefault = nullptr);
412  static Triple getArch(uint32_t CPUType, uint32_t CPUSubType,
413                        const char **McpuDefault, Triple *ThumbTriple);
414  static bool isValidArch(StringRef ArchFlag);
415  static Triple getHostArch();
416
417  bool isRelocatableObject() const override;
418
419  bool hasPageZeroSegment() const { return HasPageZeroSegment; }
420
421  static bool classof(const Binary *v) {
422    return v->isMachO();
423  }
424
425  static uint32_t
426  getVersionMinMajor(MachO::version_min_command &C, bool SDK) {
427    uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version;
428    return (VersionOrSDK >> 16) & 0xffff;
429  }
430
431  static uint32_t
432  getVersionMinMinor(MachO::version_min_command &C, bool SDK) {
433    uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version;
434    return (VersionOrSDK >> 8) & 0xff;
435  }
436
437  static uint32_t
438  getVersionMinUpdate(MachO::version_min_command &C, bool SDK) {
439    uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version;
440    return VersionOrSDK & 0xff;
441  }
442
443private:
444  uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
445
446  union {
447    MachO::mach_header_64 Header64;
448    MachO::mach_header Header;
449  };
450  typedef SmallVector<const char*, 1> SectionList;
451  SectionList Sections;
452  typedef SmallVector<const char*, 1> LibraryList;
453  LibraryList Libraries;
454  LoadCommandList LoadCommands;
455  typedef SmallVector<StringRef, 1> LibraryShortName;
456  mutable LibraryShortName LibrariesShortNames;
457  const char *SymtabLoadCmd;
458  const char *DysymtabLoadCmd;
459  const char *DataInCodeLoadCmd;
460  const char *LinkOptHintsLoadCmd;
461  const char *DyldInfoLoadCmd;
462  const char *UuidLoadCmd;
463  bool HasPageZeroSegment;
464};
465
466/// DiceRef
467inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner)
468  : DicePimpl(DiceP) , OwningObject(Owner) {}
469
470inline bool DiceRef::operator==(const DiceRef &Other) const {
471  return DicePimpl == Other.DicePimpl;
472}
473
474inline bool DiceRef::operator<(const DiceRef &Other) const {
475  return DicePimpl < Other.DicePimpl;
476}
477
478inline void DiceRef::moveNext() {
479  const MachO::data_in_code_entry *P =
480    reinterpret_cast<const MachO::data_in_code_entry *>(DicePimpl.p);
481  DicePimpl.p = reinterpret_cast<uintptr_t>(P + 1);
482}
483
// A Mach-O data-in-code reference (a DiceRef) can only be created when its
// OwningObject is a MachOObjectFile, so the methods that read the fields of
// the reference use a static_cast<> to get at the concrete object type.
487
488inline std::error_code DiceRef::getOffset(uint32_t &Result) const {
489  const MachOObjectFile *MachOOF =
490    static_cast<const MachOObjectFile *>(OwningObject);
491  MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl);
492  Result = Dice.offset;
493  return std::error_code();
494}
495
496inline std::error_code DiceRef::getLength(uint16_t &Result) const {
497  const MachOObjectFile *MachOOF =
498    static_cast<const MachOObjectFile *>(OwningObject);
499  MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl);
500  Result = Dice.length;
501  return std::error_code();
502}
503
504inline std::error_code DiceRef::getKind(uint16_t &Result) const {
505  const MachOObjectFile *MachOOF =
506    static_cast<const MachOObjectFile *>(OwningObject);
507  MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl);
508  Result = Dice.kind;
509  return std::error_code();
510}
511
512inline DataRefImpl DiceRef::getRawDataRefImpl() const {
513  return DicePimpl;
514}
515
516inline const ObjectFile *DiceRef::getObjectFile() const {
517  return OwningObject;
518}
519
520}
521}
522
523#endif
524