//===-- MachOUtils.cpp - Mach-o specific helpers for dsymutil -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "MachOUtils.h"
11#include "BinaryHolder.h"
12#include "DebugMap.h"
13#include "dsymutil.h"
14#include "NonRelocatableStringpool.h"
15#include "llvm/MC/MCSectionMachO.h"
16#include "llvm/MC/MCAsmLayout.h"
17#include "llvm/MC/MCSectionMachO.h"
18#include "llvm/MC/MCObjectStreamer.h"
19#include "llvm/MC/MCStreamer.h"
20#include "llvm/Object/MachO.h"
21#include "llvm/Support/FileUtilities.h"
22#include "llvm/Support/Program.h"
23#include "llvm/Support/raw_ostream.h"
24
25namespace llvm {
26namespace dsymutil {
27namespace MachOUtils {
28
29std::string getArchName(StringRef Arch) {
30  if (Arch.startswith("thumb"))
31    return (llvm::Twine("arm") + Arch.drop_front(5)).str();
32  return Arch;
33}
34
35static bool runLipo(StringRef SDKPath, SmallVectorImpl<const char *> &Args) {
36  auto Path = sys::findProgramByName("lipo", makeArrayRef(SDKPath));
37  if (!Path)
38    Path = sys::findProgramByName("lipo");
39
40  if (!Path) {
41    errs() << "error: lipo: " << Path.getError().message() << "\n";
42    return false;
43  }
44
45  std::string ErrMsg;
46  int result =
47      sys::ExecuteAndWait(*Path, Args.data(), nullptr, nullptr, 0, 0, &ErrMsg);
48  if (result) {
49    errs() << "error: lipo: " << ErrMsg << "\n";
50    return false;
51  }
52
53  return true;
54}
55
56bool generateUniversalBinary(SmallVectorImpl<ArchAndFilename> &ArchFiles,
57                             StringRef OutputFileName,
58                             const LinkOptions &Options, StringRef SDKPath) {
59  // No need to merge one file into a universal fat binary. First, try
60  // to move it (rename) to the final location. If that fails because
61  // of cross-device link issues then copy and delete.
62  if (ArchFiles.size() == 1) {
63    StringRef From(ArchFiles.front().Path);
64    if (sys::fs::rename(From, OutputFileName)) {
65      if (std::error_code EC = sys::fs::copy_file(From, OutputFileName)) {
66        errs() << "error: while copying " << From << " to " << OutputFileName
67               << ": " << EC.message() << "\n";
68        return false;
69      }
70      sys::fs::remove(From);
71    }
72    return true;
73  }
74
75  SmallVector<const char *, 8> Args;
76  Args.push_back("lipo");
77  Args.push_back("-create");
78
79  for (auto &Thin : ArchFiles)
80    Args.push_back(Thin.Path.c_str());
81
82  // Align segments to match dsymutil-classic alignment
83  for (auto &Thin : ArchFiles) {
84    Thin.Arch = getArchName(Thin.Arch);
85    Args.push_back("-segalign");
86    Args.push_back(Thin.Arch.c_str());
87    Args.push_back("20");
88  }
89
90  Args.push_back("-output");
91  Args.push_back(OutputFileName.data());
92  Args.push_back(nullptr);
93
94  if (Options.Verbose) {
95    outs() << "Running lipo\n";
96    for (auto Arg : Args)
97      outs() << ' ' << ((Arg == nullptr) ? "\n" : Arg);
98  }
99
100  return Options.NoOutput ? true : runLipo(SDKPath, Args);
101}
102
103// Return a MachO::segment_command_64 that holds the same values as
104// the passed MachO::segment_command. We do that to avoid having to
105// duplicat the logic for 32bits and 64bits segments.
106struct MachO::segment_command_64 adaptFrom32bits(MachO::segment_command Seg) {
107  MachO::segment_command_64 Seg64;
108  Seg64.cmd = Seg.cmd;
109  Seg64.cmdsize = Seg.cmdsize;
110  memcpy(Seg64.segname, Seg.segname, sizeof(Seg.segname));
111  Seg64.vmaddr = Seg.vmaddr;
112  Seg64.vmsize = Seg.vmsize;
113  Seg64.fileoff = Seg.fileoff;
114  Seg64.filesize = Seg.filesize;
115  Seg64.maxprot = Seg.maxprot;
116  Seg64.initprot = Seg.initprot;
117  Seg64.nsects = Seg.nsects;
118  Seg64.flags = Seg.flags;
119  return Seg64;
120}
121
122// Iterate on all \a Obj segments, and apply \a Handler to them.
123template <typename FunctionTy>
124static void iterateOnSegments(const object::MachOObjectFile &Obj,
125                              FunctionTy Handler) {
126  for (const auto &LCI : Obj.load_commands()) {
127    MachO::segment_command_64 Segment;
128    if (LCI.C.cmd == MachO::LC_SEGMENT)
129      Segment = adaptFrom32bits(Obj.getSegmentLoadCommand(LCI));
130    else if (LCI.C.cmd == MachO::LC_SEGMENT_64)
131      Segment = Obj.getSegment64LoadCommand(LCI);
132    else
133      continue;
134
135    Handler(Segment);
136  }
137}
138
139// Transfer the symbols described by \a NList to \a NewSymtab which is
140// just the raw contents of the symbol table for the dSYM companion file.
141// \returns whether the symbol was tranfered or not.
142template <typename NListTy>
143static bool transferSymbol(NListTy NList, bool IsLittleEndian,
144                           StringRef Strings, SmallVectorImpl<char> &NewSymtab,
145                           NonRelocatableStringpool &NewStrings,
146                           bool &InDebugNote) {
147  // Do not transfer undefined symbols, we want real addresses.
148  if ((NList.n_type & MachO::N_TYPE) == MachO::N_UNDF)
149    return false;
150
151  StringRef Name = StringRef(Strings.begin() + NList.n_strx);
152  if (InDebugNote) {
153    InDebugNote =
154        (NList.n_type != MachO::N_SO) || (!Name.empty() && Name[0] != '\0');
155    return false;
156  } else if (NList.n_type == MachO::N_SO) {
157    InDebugNote = true;
158    return false;
159  }
160
161  // FIXME: The + 1 is here to mimic dsymutil-classic that has 2 empty
162  // strings at the start of the generated string table (There is
163  // corresponding code in the string table emission).
164  NList.n_strx = NewStrings.getStringOffset(Name) + 1;
165  if (IsLittleEndian != sys::IsLittleEndianHost)
166    MachO::swapStruct(NList);
167
168  NewSymtab.append(reinterpret_cast<char *>(&NList),
169                   reinterpret_cast<char *>(&NList + 1));
170  return true;
171}
172
173// Wrapper around transferSymbol to transfer all of \a Obj symbols
174// to \a NewSymtab. This function does not write in the output file.
175// \returns the number of symbols in \a NewSymtab.
176static unsigned transferSymbols(const object::MachOObjectFile &Obj,
177                                SmallVectorImpl<char> &NewSymtab,
178                                NonRelocatableStringpool &NewStrings) {
179  unsigned Syms = 0;
180  StringRef Strings = Obj.getStringTableData();
181  bool IsLittleEndian = Obj.isLittleEndian();
182  bool InDebugNote = false;
183
184  if (Obj.is64Bit()) {
185    for (const object::SymbolRef &Symbol : Obj.symbols()) {
186      object::DataRefImpl DRI = Symbol.getRawDataRefImpl();
187      if (transferSymbol(Obj.getSymbol64TableEntry(DRI), IsLittleEndian,
188                         Strings, NewSymtab, NewStrings, InDebugNote))
189        ++Syms;
190    }
191  } else {
192    for (const object::SymbolRef &Symbol : Obj.symbols()) {
193      object::DataRefImpl DRI = Symbol.getRawDataRefImpl();
194      if (transferSymbol(Obj.getSymbolTableEntry(DRI), IsLittleEndian, Strings,
195                         NewSymtab, NewStrings, InDebugNote))
196        ++Syms;
197    }
198  }
199  return Syms;
200}
201
202static MachO::section
203getSection(const object::MachOObjectFile &Obj,
204           const MachO::segment_command &Seg,
205           const object::MachOObjectFile::LoadCommandInfo &LCI, unsigned Idx) {
206  return Obj.getSection(LCI, Idx);
207}
208
209static MachO::section_64
210getSection(const object::MachOObjectFile &Obj,
211           const MachO::segment_command_64 &Seg,
212           const object::MachOObjectFile::LoadCommandInfo &LCI, unsigned Idx) {
213  return Obj.getSection64(LCI, Idx);
214}
215
216// Transfer \a Segment from \a Obj to the output file. This calls into \a Writer
217// to write these load commands directly in the output file at the current
218// position.
219// The function also tries to find a hole in the address map to fit the __DWARF
220// segment of \a DwarfSegmentSize size. \a EndAddress is updated to point at the
221// highest segment address.
222// When the __LINKEDIT segment is transfered, its offset and size are set resp.
223// to \a LinkeditOffset and \a LinkeditSize.
224template <typename SegmentTy>
225static void transferSegmentAndSections(
226    const object::MachOObjectFile::LoadCommandInfo &LCI, SegmentTy Segment,
227    const object::MachOObjectFile &Obj, MCObjectWriter &Writer,
228    uint64_t LinkeditOffset, uint64_t LinkeditSize, uint64_t DwarfSegmentSize,
229    uint64_t &GapForDwarf, uint64_t &EndAddress) {
230  if (StringRef("__DWARF") == Segment.segname)
231    return;
232
233  Segment.fileoff = Segment.filesize = 0;
234
235  if (StringRef("__LINKEDIT") == Segment.segname) {
236    Segment.fileoff = LinkeditOffset;
237    Segment.filesize = LinkeditSize;
238  }
239
240  // Check if the end address of the last segment and our current
241  // start address leave a sufficient gap to store the __DWARF
242  // segment.
243  uint64_t PrevEndAddress = EndAddress;
244  EndAddress = RoundUpToAlignment(EndAddress, 0x1000);
245  if (GapForDwarf == UINT64_MAX && Segment.vmaddr > EndAddress &&
246      Segment.vmaddr - EndAddress >= DwarfSegmentSize)
247    GapForDwarf = EndAddress;
248
249  // The segments are not necessarily sorted by their vmaddr.
250  EndAddress =
251      std::max<uint64_t>(PrevEndAddress, Segment.vmaddr + Segment.vmsize);
252  unsigned nsects = Segment.nsects;
253  if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
254    MachO::swapStruct(Segment);
255  Writer.writeBytes(
256      StringRef(reinterpret_cast<char *>(&Segment), sizeof(Segment)));
257  for (unsigned i = 0; i < nsects; ++i) {
258    auto Sect = getSection(Obj, Segment, LCI, i);
259    Sect.offset = Sect.reloff = Sect.nreloc = 0;
260    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
261      MachO::swapStruct(Sect);
262    Writer.writeBytes(StringRef(reinterpret_cast<char *>(&Sect), sizeof(Sect)));
263  }
264}
265
266// Write the __DWARF segment load command to the output file.
267static void createDwarfSegment(uint64_t VMAddr, uint64_t FileOffset,
268                               uint64_t FileSize, unsigned NumSections,
269                               MCAsmLayout &Layout, MachObjectWriter &Writer) {
270  Writer.writeSegmentLoadCommand("__DWARF", NumSections, VMAddr,
271                                 RoundUpToAlignment(FileSize, 0x1000),
272                                 FileOffset, FileSize, /* MaxProt */ 7,
273                                 /* InitProt =*/3);
274
275  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
276    MCSection *Sec = Layout.getSectionOrder()[i];
277    if (Sec->begin() == Sec->end() || !Layout.getSectionFileSize(Sec))
278      continue;
279
280    unsigned Align = Sec->getAlignment();
281    if (Align > 1) {
282      VMAddr = RoundUpToAlignment(VMAddr, Align);
283      FileOffset = RoundUpToAlignment(FileOffset, Align);
284    }
285    Writer.writeSection(Layout, *Sec, VMAddr, FileOffset, 0, 0, 0);
286
287    FileOffset += Layout.getSectionAddressSize(Sec);
288    VMAddr += Layout.getSectionAddressSize(Sec);
289  }
290}
291
292static bool isExecutable(const object::MachOObjectFile &Obj) {
293  if (Obj.is64Bit())
294    return Obj.getHeader64().filetype != MachO::MH_OBJECT;
295  else
296    return Obj.getHeader().filetype != MachO::MH_OBJECT;
297}
298
299static bool hasLinkEditSegment(const object::MachOObjectFile &Obj) {
300  bool HasLinkEditSegment = false;
301  iterateOnSegments(Obj, [&](const MachO::segment_command_64 &Segment) {
302    if (StringRef("__LINKEDIT") == Segment.segname)
303      HasLinkEditSegment = true;
304  });
305  return HasLinkEditSegment;
306}
307
308static unsigned segmentLoadCommandSize(bool Is64Bit, unsigned NumSections) {
309  if (Is64Bit)
310    return sizeof(MachO::segment_command_64) +
311           NumSections * sizeof(MachO::section_64);
312
313  return sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
314}
315
316// Stream a dSYM companion binary file corresponding to the binary referenced
317// by \a DM to \a OutFile. The passed \a MS MCStreamer is setup to write to
318// \a OutFile and it must be using a MachObjectWriter object to do so.
319bool generateDsymCompanion(const DebugMap &DM, MCStreamer &MS,
320                           raw_fd_ostream &OutFile) {
321  auto &ObjectStreamer = static_cast<MCObjectStreamer &>(MS);
322  MCAssembler &MCAsm = ObjectStreamer.getAssembler();
323  auto &Writer = static_cast<MachObjectWriter &>(MCAsm.getWriter());
324  MCAsmLayout Layout(MCAsm);
325
326  MCAsm.layout(Layout);
327
328  BinaryHolder InputBinaryHolder(false);
329  auto ErrOrObjs = InputBinaryHolder.GetObjectFiles(DM.getBinaryPath());
330  if (auto Error = ErrOrObjs.getError())
331    return error(Twine("opening ") + DM.getBinaryPath() + ": " +
332                     Error.message(),
333                 "output file streaming");
334
335  auto ErrOrInputBinary =
336      InputBinaryHolder.GetAs<object::MachOObjectFile>(DM.getTriple());
337  if (auto Error = ErrOrInputBinary.getError())
338    return error(Twine("opening ") + DM.getBinaryPath() + ": " +
339                     Error.message(),
340                 "output file streaming");
341  auto &InputBinary = *ErrOrInputBinary;
342
343  bool Is64Bit = Writer.is64Bit();
344  MachO::symtab_command SymtabCmd = InputBinary.getSymtabLoadCommand();
345
346  // Get UUID.
347  MachO::uuid_command UUIDCmd;
348  memset(&UUIDCmd, 0, sizeof(UUIDCmd));
349  UUIDCmd.cmd = MachO::LC_UUID;
350  UUIDCmd.cmdsize = sizeof(MachO::uuid_command);
351  for (auto &LCI : InputBinary.load_commands()) {
352    if (LCI.C.cmd == MachO::LC_UUID) {
353      UUIDCmd = InputBinary.getUuidCommand(LCI);
354      break;
355    }
356  }
357
358  // Compute the number of load commands we will need.
359  unsigned LoadCommandSize = 0;
360  unsigned NumLoadCommands = 0;
361  // We will copy the UUID if there is one.
362  if (UUIDCmd.cmd != 0) {
363    ++NumLoadCommands;
364    LoadCommandSize += sizeof(MachO::uuid_command);
365  }
366
367  // If we have a valid symtab to copy, do it.
368  bool ShouldEmitSymtab =
369      isExecutable(InputBinary) && hasLinkEditSegment(InputBinary);
370  if (ShouldEmitSymtab) {
371    LoadCommandSize += sizeof(MachO::symtab_command);
372    ++NumLoadCommands;
373  }
374
375  unsigned HeaderSize =
376      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
377  // We will copy every segment that isn't __DWARF.
378  iterateOnSegments(InputBinary, [&](const MachO::segment_command_64 &Segment) {
379    if (StringRef("__DWARF") == Segment.segname)
380      return;
381
382    ++NumLoadCommands;
383    LoadCommandSize += segmentLoadCommandSize(Is64Bit, Segment.nsects);
384  });
385
386  // We will add our own brand new __DWARF segment if we have debug
387  // info.
388  unsigned NumDwarfSections = 0;
389  uint64_t DwarfSegmentSize = 0;
390
391  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
392    MCSection *Sec = Layout.getSectionOrder()[i];
393    if (Sec->begin() == Sec->end())
394      continue;
395
396    if (uint64_t Size = Layout.getSectionFileSize(Sec)) {
397      DwarfSegmentSize =
398          RoundUpToAlignment(DwarfSegmentSize, Sec->getAlignment());
399      DwarfSegmentSize += Size;
400      ++NumDwarfSections;
401    }
402  }
403
404  if (NumDwarfSections) {
405    ++NumLoadCommands;
406    LoadCommandSize += segmentLoadCommandSize(Is64Bit, NumDwarfSections);
407  }
408
409  SmallString<0> NewSymtab;
410  NonRelocatableStringpool NewStrings;
411  unsigned NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
412  unsigned NumSyms = 0;
413  uint64_t NewStringsSize = 0;
414  if (ShouldEmitSymtab) {
415    NewSymtab.reserve(SymtabCmd.nsyms * NListSize / 2);
416    NumSyms = transferSymbols(InputBinary, NewSymtab, NewStrings);
417    NewStringsSize = NewStrings.getSize() + 1;
418  }
419
420  uint64_t SymtabStart = LoadCommandSize;
421  SymtabStart += HeaderSize;
422  SymtabStart = RoundUpToAlignment(SymtabStart, 0x1000);
423
424  // We gathered all the information we need, start emitting the output file.
425  Writer.writeHeader(MachO::MH_DSYM, NumLoadCommands, LoadCommandSize, false);
426
427  // Write the load commands.
428  assert(OutFile.tell() == HeaderSize);
429  if (UUIDCmd.cmd != 0) {
430    Writer.write32(UUIDCmd.cmd);
431    Writer.write32(UUIDCmd.cmdsize);
432    Writer.writeBytes(
433        StringRef(reinterpret_cast<const char *>(UUIDCmd.uuid), 16));
434    assert(OutFile.tell() == HeaderSize + sizeof(UUIDCmd));
435  }
436
437  assert(SymtabCmd.cmd && "No symbol table.");
438  uint64_t StringStart = SymtabStart + NumSyms * NListSize;
439  if (ShouldEmitSymtab)
440    Writer.writeSymtabLoadCommand(SymtabStart, NumSyms, StringStart,
441                                  NewStringsSize);
442
443  uint64_t DwarfSegmentStart = StringStart + NewStringsSize;
444  DwarfSegmentStart = RoundUpToAlignment(DwarfSegmentStart, 0x1000);
445
446  // Write the load commands for the segments and sections we 'import' from
447  // the original binary.
448  uint64_t EndAddress = 0;
449  uint64_t GapForDwarf = UINT64_MAX;
450  for (auto &LCI : InputBinary.load_commands()) {
451    if (LCI.C.cmd == MachO::LC_SEGMENT)
452      transferSegmentAndSections(LCI, InputBinary.getSegmentLoadCommand(LCI),
453                                 InputBinary, Writer, SymtabStart,
454                                 StringStart + NewStringsSize - SymtabStart,
455                                 DwarfSegmentSize, GapForDwarf, EndAddress);
456    else if (LCI.C.cmd == MachO::LC_SEGMENT_64)
457      transferSegmentAndSections(LCI, InputBinary.getSegment64LoadCommand(LCI),
458                                 InputBinary, Writer, SymtabStart,
459                                 StringStart + NewStringsSize - SymtabStart,
460                                 DwarfSegmentSize, GapForDwarf, EndAddress);
461  }
462
463  uint64_t DwarfVMAddr = RoundUpToAlignment(EndAddress, 0x1000);
464  uint64_t DwarfVMMax = Is64Bit ? UINT64_MAX : UINT32_MAX;
465  if (DwarfVMAddr + DwarfSegmentSize > DwarfVMMax ||
466      DwarfVMAddr + DwarfSegmentSize < DwarfVMAddr /* Overflow */) {
467    // There is no room for the __DWARF segment at the end of the
468    // address space. Look trhough segments to find a gap.
469    DwarfVMAddr = GapForDwarf;
470    if (DwarfVMAddr == UINT64_MAX)
471      warn("not enough VM space for the __DWARF segment.",
472           "output file streaming");
473  }
474
475  // Write the load command for the __DWARF segment.
476  createDwarfSegment(DwarfVMAddr, DwarfSegmentStart, DwarfSegmentSize,
477                     NumDwarfSections, Layout, Writer);
478
479  assert(OutFile.tell() == LoadCommandSize + HeaderSize);
480  Writer.WriteZeros(SymtabStart - (LoadCommandSize + HeaderSize));
481  assert(OutFile.tell() == SymtabStart);
482
483  // Transfer symbols.
484  if (ShouldEmitSymtab) {
485    Writer.writeBytes(NewSymtab.str());
486    assert(OutFile.tell() == StringStart);
487
488    // Transfer string table.
489    // FIXME: The NonRelocatableStringpool starts with an empty string, but
490    // dsymutil-classic starts the reconstructed string table with 2 of these.
491    // Reproduce that behavior for now (there is corresponding code in
492    // transferSymbol).
493    Writer.WriteZeros(1);
494    typedef NonRelocatableStringpool::MapTy MapTy;
495    for (auto *Entry = NewStrings.getFirstEntry(); Entry;
496         Entry = static_cast<MapTy::MapEntryTy *>(Entry->getValue().second))
497      Writer.writeBytes(
498          StringRef(Entry->getKey().data(), Entry->getKey().size() + 1));
499  }
500
501  assert(OutFile.tell() == StringStart + NewStringsSize);
502
503  // Pad till the Dwarf segment start.
504  Writer.WriteZeros(DwarfSegmentStart - (StringStart + NewStringsSize));
505  assert(OutFile.tell() == DwarfSegmentStart);
506
507  // Emit the Dwarf sections contents.
508  for (const MCSection &Sec : MCAsm) {
509    if (Sec.begin() == Sec.end())
510      continue;
511
512    uint64_t Pos = OutFile.tell();
513    Writer.WriteZeros(RoundUpToAlignment(Pos, Sec.getAlignment()) - Pos);
514    MCAsm.writeSectionData(&Sec, Layout);
515  }
516
517  return true;
518}
519}
520}
521}
522