MachODump.cpp revision a894c8e34453493a9d3fb2ffbbc21151c3965b63
1//===-- MachODump.cpp - Object file dumping utility for llvm --------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the MachO-specific dumper for llvm-objdump.
11//
12//===----------------------------------------------------------------------===//
13
#include "llvm-objdump.h"
#include "MCFunction.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/MachOObject.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MachO.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include <algorithm>
#include <cstring>
#include <deque>
40using namespace llvm;
41using namespace object;
42
43static cl::opt<bool>
44  CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and"
45                      "write it to a graphviz file (MachO-only)"));
46
47static const Target *GetTarget(const MachOObject *MachOObj) {
48  // Figure out the target triple.
49  llvm::Triple TT("unknown-unknown-unknown");
50  switch (MachOObj->getHeader().CPUType) {
51  case llvm::MachO::CPUTypeI386:
52    TT.setArch(Triple::ArchType(Triple::x86));
53    break;
54  case llvm::MachO::CPUTypeX86_64:
55    TT.setArch(Triple::ArchType(Triple::x86_64));
56    break;
57  case llvm::MachO::CPUTypeARM:
58    TT.setArch(Triple::ArchType(Triple::arm));
59    break;
60  case llvm::MachO::CPUTypePowerPC:
61    TT.setArch(Triple::ArchType(Triple::ppc));
62    break;
63  case llvm::MachO::CPUTypePowerPC64:
64    TT.setArch(Triple::ArchType(Triple::ppc64));
65    break;
66  }
67
68  TripleName = TT.str();
69
70  // Get the target specific parser.
71  std::string Error;
72  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
73  if (TheTarget)
74    return TheTarget;
75
76  errs() << "llvm-objdump: error: unable to get target for '" << TripleName
77         << "', see --version and --triple.\n";
78  return 0;
79}
80
// Width-independent copy of the section header fields this file needs; it is
// filled in by copySection from either the 32-bit or the 64-bit on-disk form.
struct Section {
  // Section name as a C string. copySection stores 16 raw bytes here and a
  // Mach-O section name that uses all 16 of them carries no terminator, so
  // one extra byte is reserved and kept zero by the constructor. That makes
  // the strcmp-style lookups on Name well defined.
  char Name[17];
  uint64_t Address;           // Address the section's first byte is mapped at.
  uint64_t Size;              // Size of the section contents in bytes.
  uint32_t Offset;            // File offset of the section contents.
  uint32_t NumRelocs;         // Number of entries in the relocation table.
  uint64_t RelocTableOffset;  // File offset of the relocation table.

  Section()
    : Address(0), Size(0), Offset(0), NumRelocs(0), RelocTableOffset(0) {
    memset(Name, 0, sizeof(Name));
  }
};
89
// Width-independent copy of the symbol table fields this file needs; it is
// filled in by copySymbol.
struct Symbol {
  uint64_t Value;        // Value field of the symbol table entry.
  uint32_t StringIndex;  // Index handed to getStringAtIndex for the name.
  uint8_t SectionIndex;  // Section index as stored in the symbol table entry.

  // Symbols are ordered by Value alone; the other fields do not take part in
  // the comparison.
  bool operator<(const Symbol &RHS) const {
    return RHS.Value > Value;
  }
};
96
97
98template <typename T>
99static Section copySection(const T &Sect) {
100  Section S;
101  memcpy(S.Name, Sect->Name, 16);
102  S.Address = Sect->Address;
103  S.Size = Sect->Size;
104  S.Offset = Sect->Offset;
105  S.NumRelocs = Sect->NumRelocationTableEntries;
106  S.RelocTableOffset = Sect->RelocationTableOffset;
107  return S;
108}
109
110template <typename T>
111static Symbol copySymbol(const T &STE) {
112  Symbol S;
113  S.StringIndex = STE->StringIndex;
114  S.SectionIndex = STE->SectionIndex;
115  S.Value = STE->Value;
116  return S;
117}
118
119// Print addtitional information about an address, if available.
120static void DumpAddress(uint64_t Address, ArrayRef<Section> Sections,
121                        MachOObject *MachOObj, raw_ostream &OS) {
122  for (unsigned i = 0; i != Sections.size(); ++i) {
123    uint64_t addr = Address-Sections[i].Address;
124    if (Sections[i].Address <= Address &&
125        Sections[i].Address + Sections[i].Size > Address) {
126      StringRef bytes = MachOObj->getData(Sections[i].Offset,
127                                          Sections[i].Size);
128      // Print constant strings.
129      if (!strcmp(Sections[i].Name, "__cstring"))
130        OS << '"' << bytes.substr(addr, bytes.find('\0', addr)) << '"';
131      // Print constant CFStrings.
132      if (!strcmp(Sections[i].Name, "__cfstring"))
133        OS << "@\"" << bytes.substr(addr, bytes.find('\0', addr)) << '"';
134    }
135  }
136}
137
138typedef std::map<uint64_t, MCFunction*> FunctionMapTy;
139typedef SmallVector<MCFunction, 16> FunctionListTy;
140static void createMCFunctionAndSaveCalls(StringRef Name,
141                                         const MCDisassembler *DisAsm,
142                                         MemoryObject &Object, uint64_t Start,
143                                         uint64_t End,
144                                         MCInstrAnalysis *InstrAnalysis,
145                                         uint64_t Address,
146                                         raw_ostream &DebugOut,
147                                         FunctionMapTy &FunctionMap,
148                                         FunctionListTy &Functions) {
149  SmallVector<uint64_t, 16> Calls;
150  MCFunction f =
151    MCFunction::createFunctionFromMC(Name, DisAsm, Object, Start, End,
152                                     InstrAnalysis, DebugOut, Calls);
153  Functions.push_back(f);
154  FunctionMap[Address] = &Functions.back();
155
156  // Add the gathered callees to the map.
157  for (unsigned i = 0, e = Calls.size(); i != e; ++i)
158    FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0));
159}
160
161// Write a graphviz file for the CFG inside an MCFunction.
162static void emitDOTFile(const char *FileName, const MCFunction &f,
163                        MCInstPrinter *IP) {
164  // Start a new dot file.
165  std::string Error;
166  raw_fd_ostream Out(FileName, Error);
167  if (!Error.empty()) {
168    errs() << "llvm-objdump: warning: " << Error << '\n';
169    return;
170  }
171
172  Out << "digraph " << f.getName() << " {\n";
173  Out << "graph [ rankdir = \"LR\" ];\n";
174  for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
175    bool hasPreds = false;
176    // Only print blocks that have predecessors.
177    // FIXME: Slow.
178    for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
179        ++pi)
180      if (pi->second.contains(i->first)) {
181        hasPreds = true;
182        break;
183      }
184
185    if (!hasPreds && i != f.begin())
186      continue;
187
188    Out << '"' << i->first << "\" [ label=\"<a>";
189    // Print instructions.
190    for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie;
191        ++ii) {
192      // Escape special chars and print the instruction in mnemonic form.
193      std::string Str;
194      raw_string_ostream OS(Str);
195      IP->printInst(&i->second.getInsts()[ii].Inst, OS, "");
196      Out << DOT::EscapeString(OS.str()) << '|';
197    }
198    Out << "<o>\" shape=\"record\" ];\n";
199
200    // Add edges.
201    for (MCBasicBlock::succ_iterator si = i->second.succ_begin(),
202        se = i->second.succ_end(); si != se; ++si)
203      Out << i->first << ":o -> " << *si <<":a\n";
204  }
205  Out << "}\n";
206}
207
208void llvm::DisassembleInputMachO(StringRef Filename) {
209  OwningPtr<MemoryBuffer> Buff;
210
211  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
212    errs() << "llvm-objdump: " << Filename << ": " << ec.message() << "\n";
213    return;
214  }
215
216  OwningPtr<MachOObject> MachOObj(MachOObject::LoadFromBuffer(Buff.take()));
217
218  const Target *TheTarget = GetTarget(MachOObj.get());
219  if (!TheTarget) {
220    // GetTarget prints out stuff.
221    return;
222  }
223  const MCInstrInfo *InstrInfo = TheTarget->createMCInstrInfo();
224  OwningPtr<MCInstrAnalysis>
225    InstrAnalysis(TheTarget->createMCInstrAnalysis(InstrInfo));
226
227  // Set up disassembler.
228  OwningPtr<const MCAsmInfo> AsmInfo(TheTarget->createMCAsmInfo(TripleName));
229  OwningPtr<const MCSubtargetInfo>
230    STI(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
231  OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI));
232  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
233  OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
234                              AsmPrinterVariant, *AsmInfo, *STI));
235
236  if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) {
237    errs() << "error: couldn't initialize disassmbler for target "
238           << TripleName << '\n';
239    return;
240  }
241
242  outs() << '\n' << Filename << ":\n\n";
243
244  const macho::Header &Header = MachOObj->getHeader();
245
246  const MachOObject::LoadCommandInfo *SymtabLCI = 0;
247  // First, find the symbol table segment.
248  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
249    const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i);
250    if (LCI.Command.Type == macho::LCT_Symtab) {
251      SymtabLCI = &LCI;
252      break;
253    }
254  }
255
256  // Read and register the symbol table data.
257  InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
258  MachOObj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
259  MachOObj->RegisterStringTable(*SymtabLC);
260
261  std::vector<Section> Sections;
262  std::vector<Symbol> Symbols;
263  std::vector<Symbol> UnsortedSymbols; // FIXME: duplication
264  SmallVector<uint64_t, 8> FoundFns;
265
266  // Make a list of all symbols in the object file.
267  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
268    const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i);
269    if (LCI.Command.Type == macho::LCT_Segment) {
270      InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
271      MachOObj->ReadSegmentLoadCommand(LCI, SegmentLC);
272
273      // Store the sections in this segment.
274      for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
275        InMemoryStruct<macho::Section> Sect;
276        MachOObj->ReadSection(LCI, SectNum, Sect);
277        Sections.push_back(copySection(Sect));
278
279        // Store the symbols in this section.
280        for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
281          InMemoryStruct<macho::SymbolTableEntry> STE;
282          MachOObj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
283          Symbols.push_back(copySymbol(STE));
284          UnsortedSymbols.push_back(Symbols.back());
285        }
286      }
287    } else if (LCI.Command.Type == macho::LCT_Segment64) {
288      InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
289      MachOObj->ReadSegment64LoadCommand(LCI, Segment64LC);
290
291      // Store the sections in this segment.
292      for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections;
293           ++SectNum) {
294        InMemoryStruct<macho::Section64> Sect64;
295        MachOObj->ReadSection64(LCI, SectNum, Sect64);
296        Sections.push_back(copySection(Sect64));
297
298        // Store the symbols in this section.
299        for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
300          InMemoryStruct<macho::Symbol64TableEntry> STE;
301          MachOObj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
302          Symbols.push_back(copySymbol(STE));
303          UnsortedSymbols.push_back(Symbols.back());
304        }
305      }
306    } else if (LCI.Command.Type == macho::LCT_FunctionStarts) {
307      // We found a function starts segment, parse the addresses for later
308      // consumption.
309      InMemoryStruct<macho::LinkeditDataLoadCommand> LLC;
310      MachOObj->ReadLinkeditDataLoadCommand(LCI, LLC);
311
312      MachOObj->ReadULEB128s(LLC->DataOffset, FoundFns);
313    }
314  }
315
316
317  // Sort the symbols by address, just in case they didn't come in that way.
318  array_pod_sort(Symbols.begin(), Symbols.end());
319
320#ifndef NDEBUG
321  raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
322#else
323  raw_ostream &DebugOut = nulls();
324#endif
325
326  FunctionMapTy FunctionMap;
327  FunctionListTy Functions;
328
329  for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
330    if (strcmp(Sections[SectIdx].Name, "__text"))
331      continue; // Skip non-text sections
332
333    // Insert the functions from the function starts segment into our map.
334    uint64_t VMAddr = Sections[SectIdx].Address - Sections[SectIdx].Offset;
335    for (unsigned i = 0, e = FoundFns.size(); i != e; ++i)
336      FunctionMap.insert(std::make_pair(FoundFns[i]+VMAddr, (MCFunction*)0));
337
338    StringRef Bytes = MachOObj->getData(Sections[SectIdx].Offset,
339                                        Sections[SectIdx].Size);
340    StringRefMemoryObject memoryObject(Bytes);
341    bool symbolTableWorked = false;
342
343    // Parse relocations.
344    std::vector<std::pair<uint64_t, uint32_t> > Relocs;
345    for (unsigned j = 0; j != Sections[SectIdx].NumRelocs; ++j) {
346      InMemoryStruct<macho::RelocationEntry> RE;
347      MachOObj->ReadRelocationEntry(Sections[SectIdx].RelocTableOffset, j, RE);
348      Relocs.push_back(std::make_pair(RE->Word0, RE->Word1 & 0xffffff));
349    }
350    array_pod_sort(Relocs.begin(), Relocs.end());
351
352    // Disassemble symbol by symbol.
353    for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
354      // Make sure the symbol is defined in this section.
355      if ((unsigned)Symbols[SymIdx].SectionIndex - 1 != SectIdx)
356        continue;
357
358      // Start at the address of the symbol relative to the section's address.
359      uint64_t Start = Symbols[SymIdx].Value - Sections[SectIdx].Address;
360      // Stop disassembling either at the beginning of the next symbol or at
361      // the end of the section.
362      uint64_t End = (SymIdx+1 == Symbols.size() ||
363          Symbols[SymIdx].SectionIndex != Symbols[SymIdx+1].SectionIndex) ?
364          Sections[SectIdx].Size :
365          Symbols[SymIdx+1].Value - Sections[SectIdx].Address;
366      uint64_t Size;
367
368      if (Start >= End)
369        continue;
370
371      symbolTableWorked = true;
372
373      if (!CFG) {
374        // Normal disassembly, print addresses, bytes and mnemonic form.
375        outs() << MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex)
376          << ":\n";
377        for (uint64_t Index = Start; Index < End; Index += Size) {
378          MCInst Inst;
379
380          if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
381                                     DebugOut, nulls())) {
382            outs() << format("%8llx:\t", Sections[SectIdx].Address + Index);
383            DumpBytes(StringRef(Bytes.data() + Index, Size));
384            IP->printInst(&Inst, outs(), "");
385            outs() << "\n";
386          } else {
387            errs() << "llvm-objdump: warning: invalid instruction encoding\n";
388            if (Size == 0)
389              Size = 1; // skip illegible bytes
390          }
391        }
392      } else {
393        // Create CFG and use it for disassembly.
394        createMCFunctionAndSaveCalls(
395            MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex),
396            DisAsm.get(), memoryObject, Start, End, InstrAnalysis.get(),
397            Start, DebugOut, FunctionMap, Functions);
398      }
399    }
400
401    if (CFG) {
402      if (!symbolTableWorked) {
403        // Reading the symbol table didn't work, create a big __TEXT symbol.
404        createMCFunctionAndSaveCalls("__TEXT", DisAsm.get(), memoryObject,
405                                     0, Sections[SectIdx].Size,
406                                     InstrAnalysis.get(),
407                                     Sections[SectIdx].Offset, DebugOut,
408                                     FunctionMap, Functions);
409      }
410      for (std::map<uint64_t, MCFunction*>::iterator mi = FunctionMap.begin(),
411           me = FunctionMap.end(); mi != me; ++mi)
412        if (mi->second == 0) {
413          // Create functions for the remaining callees we have gathered,
414          // but we didn't find a name for them.
415          SmallVector<uint64_t, 16> Calls;
416          MCFunction f =
417            MCFunction::createFunctionFromMC("unknown", DisAsm.get(),
418                                             memoryObject, mi->first,
419                                             Sections[SectIdx].Size,
420                                             InstrAnalysis.get(), DebugOut,
421                                             Calls);
422          Functions.push_back(f);
423          mi->second = &Functions.back();
424          for (unsigned i = 0, e = Calls.size(); i != e; ++i) {
425            std::pair<uint64_t, MCFunction*> p(Calls[i], (MCFunction*)0);
426            if (FunctionMap.insert(p).second)
427              mi = FunctionMap.begin();
428          }
429        }
430
431      DenseSet<uint64_t> PrintedBlocks;
432      for (unsigned ffi = 0, ffe = Functions.size(); ffi != ffe; ++ffi) {
433        MCFunction &f = Functions[ffi];
434        for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){
435          if (!PrintedBlocks.insert(fi->first).second)
436            continue; // We already printed this block.
437
438          // We assume a block has predecessors when it's the first block after
439          // a symbol.
440          bool hasPreds = FunctionMap.find(fi->first) != FunctionMap.end();
441
442          // See if this block has predecessors.
443          // FIXME: Slow.
444          for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
445              ++pi)
446            if (pi->second.contains(fi->first)) {
447              hasPreds = true;
448              break;
449            }
450
451          // No predecessors, this is a data block. Print as .byte directives.
452          if (!hasPreds) {
453            uint64_t End = llvm::next(fi) == fe ? Sections[SectIdx].Size :
454                                                  llvm::next(fi)->first;
455            outs() << "# " << End-fi->first << " bytes of data:\n";
456            for (unsigned pos = fi->first; pos != End; ++pos) {
457              outs() << format("%8x:\t", Sections[SectIdx].Address + pos);
458              DumpBytes(StringRef(Bytes.data() + pos, 1));
459              outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]);
460            }
461            continue;
462          }
463
464          if (fi->second.contains(fi->first)) // Print a header for simple loops
465            outs() << "# Loop begin:\n";
466
467          // Walk over the instructions and print them.
468          for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie;
469               ++ii) {
470            const MCDecodedInst &Inst = fi->second.getInsts()[ii];
471
472            // If there's a symbol at this address, print its name.
473            if (FunctionMap.find(Sections[SectIdx].Address + Inst.Address) !=
474                FunctionMap.end())
475              outs() << FunctionMap[Sections[SectIdx].Address + Inst.Address]->
476                                                             getName() << ":\n";
477
478            outs() << format("%8llx:\t", Sections[SectIdx].Address +
479                                         Inst.Address);
480            DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size));
481
482            if (fi->second.contains(fi->first)) // Indent simple loops.
483              outs() << '\t';
484
485            IP->printInst(&Inst.Inst, outs(), "");
486
487            // Look for relocations inside this instructions, if there is one
488            // print its target and additional information if availbable.
489            for (unsigned j = 0; j != Relocs.size(); ++j)
490              if (Relocs[j].first >= Sections[SectIdx].Address + Inst.Address &&
491                  Relocs[j].first < Sections[SectIdx].Address + Inst.Address +
492                                    Inst.Size) {
493                outs() << "\t# "
494                   << MachOObj->getStringAtIndex(
495                                  UnsortedSymbols[Relocs[j].second].StringIndex)
496                   << ' ';
497                DumpAddress(UnsortedSymbols[Relocs[j].second].Value, Sections,
498                            MachOObj.get(), outs());
499              }
500
501            // If this instructions contains an address, see if we can evaluate
502            // it and print additional information.
503            uint64_t targ = InstrAnalysis->evaluateBranch(Inst.Inst,
504                                                          Inst.Address,
505                                                          Inst.Size);
506            if (targ != -1ULL)
507              DumpAddress(targ, Sections, MachOObj.get(), outs());
508
509            outs() << '\n';
510          }
511        }
512
513        emitDOTFile((f.getName().str() + ".dot").c_str(), f, IP.get());
514      }
515    }
516  }
517}
518