llvm-objdump.cpp revision 853b0fd623491ef7dafeed20ee15897e3b95d82c
1//===-- llvm-objdump.cpp - Object file dumping utility for llvm -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This program is a utility that works like binutils "objdump", that is, it
11// dumps out a plethora of information about an object file depending on the
12// flags.
13//
14//===----------------------------------------------------------------------===//
15
16#include "MCFunction.h"
17#include "llvm/Object/ObjectFile.h"
18#include "llvm/ADT/OwningPtr.h"
19#include "llvm/ADT/Triple.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/MC/MCAsmInfo.h"
22#include "llvm/MC/MCDisassembler.h"
23#include "llvm/MC/MCInst.h"
24#include "llvm/MC/MCInstPrinter.h"
25#include "llvm/MC/MCInstrDesc.h"
26#include "llvm/MC/MCInstrInfo.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/Format.h"
30#include "llvm/Support/GraphWriter.h"
31#include "llvm/Support/Host.h"
32#include "llvm/Support/ManagedStatic.h"
33#include "llvm/Support/MemoryBuffer.h"
34#include "llvm/Support/MemoryObject.h"
35#include "llvm/Support/PrettyStackTrace.h"
36#include "llvm/Support/Signals.h"
37#include "llvm/Support/SourceMgr.h"
38#include "llvm/Support/raw_ostream.h"
39#include "llvm/Support/system_error.h"
40#include "llvm/Target/TargetRegistry.h"
41#include "llvm/Target/TargetSelect.h"
42#include <algorithm>
43#include <cstring>
44using namespace llvm;
45using namespace object;
46
47namespace {
48  cl::list<std::string>
49  InputFilenames(cl::Positional, cl::desc("<input object files>"),
50                 cl::ZeroOrMore);
51
52  cl::opt<bool>
53  Disassemble("disassemble",
54    cl::desc("Display assembler mnemonics for the machine instructions"));
55  cl::alias
56  Disassembled("d", cl::desc("Alias for --disassemble"),
57               cl::aliasopt(Disassemble));
58
59  cl::opt<bool>
60  CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and"
61                      "write it to a graphviz file"));
62
63  cl::opt<std::string>
64  TripleName("triple", cl::desc("Target triple to disassemble for, "
65                                "see -version for available targets"));
66
67  cl::opt<std::string>
68  ArchName("arch", cl::desc("Target arch to disassemble for, "
69                            "see -version for available targets"));
70
71  StringRef ToolName;
72
73  bool error(error_code ec) {
74    if (!ec) return false;
75
76    outs() << ToolName << ": error reading file: " << ec.message() << ".\n";
77    outs().flush();
78    return true;
79  }
80}
81
82static const Target *GetTarget(const ObjectFile *Obj = NULL) {
83  // Figure out the target triple.
84  llvm::Triple TT("unknown-unknown-unknown");
85  if (TripleName.empty()) {
86    if (Obj)
87      TT.setArch(Triple::ArchType(Obj->getArch()));
88  } else
89    TT.setTriple(Triple::normalize(TripleName));
90
91  if (!ArchName.empty())
92    TT.setArchName(ArchName);
93
94  TripleName = TT.str();
95
96  // Get the target specific parser.
97  std::string Error;
98  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
99  if (TheTarget)
100    return TheTarget;
101
102  errs() << ToolName << ": error: unable to get target for '" << TripleName
103         << "', see --version and --triple.\n";
104  return 0;
105}
106
107namespace {
108class StringRefMemoryObject : public MemoryObject {
109private:
110  StringRef Bytes;
111public:
112  StringRefMemoryObject(StringRef bytes) : Bytes(bytes) {}
113
114  uint64_t getBase() const { return 0; }
115  uint64_t getExtent() const { return Bytes.size(); }
116
117  int readByte(uint64_t Addr, uint8_t *Byte) const {
118    if (Addr >= getExtent())
119      return -1;
120    *Byte = Bytes[Addr];
121    return 0;
122  }
123};
124}
125
126static void DumpBytes(StringRef bytes) {
127  static char hex_rep[] = "0123456789abcdef";
128  // FIXME: The real way to do this is to figure out the longest instruction
129  //        and align to that size before printing. I'll fix this when I get
130  //        around to outputting relocations.
131  // 15 is the longest x86 instruction
132  // 3 is for the hex rep of a byte + a space.
133  // 1 is for the null terminator.
134  enum { OutputSize = (15 * 3) + 1 };
135  char output[OutputSize];
136
137  assert(bytes.size() <= 15
138    && "DumpBytes only supports instructions of up to 15 bytes");
139  memset(output, ' ', sizeof(output));
140  unsigned index = 0;
141  for (StringRef::iterator i = bytes.begin(),
142                           e = bytes.end(); i != e; ++i) {
143    output[index] = hex_rep[(*i & 0xF0) >> 4];
144    output[index + 1] = hex_rep[*i & 0xF];
145    index += 3;
146  }
147
148  output[sizeof(output) - 1] = 0;
149  outs() << output;
150}
151
152static void DisassembleInput(const StringRef &Filename) {
153  OwningPtr<MemoryBuffer> Buff;
154
155  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
156    errs() << ToolName << ": " << Filename << ": " << ec.message() << "\n";
157    return;
158  }
159
160  OwningPtr<ObjectFile> Obj(ObjectFile::createObjectFile(Buff.take()));
161
162  const Target *TheTarget = GetTarget(Obj.get());
163  if (!TheTarget) {
164    // GetTarget prints out stuff.
165    return;
166  }
167  const MCInstrInfo *InstrInfo = TheTarget->createMCInstrInfo();
168
169  outs() << '\n';
170  outs() << Filename
171         << ":\tfile format " << Obj->getFileFormatName() << "\n\n";
172
173  error_code ec;
174  for (ObjectFile::section_iterator i = Obj->begin_sections(),
175                                    e = Obj->end_sections();
176                                    i != e; i.increment(ec)) {
177    if (error(ec)) break;
178    bool text;
179    if (error(i->isText(text))) break;
180    if (!text) continue;
181
182    // Make a list of all the symbols in this section.
183    std::vector<std::pair<uint64_t, StringRef> > Symbols;
184    for (ObjectFile::symbol_iterator si = Obj->begin_symbols(),
185                                     se = Obj->end_symbols();
186                                     si != se; si.increment(ec)) {
187      bool contains;
188      if (!error(i->containsSymbol(*si, contains)) && contains) {
189        uint64_t Address;
190        if (error(si->getAddress(Address))) break;
191        StringRef Name;
192        if (error(si->getName(Name))) break;
193        Symbols.push_back(std::make_pair(Address, Name));
194      }
195    }
196
197    // Sort the symbols by address, just in case they didn't come in that way.
198    array_pod_sort(Symbols.begin(), Symbols.end());
199
200    StringRef name;
201    if (error(i->getName(name))) break;
202    outs() << "Disassembly of section " << name << ':';
203
204    // If the section has no symbols just insert a dummy one and disassemble
205    // the whole section.
206    if (Symbols.empty())
207      Symbols.push_back(std::make_pair(0, name));
208
209    // Set up disassembler.
210    OwningPtr<const MCAsmInfo> AsmInfo(TheTarget->createMCAsmInfo(TripleName));
211
212    if (!AsmInfo) {
213      errs() << "error: no assembly info for target " << TripleName << "\n";
214      return;
215    }
216
217    OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler());
218    if (!DisAsm) {
219      errs() << "error: no disassembler for target " << TripleName << "\n";
220      return;
221    }
222
223    int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
224    OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
225                                  AsmPrinterVariant, *AsmInfo));
226    if (!IP) {
227      errs() << "error: no instruction printer for target " << TripleName << '\n';
228      return;
229    }
230
231    StringRef Bytes;
232    if (error(i->getContents(Bytes))) break;
233    StringRefMemoryObject memoryObject(Bytes);
234    uint64_t Size;
235    uint64_t Index;
236    uint64_t SectSize;
237    if (error(i->getSize(SectSize))) break;
238
239    // Disassemble symbol by symbol.
240    for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
241      uint64_t Start = Symbols[si].first;
242      uint64_t End = si == se-1 ? SectSize : Symbols[si + 1].first - 1;
243      outs() << '\n' << Symbols[si].second << ":\n";
244
245#ifndef NDEBUG
246        raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
247#else
248        raw_ostream &DebugOut = nulls();
249#endif
250
251      for (Index = Start; Index < End; Index += Size) {
252        MCInst Inst;
253        if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, DebugOut)) {
254          uint64_t addr;
255          if (error(i->getAddress(addr))) break;
256          outs() << format("%8x:\t", addr + Index);
257          DumpBytes(StringRef(Bytes.data() + Index, Size));
258          IP->printInst(&Inst, outs());
259          outs() << "\n";
260        } else {
261          errs() << ToolName << ": warning: invalid instruction encoding\n";
262          if (Size == 0)
263            Size = 1; // skip illegible bytes
264        }
265      }
266
267      if (CFG) {
268        MCFunction f =
269          MCFunction::createFunctionFromMC(Symbols[si].second, DisAsm.get(),
270                                           memoryObject, Start, End, InstrInfo,
271                                           DebugOut);
272
273        // Start a new dot file.
274        std::string Error;
275        raw_fd_ostream Out((f.getName().str() + ".dot").c_str(), Error);
276        if (!Error.empty()) {
277          errs() << ToolName << ": warning: " << Error << '\n';
278          continue;
279        }
280
281        Out << "digraph " << f.getName() << " {\n";
282        Out << "graph [ rankdir = \"LR\" ];\n";
283        for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
284          bool hasPreds = false;
285          // Only print blocks that have predecessors.
286          // FIXME: Slow.
287          for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
288               ++pi)
289            for (pi->second->contains(&i->second)) {
290              hasPreds = true;
291              break;
292            }
293
294          if (!hasPreds && i != f.begin())
295            continue;
296
297          Out << '"' << (uintptr_t)&i->second << "\" [ label=\"<a>";
298          // Print instructions.
299          for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie;
300               ++ii) {
301            // Escape special chars and print the instruction in mnemonic form.
302            std::string Str;
303            raw_string_ostream OS(Str);
304            IP->printInst(&i->second.getInsts()[ii].Inst, OS);
305            Out << DOT::EscapeString(OS.str()) << '|';
306          }
307          Out << "<o>\" shape=\"record\" ];\n";
308
309          // Add edges.
310          for (MCBasicBlock::succ_iterator si = i->second.succ_begin(),
311              se = i->second.succ_end(); si != se; ++si)
312            Out << (uintptr_t)&i->second << ":o -> " << (uintptr_t)*si <<":a\n";
313        }
314        Out << "}\n";
315      }
316    }
317  }
318}
319
320int main(int argc, char **argv) {
321  // Print a stack trace if we signal out.
322  sys::PrintStackTraceOnErrorSignal();
323  PrettyStackTraceProgram X(argc, argv);
324  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
325
326  // Initialize targets and assembly printers/parsers.
327  llvm::InitializeAllTargetInfos();
328  llvm::InitializeAllTargetMCs();
329  llvm::InitializeAllAsmParsers();
330  llvm::InitializeAllDisassemblers();
331
332  cl::ParseCommandLineOptions(argc, argv, "llvm object file dumper\n");
333  TripleName = Triple::normalize(TripleName);
334
335  ToolName = argv[0];
336
337  // Defaults to a.out if no filenames specified.
338  if (InputFilenames.size() == 0)
339    InputFilenames.push_back("a.out");
340
341  // -d is the only flag that is currently implemented, so just print help if
342  // it is not set.
343  if (!Disassemble) {
344    cl::PrintHelpMessage();
345    return 2;
346  }
347
348  std::for_each(InputFilenames.begin(), InputFilenames.end(),
349                DisassembleInput);
350
351  return 0;
352}
353