// MachODump.cpp, revision a894c8e34453493a9d3fb2ffbbc21151c3965b63
1//===-- MachODump.cpp - Object file dumping utility for llvm --------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the MachO-specific dumper for llvm-objdump. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm-objdump.h" 15#include "MCFunction.h" 16#include "llvm/Support/MachO.h" 17#include "llvm/Object/MachOObject.h" 18#include "llvm/ADT/OwningPtr.h" 19#include "llvm/ADT/Triple.h" 20#include "llvm/ADT/STLExtras.h" 21#include "llvm/MC/MCAsmInfo.h" 22#include "llvm/MC/MCDisassembler.h" 23#include "llvm/MC/MCInst.h" 24#include "llvm/MC/MCInstPrinter.h" 25#include "llvm/MC/MCInstrAnalysis.h" 26#include "llvm/MC/MCInstrDesc.h" 27#include "llvm/MC/MCInstrInfo.h" 28#include "llvm/MC/MCSubtargetInfo.h" 29#include "llvm/Support/CommandLine.h" 30#include "llvm/Support/Debug.h" 31#include "llvm/Support/Format.h" 32#include "llvm/Support/GraphWriter.h" 33#include "llvm/Support/MemoryBuffer.h" 34#include "llvm/Support/TargetRegistry.h" 35#include "llvm/Support/TargetSelect.h" 36#include "llvm/Support/raw_ostream.h" 37#include "llvm/Support/system_error.h" 38#include <algorithm> 39#include <cstring> 40using namespace llvm; 41using namespace object; 42 43static cl::opt<bool> 44 CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and" 45 "write it to a graphviz file (MachO-only)")); 46 47static const Target *GetTarget(const MachOObject *MachOObj) { 48 // Figure out the target triple. 
49 llvm::Triple TT("unknown-unknown-unknown"); 50 switch (MachOObj->getHeader().CPUType) { 51 case llvm::MachO::CPUTypeI386: 52 TT.setArch(Triple::ArchType(Triple::x86)); 53 break; 54 case llvm::MachO::CPUTypeX86_64: 55 TT.setArch(Triple::ArchType(Triple::x86_64)); 56 break; 57 case llvm::MachO::CPUTypeARM: 58 TT.setArch(Triple::ArchType(Triple::arm)); 59 break; 60 case llvm::MachO::CPUTypePowerPC: 61 TT.setArch(Triple::ArchType(Triple::ppc)); 62 break; 63 case llvm::MachO::CPUTypePowerPC64: 64 TT.setArch(Triple::ArchType(Triple::ppc64)); 65 break; 66 } 67 68 TripleName = TT.str(); 69 70 // Get the target specific parser. 71 std::string Error; 72 const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); 73 if (TheTarget) 74 return TheTarget; 75 76 errs() << "llvm-objdump: error: unable to get target for '" << TripleName 77 << "', see --version and --triple.\n"; 78 return 0; 79} 80 81struct Section { 82 char Name[16]; 83 uint64_t Address; 84 uint64_t Size; 85 uint32_t Offset; 86 uint32_t NumRelocs; 87 uint64_t RelocTableOffset; 88}; 89 90struct Symbol { 91 uint64_t Value; 92 uint32_t StringIndex; 93 uint8_t SectionIndex; 94 bool operator<(const Symbol &RHS) const { return Value < RHS.Value; } 95}; 96 97 98template <typename T> 99static Section copySection(const T &Sect) { 100 Section S; 101 memcpy(S.Name, Sect->Name, 16); 102 S.Address = Sect->Address; 103 S.Size = Sect->Size; 104 S.Offset = Sect->Offset; 105 S.NumRelocs = Sect->NumRelocationTableEntries; 106 S.RelocTableOffset = Sect->RelocationTableOffset; 107 return S; 108} 109 110template <typename T> 111static Symbol copySymbol(const T &STE) { 112 Symbol S; 113 S.StringIndex = STE->StringIndex; 114 S.SectionIndex = STE->SectionIndex; 115 S.Value = STE->Value; 116 return S; 117} 118 119// Print addtitional information about an address, if available. 
120static void DumpAddress(uint64_t Address, ArrayRef<Section> Sections, 121 MachOObject *MachOObj, raw_ostream &OS) { 122 for (unsigned i = 0; i != Sections.size(); ++i) { 123 uint64_t addr = Address-Sections[i].Address; 124 if (Sections[i].Address <= Address && 125 Sections[i].Address + Sections[i].Size > Address) { 126 StringRef bytes = MachOObj->getData(Sections[i].Offset, 127 Sections[i].Size); 128 // Print constant strings. 129 if (!strcmp(Sections[i].Name, "__cstring")) 130 OS << '"' << bytes.substr(addr, bytes.find('\0', addr)) << '"'; 131 // Print constant CFStrings. 132 if (!strcmp(Sections[i].Name, "__cfstring")) 133 OS << "@\"" << bytes.substr(addr, bytes.find('\0', addr)) << '"'; 134 } 135 } 136} 137 138typedef std::map<uint64_t, MCFunction*> FunctionMapTy; 139typedef SmallVector<MCFunction, 16> FunctionListTy; 140static void createMCFunctionAndSaveCalls(StringRef Name, 141 const MCDisassembler *DisAsm, 142 MemoryObject &Object, uint64_t Start, 143 uint64_t End, 144 MCInstrAnalysis *InstrAnalysis, 145 uint64_t Address, 146 raw_ostream &DebugOut, 147 FunctionMapTy &FunctionMap, 148 FunctionListTy &Functions) { 149 SmallVector<uint64_t, 16> Calls; 150 MCFunction f = 151 MCFunction::createFunctionFromMC(Name, DisAsm, Object, Start, End, 152 InstrAnalysis, DebugOut, Calls); 153 Functions.push_back(f); 154 FunctionMap[Address] = &Functions.back(); 155 156 // Add the gathered callees to the map. 157 for (unsigned i = 0, e = Calls.size(); i != e; ++i) 158 FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0)); 159} 160 161// Write a graphviz file for the CFG inside an MCFunction. 162static void emitDOTFile(const char *FileName, const MCFunction &f, 163 MCInstPrinter *IP) { 164 // Start a new dot file. 
165 std::string Error; 166 raw_fd_ostream Out(FileName, Error); 167 if (!Error.empty()) { 168 errs() << "llvm-objdump: warning: " << Error << '\n'; 169 return; 170 } 171 172 Out << "digraph " << f.getName() << " {\n"; 173 Out << "graph [ rankdir = \"LR\" ];\n"; 174 for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { 175 bool hasPreds = false; 176 // Only print blocks that have predecessors. 177 // FIXME: Slow. 178 for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe; 179 ++pi) 180 if (pi->second.contains(i->first)) { 181 hasPreds = true; 182 break; 183 } 184 185 if (!hasPreds && i != f.begin()) 186 continue; 187 188 Out << '"' << i->first << "\" [ label=\"<a>"; 189 // Print instructions. 190 for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie; 191 ++ii) { 192 // Escape special chars and print the instruction in mnemonic form. 193 std::string Str; 194 raw_string_ostream OS(Str); 195 IP->printInst(&i->second.getInsts()[ii].Inst, OS, ""); 196 Out << DOT::EscapeString(OS.str()) << '|'; 197 } 198 Out << "<o>\" shape=\"record\" ];\n"; 199 200 // Add edges. 201 for (MCBasicBlock::succ_iterator si = i->second.succ_begin(), 202 se = i->second.succ_end(); si != se; ++si) 203 Out << i->first << ":o -> " << *si <<":a\n"; 204 } 205 Out << "}\n"; 206} 207 208void llvm::DisassembleInputMachO(StringRef Filename) { 209 OwningPtr<MemoryBuffer> Buff; 210 211 if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) { 212 errs() << "llvm-objdump: " << Filename << ": " << ec.message() << "\n"; 213 return; 214 } 215 216 OwningPtr<MachOObject> MachOObj(MachOObject::LoadFromBuffer(Buff.take())); 217 218 const Target *TheTarget = GetTarget(MachOObj.get()); 219 if (!TheTarget) { 220 // GetTarget prints out stuff. 221 return; 222 } 223 const MCInstrInfo *InstrInfo = TheTarget->createMCInstrInfo(); 224 OwningPtr<MCInstrAnalysis> 225 InstrAnalysis(TheTarget->createMCInstrAnalysis(InstrInfo)); 226 227 // Set up disassembler. 
228 OwningPtr<const MCAsmInfo> AsmInfo(TheTarget->createMCAsmInfo(TripleName)); 229 OwningPtr<const MCSubtargetInfo> 230 STI(TheTarget->createMCSubtargetInfo(TripleName, "", "")); 231 OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI)); 232 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 233 OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter( 234 AsmPrinterVariant, *AsmInfo, *STI)); 235 236 if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) { 237 errs() << "error: couldn't initialize disassmbler for target " 238 << TripleName << '\n'; 239 return; 240 } 241 242 outs() << '\n' << Filename << ":\n\n"; 243 244 const macho::Header &Header = MachOObj->getHeader(); 245 246 const MachOObject::LoadCommandInfo *SymtabLCI = 0; 247 // First, find the symbol table segment. 248 for (unsigned i = 0; i != Header.NumLoadCommands; ++i) { 249 const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i); 250 if (LCI.Command.Type == macho::LCT_Symtab) { 251 SymtabLCI = &LCI; 252 break; 253 } 254 } 255 256 // Read and register the symbol table data. 257 InMemoryStruct<macho::SymtabLoadCommand> SymtabLC; 258 MachOObj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC); 259 MachOObj->RegisterStringTable(*SymtabLC); 260 261 std::vector<Section> Sections; 262 std::vector<Symbol> Symbols; 263 std::vector<Symbol> UnsortedSymbols; // FIXME: duplication 264 SmallVector<uint64_t, 8> FoundFns; 265 266 // Make a list of all symbols in the object file. 267 for (unsigned i = 0; i != Header.NumLoadCommands; ++i) { 268 const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i); 269 if (LCI.Command.Type == macho::LCT_Segment) { 270 InMemoryStruct<macho::SegmentLoadCommand> SegmentLC; 271 MachOObj->ReadSegmentLoadCommand(LCI, SegmentLC); 272 273 // Store the sections in this segment. 
274 for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) { 275 InMemoryStruct<macho::Section> Sect; 276 MachOObj->ReadSection(LCI, SectNum, Sect); 277 Sections.push_back(copySection(Sect)); 278 279 // Store the symbols in this section. 280 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { 281 InMemoryStruct<macho::SymbolTableEntry> STE; 282 MachOObj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE); 283 Symbols.push_back(copySymbol(STE)); 284 UnsortedSymbols.push_back(Symbols.back()); 285 } 286 } 287 } else if (LCI.Command.Type == macho::LCT_Segment64) { 288 InMemoryStruct<macho::Segment64LoadCommand> Segment64LC; 289 MachOObj->ReadSegment64LoadCommand(LCI, Segment64LC); 290 291 // Store the sections in this segment. 292 for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; 293 ++SectNum) { 294 InMemoryStruct<macho::Section64> Sect64; 295 MachOObj->ReadSection64(LCI, SectNum, Sect64); 296 Sections.push_back(copySection(Sect64)); 297 298 // Store the symbols in this section. 299 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { 300 InMemoryStruct<macho::Symbol64TableEntry> STE; 301 MachOObj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE); 302 Symbols.push_back(copySymbol(STE)); 303 UnsortedSymbols.push_back(Symbols.back()); 304 } 305 } 306 } else if (LCI.Command.Type == macho::LCT_FunctionStarts) { 307 // We found a function starts segment, parse the addresses for later 308 // consumption. 309 InMemoryStruct<macho::LinkeditDataLoadCommand> LLC; 310 MachOObj->ReadLinkeditDataLoadCommand(LCI, LLC); 311 312 MachOObj->ReadULEB128s(LLC->DataOffset, FoundFns); 313 } 314 } 315 316 317 // Sort the symbols by address, just in case they didn't come in that way. 318 array_pod_sort(Symbols.begin(), Symbols.end()); 319 320#ifndef NDEBUG 321 raw_ostream &DebugOut = DebugFlag ? 
dbgs() : nulls(); 322#else 323 raw_ostream &DebugOut = nulls(); 324#endif 325 326 FunctionMapTy FunctionMap; 327 FunctionListTy Functions; 328 329 for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) { 330 if (strcmp(Sections[SectIdx].Name, "__text")) 331 continue; // Skip non-text sections 332 333 // Insert the functions from the function starts segment into our map. 334 uint64_t VMAddr = Sections[SectIdx].Address - Sections[SectIdx].Offset; 335 for (unsigned i = 0, e = FoundFns.size(); i != e; ++i) 336 FunctionMap.insert(std::make_pair(FoundFns[i]+VMAddr, (MCFunction*)0)); 337 338 StringRef Bytes = MachOObj->getData(Sections[SectIdx].Offset, 339 Sections[SectIdx].Size); 340 StringRefMemoryObject memoryObject(Bytes); 341 bool symbolTableWorked = false; 342 343 // Parse relocations. 344 std::vector<std::pair<uint64_t, uint32_t> > Relocs; 345 for (unsigned j = 0; j != Sections[SectIdx].NumRelocs; ++j) { 346 InMemoryStruct<macho::RelocationEntry> RE; 347 MachOObj->ReadRelocationEntry(Sections[SectIdx].RelocTableOffset, j, RE); 348 Relocs.push_back(std::make_pair(RE->Word0, RE->Word1 & 0xffffff)); 349 } 350 array_pod_sort(Relocs.begin(), Relocs.end()); 351 352 // Disassemble symbol by symbol. 353 for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) { 354 // Make sure the symbol is defined in this section. 355 if ((unsigned)Symbols[SymIdx].SectionIndex - 1 != SectIdx) 356 continue; 357 358 // Start at the address of the symbol relative to the section's address. 359 uint64_t Start = Symbols[SymIdx].Value - Sections[SectIdx].Address; 360 // Stop disassembling either at the beginning of the next symbol or at 361 // the end of the section. 362 uint64_t End = (SymIdx+1 == Symbols.size() || 363 Symbols[SymIdx].SectionIndex != Symbols[SymIdx+1].SectionIndex) ? 
364 Sections[SectIdx].Size : 365 Symbols[SymIdx+1].Value - Sections[SectIdx].Address; 366 uint64_t Size; 367 368 if (Start >= End) 369 continue; 370 371 symbolTableWorked = true; 372 373 if (!CFG) { 374 // Normal disassembly, print addresses, bytes and mnemonic form. 375 outs() << MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex) 376 << ":\n"; 377 for (uint64_t Index = Start; Index < End; Index += Size) { 378 MCInst Inst; 379 380 if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, 381 DebugOut, nulls())) { 382 outs() << format("%8llx:\t", Sections[SectIdx].Address + Index); 383 DumpBytes(StringRef(Bytes.data() + Index, Size)); 384 IP->printInst(&Inst, outs(), ""); 385 outs() << "\n"; 386 } else { 387 errs() << "llvm-objdump: warning: invalid instruction encoding\n"; 388 if (Size == 0) 389 Size = 1; // skip illegible bytes 390 } 391 } 392 } else { 393 // Create CFG and use it for disassembly. 394 createMCFunctionAndSaveCalls( 395 MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex), 396 DisAsm.get(), memoryObject, Start, End, InstrAnalysis.get(), 397 Start, DebugOut, FunctionMap, Functions); 398 } 399 } 400 401 if (CFG) { 402 if (!symbolTableWorked) { 403 // Reading the symbol table didn't work, create a big __TEXT symbol. 404 createMCFunctionAndSaveCalls("__TEXT", DisAsm.get(), memoryObject, 405 0, Sections[SectIdx].Size, 406 InstrAnalysis.get(), 407 Sections[SectIdx].Offset, DebugOut, 408 FunctionMap, Functions); 409 } 410 for (std::map<uint64_t, MCFunction*>::iterator mi = FunctionMap.begin(), 411 me = FunctionMap.end(); mi != me; ++mi) 412 if (mi->second == 0) { 413 // Create functions for the remaining callees we have gathered, 414 // but we didn't find a name for them. 
415 SmallVector<uint64_t, 16> Calls; 416 MCFunction f = 417 MCFunction::createFunctionFromMC("unknown", DisAsm.get(), 418 memoryObject, mi->first, 419 Sections[SectIdx].Size, 420 InstrAnalysis.get(), DebugOut, 421 Calls); 422 Functions.push_back(f); 423 mi->second = &Functions.back(); 424 for (unsigned i = 0, e = Calls.size(); i != e; ++i) { 425 std::pair<uint64_t, MCFunction*> p(Calls[i], (MCFunction*)0); 426 if (FunctionMap.insert(p).second) 427 mi = FunctionMap.begin(); 428 } 429 } 430 431 DenseSet<uint64_t> PrintedBlocks; 432 for (unsigned ffi = 0, ffe = Functions.size(); ffi != ffe; ++ffi) { 433 MCFunction &f = Functions[ffi]; 434 for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){ 435 if (!PrintedBlocks.insert(fi->first).second) 436 continue; // We already printed this block. 437 438 // We assume a block has predecessors when it's the first block after 439 // a symbol. 440 bool hasPreds = FunctionMap.find(fi->first) != FunctionMap.end(); 441 442 // See if this block has predecessors. 443 // FIXME: Slow. 444 for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe; 445 ++pi) 446 if (pi->second.contains(fi->first)) { 447 hasPreds = true; 448 break; 449 } 450 451 // No predecessors, this is a data block. Print as .byte directives. 452 if (!hasPreds) { 453 uint64_t End = llvm::next(fi) == fe ? Sections[SectIdx].Size : 454 llvm::next(fi)->first; 455 outs() << "# " << End-fi->first << " bytes of data:\n"; 456 for (unsigned pos = fi->first; pos != End; ++pos) { 457 outs() << format("%8x:\t", Sections[SectIdx].Address + pos); 458 DumpBytes(StringRef(Bytes.data() + pos, 1)); 459 outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]); 460 } 461 continue; 462 } 463 464 if (fi->second.contains(fi->first)) // Print a header for simple loops 465 outs() << "# Loop begin:\n"; 466 467 // Walk over the instructions and print them. 
468 for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie; 469 ++ii) { 470 const MCDecodedInst &Inst = fi->second.getInsts()[ii]; 471 472 // If there's a symbol at this address, print its name. 473 if (FunctionMap.find(Sections[SectIdx].Address + Inst.Address) != 474 FunctionMap.end()) 475 outs() << FunctionMap[Sections[SectIdx].Address + Inst.Address]-> 476 getName() << ":\n"; 477 478 outs() << format("%8llx:\t", Sections[SectIdx].Address + 479 Inst.Address); 480 DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size)); 481 482 if (fi->second.contains(fi->first)) // Indent simple loops. 483 outs() << '\t'; 484 485 IP->printInst(&Inst.Inst, outs(), ""); 486 487 // Look for relocations inside this instructions, if there is one 488 // print its target and additional information if availbable. 489 for (unsigned j = 0; j != Relocs.size(); ++j) 490 if (Relocs[j].first >= Sections[SectIdx].Address + Inst.Address && 491 Relocs[j].first < Sections[SectIdx].Address + Inst.Address + 492 Inst.Size) { 493 outs() << "\t# " 494 << MachOObj->getStringAtIndex( 495 UnsortedSymbols[Relocs[j].second].StringIndex) 496 << ' '; 497 DumpAddress(UnsortedSymbols[Relocs[j].second].Value, Sections, 498 MachOObj.get(), outs()); 499 } 500 501 // If this instructions contains an address, see if we can evaluate 502 // it and print additional information. 503 uint64_t targ = InstrAnalysis->evaluateBranch(Inst.Inst, 504 Inst.Address, 505 Inst.Size); 506 if (targ != -1ULL) 507 DumpAddress(targ, Sections, MachOObj.get(), outs()); 508 509 outs() << '\n'; 510 } 511 } 512 513 emitDOTFile((f.getName().str() + ".dot").c_str(), f, IP.get()); 514 } 515 } 516 } 517} 518