MachODump.cpp revision bcc1a737f5e7ff896e79c9a4a6177cc243618eff
1//===-- MachODump.cpp - Object file dumping utility for llvm --------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the MachO-specific dumper for llvm-objdump. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm-objdump.h" 15#include "MCFunction.h" 16#include "llvm/Support/MachO.h" 17#include "llvm/Object/MachOObject.h" 18#include "llvm/ADT/OwningPtr.h" 19#include "llvm/ADT/Triple.h" 20#include "llvm/ADT/STLExtras.h" 21#include "llvm/MC/MCAsmInfo.h" 22#include "llvm/MC/MCDisassembler.h" 23#include "llvm/MC/MCInst.h" 24#include "llvm/MC/MCInstPrinter.h" 25#include "llvm/MC/MCInstrAnalysis.h" 26#include "llvm/MC/MCInstrDesc.h" 27#include "llvm/MC/MCInstrInfo.h" 28#include "llvm/MC/MCSubtargetInfo.h" 29#include "llvm/Support/CommandLine.h" 30#include "llvm/Support/Debug.h" 31#include "llvm/Support/Format.h" 32#include "llvm/Support/GraphWriter.h" 33#include "llvm/Support/MemoryBuffer.h" 34#include "llvm/Support/TargetRegistry.h" 35#include "llvm/Support/TargetSelect.h" 36#include "llvm/Support/raw_ostream.h" 37#include "llvm/Support/system_error.h" 38#include <algorithm> 39#include <cstring> 40using namespace llvm; 41using namespace object; 42 43static cl::opt<bool> 44 CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and" 45 "write it to a graphviz file (MachO-only)")); 46 47static const Target *GetTarget(const MachOObject *MachOObj) { 48 // Figure out the target triple. 49 llvm::Triple TT("unknown-unknown-unknown"); 50 switch (MachOObj->getHeader().CPUType) { 51 case llvm::MachO::CPUTypeI386: 52 TT.setArch(Triple::ArchType(Triple::x86)); 53 break; 54 case llvm::MachO::CPUTypeX86_64: 55 TT.setArch(Triple::ArchType(Triple::x86_64)); 56 break; 57 case llvm::MachO::CPUTypeARM: 58 TT.setArch(Triple::ArchType(Triple::arm)); 59 break; 60 case llvm::MachO::CPUTypePowerPC: 61 TT.setArch(Triple::ArchType(Triple::ppc)); 62 break; 63 case llvm::MachO::CPUTypePowerPC64: 64 TT.setArch(Triple::ArchType(Triple::ppc64)); 65 break; 66 } 67 68 TripleName = TT.str(); 69 70 // Get the target specific parser. 71 std::string Error; 72 const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); 73 if (TheTarget) 74 return TheTarget; 75 76 errs() << "llvm-objdump: error: unable to get target for '" << TripleName 77 << "', see --version and --triple.\n"; 78 return 0; 79} 80 81struct Section { 82 char Name[16]; 83 uint64_t Address; 84 uint64_t Size; 85 uint32_t Offset; 86 uint32_t NumRelocs; 87 uint64_t RelocTableOffset; 88}; 89 90struct Symbol { 91 uint64_t Value; 92 uint32_t StringIndex; 93 uint8_t SectionIndex; 94 bool operator<(const Symbol &RHS) const { return Value < RHS.Value; } 95}; 96 97static void DumpAddress(uint64_t Address, ArrayRef<Section> Sections, 98 MachOObject *MachOObj, raw_ostream &OS) { 99 for (unsigned i = 0; i != Sections.size(); ++i) { 100 uint64_t addr = Address-Sections[i].Address; 101 if (Sections[i].Address <= Address && 102 Sections[i].Address + Sections[i].Size > Address) { 103 StringRef bytes = MachOObj->getData(Sections[i].Offset, 104 Sections[i].Size); 105 if (!strcmp(Sections[i].Name, "__cstring")) 106 OS << '"' << bytes.substr(addr, bytes.find('\0', addr)) << '"'; 107 if (!strcmp(Sections[i].Name, "__cfstring")) 108 OS << "@\"" << bytes.substr(addr, bytes.find('\0', addr)) << '"'; 109 } 110 } 111} 112 113void llvm::DisassembleInputMachO(StringRef Filename) { 114 OwningPtr<MemoryBuffer> Buff; 115 116 if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) { 117 errs() << "llvm-objdump: " << Filename << ": " << ec.message() << "\n"; 118 return; 119 } 120 121 OwningPtr<MachOObject> MachOObj(MachOObject::LoadFromBuffer(Buff.take())); 122 123 const Target *TheTarget = GetTarget(MachOObj.get()); 124 if (!TheTarget) { 125 // GetTarget prints out stuff. 126 return; 127 } 128 const MCInstrInfo *InstrInfo = TheTarget->createMCInstrInfo(); 129 OwningPtr<MCInstrAnalysis> 130 InstrAnalysis(TheTarget->createMCInstrAnalysis(InstrInfo)); 131 132 // Set up disassembler. 133 OwningPtr<const MCAsmInfo> AsmInfo(TheTarget->createMCAsmInfo(TripleName)); 134 135 if (!AsmInfo) { 136 errs() << "error: no assembly info for target " << TripleName << "\n"; 137 return; 138 } 139 140 OwningPtr<const MCSubtargetInfo> 141 STI(TheTarget->createMCSubtargetInfo(TripleName, "", "")); 142 143 if (!STI) { 144 errs() << "error: no subtarget info for target " << TripleName << "\n"; 145 return; 146 } 147 148 OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI)); 149 if (!DisAsm) { 150 errs() << "error: no disassembler for target " << TripleName << "\n"; 151 return; 152 } 153 154 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 155 OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter( 156 AsmPrinterVariant, *AsmInfo, *STI)); 157 if (!IP) { 158 errs() << "error: no instruction printer for target " << TripleName << '\n'; 159 return; 160 } 161 162 outs() << '\n'; 163 outs() << Filename << ":\n\n"; 164 165 const macho::Header &Header = MachOObj->getHeader(); 166 167 const MachOObject::LoadCommandInfo *SymtabLCI = 0; 168 for (unsigned i = 0; i != Header.NumLoadCommands; ++i) { 169 const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i); 170 switch (LCI.Command.Type) { 171 case macho::LCT_Symtab: 172 SymtabLCI = &LCI; 173 break; 174 } 175 } 176 177 // Read and register the symbol table data. 178 InMemoryStruct<macho::SymtabLoadCommand> SymtabLC; 179 MachOObj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC); 180 MachOObj->RegisterStringTable(*SymtabLC); 181 182 std::vector<Section> Sections; 183 std::vector<Symbol> Symbols; 184 std::vector<Symbol> UnsortedSymbols; // FIXME: duplication 185 SmallVector<uint64_t, 8> FoundFns; 186 187 for (unsigned i = 0; i != Header.NumLoadCommands; ++i) { 188 const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i); 189 if (LCI.Command.Type == macho::LCT_Segment) { 190 InMemoryStruct<macho::SegmentLoadCommand> SegmentLC; 191 MachOObj->ReadSegmentLoadCommand(LCI, SegmentLC); 192 193 for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) { 194 InMemoryStruct<macho::Section> Sect; 195 MachOObj->ReadSection(LCI, SectNum, Sect); 196 197 Section S; 198 memcpy(S.Name, Sect->Name, 16); 199 S.Address = Sect->Address; 200 S.Size = Sect->Size; 201 S.Offset = Sect->Offset; 202 S.NumRelocs = Sect->NumRelocationTableEntries; 203 S.RelocTableOffset = Sect->RelocationTableOffset; 204 Sections.push_back(S); 205 206 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { 207 InMemoryStruct<macho::SymbolTableEntry> STE; 208 MachOObj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE); 209 210 Symbol S; 211 S.StringIndex = STE->StringIndex; 212 S.SectionIndex = STE->SectionIndex; 213 S.Value = STE->Value; 214 Symbols.push_back(S); 215 UnsortedSymbols.push_back(Symbols.back()); 216 } 217 } 218 } else if (LCI.Command.Type == macho::LCT_Segment64) { 219 InMemoryStruct<macho::Segment64LoadCommand> Segment64LC; 220 MachOObj->ReadSegment64LoadCommand(LCI, Segment64LC); 221 222 for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) { 223 InMemoryStruct<macho::Section64> Sect64; 224 MachOObj->ReadSection64(LCI, SectNum, Sect64); 225 226 Section S; 227 memcpy(S.Name, Sect64->Name, 16); 228 S.Address = Sect64->Address; 229 S.Size = Sect64->Size; 230 S.Offset = Sect64->Offset; 231 S.NumRelocs = Sect64->NumRelocationTableEntries; 232 S.RelocTableOffset = Sect64->RelocationTableOffset; 233 Sections.push_back(S); 234 235 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { 236 InMemoryStruct<macho::Symbol64TableEntry> STE; 237 MachOObj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE); 238 239 Symbol S; 240 S.StringIndex = STE->StringIndex; 241 S.SectionIndex = STE->SectionIndex; 242 S.Value = STE->Value; 243 Symbols.push_back(S); 244 UnsortedSymbols.push_back(Symbols.back()); 245 } 246 } 247 } else if (LCI.Command.Type == macho::LCT_FunctionStarts) { 248 InMemoryStruct<macho::LinkeditDataLoadCommand> LLC; 249 MachOObj->ReadLinkeditDataLoadCommand(LCI, LLC); 250 251 MachOObj->ReadULEB128s(LLC->DataOffset, FoundFns); 252 } 253 } 254 255 std::map<uint64_t, MCFunction*> FunctionMap; 256 257 // Sort the symbols by address, just in case they didn't come in that way. 258 array_pod_sort(Symbols.begin(), Symbols.end()); 259 260#ifndef NDEBUG 261 raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls(); 262#else 263 raw_ostream &DebugOut = nulls(); 264#endif 265 266 SmallVector<MCFunction, 16> Functions; 267 268 for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) { 269 if (strcmp(Sections[SectIdx].Name, "__text")) 270 continue; 271 272 uint64_t VMAddr = Sections[SectIdx].Address - Sections[SectIdx].Offset; 273 for (unsigned i = 0, e = FoundFns.size(); i != e; ++i) 274 FunctionMap.insert(std::make_pair(FoundFns[i]+VMAddr, (MCFunction*)0)); 275 276 StringRef Bytes = MachOObj->getData(Sections[SectIdx].Offset, 277 Sections[SectIdx].Size); 278 StringRefMemoryObject memoryObject(Bytes); 279 bool symbolTableWorked = false; 280 281 std::vector<std::pair<uint64_t, uint32_t> > Relocs; 282 for (unsigned j = 0; j != Sections[SectIdx].NumRelocs; ++j) { 283 InMemoryStruct<macho::RelocationEntry> RE; 284 MachOObj->ReadRelocationEntry(Sections[SectIdx].RelocTableOffset, j, RE); 285 Relocs.push_back(std::make_pair(RE->Word0, RE->Word1 & 0xffffff)); 286 } 287 array_pod_sort(Relocs.begin(), Relocs.end()); 288 289 for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) { 290 if ((unsigned)Symbols[SymIdx].SectionIndex - 1 != SectIdx) 291 continue; 292 293 uint64_t Start = Symbols[SymIdx].Value - Sections[SectIdx].Address; 294 uint64_t End = (SymIdx+1 == Symbols.size() || 295 Symbols[SymIdx].SectionIndex != Symbols[SymIdx+1].SectionIndex) ? 296 Sections[SectIdx].Size : 297 Symbols[SymIdx+1].Value - Sections[SectIdx].Address; 298 uint64_t Size; 299 300 if (Start >= End) 301 continue; 302 303 symbolTableWorked = true; 304 305 if (!CFG) { 306 outs() << MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex) 307 << ":\n"; 308 for (uint64_t Index = Start; Index < End; Index += Size) { 309 MCInst Inst; 310 311 if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, 312 DebugOut, nulls())) { 313 outs() << format("%8llx:\t", Sections[SectIdx].Address + Index); 314 DumpBytes(StringRef(Bytes.data() + Index, Size)); 315 IP->printInst(&Inst, outs(), ""); 316 outs() << "\n"; 317 } else { 318 errs() << "llvm-objdump: warning: invalid instruction encoding\n"; 319 if (Size == 0) 320 Size = 1; // skip illegible bytes 321 } 322 } 323 } else { 324 // Create CFG and use it for disassembly. 325 SmallVector<uint64_t, 16> Calls; 326 MCFunction f = 327 MCFunction::createFunctionFromMC( 328 MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex), 329 DisAsm.get(), 330 memoryObject, Start, End, 331 InstrAnalysis.get(), DebugOut, 332 Calls); 333 334 Functions.push_back(f); 335 FunctionMap[Start] = &Functions.back(); 336 337 for (unsigned i = 0, e = Calls.size(); i != e; ++i) 338 FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0)); 339 } 340 } 341 342 if (CFG) { 343 if (!symbolTableWorked) { 344 // Create CFG and use it for disassembly. 345 SmallVector<uint64_t, 16> Calls; 346 MCFunction f = 347 MCFunction::createFunctionFromMC("__TEXT", DisAsm.get(), 348 memoryObject, 0, Sections[SectIdx].Size, 349 InstrAnalysis.get(), DebugOut, 350 Calls); 351 352 Functions.push_back(f); 353 FunctionMap[Sections[SectIdx].Offset] = &Functions.back(); 354 355 for (unsigned i = 0, e = Calls.size(); i != e; ++i) 356 FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0)); 357 } 358 for (std::map<uint64_t, MCFunction*>::iterator mi = FunctionMap.begin(), 359 me = FunctionMap.end(); mi != me; ++mi) 360 if (mi->second == 0) { 361 SmallVector<uint64_t, 16> Calls; 362 MCFunction f = 363 MCFunction::createFunctionFromMC("unknown", DisAsm.get(), 364 memoryObject, mi->first, 365 Sections[SectIdx].Size, 366 InstrAnalysis.get(), DebugOut, 367 Calls); 368 Functions.push_back(f); 369 mi->second = &Functions.back(); 370 for (unsigned i = 0, e = Calls.size(); i != e; ++i) 371 if (FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0)) 372 .second) 373 mi = FunctionMap.begin(); 374 } 375 376 DenseSet<uint64_t> PrintedBlocks; 377 for (unsigned ffi = 0, ffe = Functions.size(); ffi != ffe; ++ffi) { 378 MCFunction &f = Functions[ffi]; 379 for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){ 380 if (!PrintedBlocks.insert(fi->first).second) 381 continue; 382 bool hasPreds = FunctionMap.find(fi->first) != FunctionMap.end(); 383 384 // Only print blocks that have predecessors. 385 // FIXME: Slow. 386 for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe; 387 ++pi) 388 if (pi->second.contains(fi->first)) { 389 hasPreds = true; 390 break; 391 } 392 393 // Data block. 394 if (!hasPreds && fi != f.begin()) { 395 uint64_t End = llvm::next(fi) == fe ? Sections[SectIdx].Size : 396 llvm::next(fi)->first; 397 outs() << "# " << End-fi->first << " bytes of data:\n"; 398 for (unsigned pos = fi->first; pos != End; ++pos) { 399 outs() << format("%8x:\t", Sections[SectIdx].Address + pos); 400 DumpBytes(StringRef(Bytes.data() + pos, 1)); 401 outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]); 402 } 403 continue; 404 } 405 406 if (fi->second.contains(fi->first)) 407 outs() << "# Loop begin:\n"; 408 409 for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie; 410 ++ii) { 411 const MCDecodedInst &Inst = fi->second.getInsts()[ii]; 412 if (FunctionMap.find(Sections[SectIdx].Address + Inst.Address) != 413 FunctionMap.end()) 414 outs() << FunctionMap[Sections[SectIdx].Address + Inst.Address]-> 415 getName() << ":\n"; 416 outs() << format("%8llx:\t", Sections[SectIdx].Address + 417 Inst.Address); 418 DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size)); 419 // Simple loops. 420 if (fi->second.contains(fi->first)) 421 outs() << '\t'; 422 IP->printInst(&Inst.Inst, outs(), ""); 423 for (unsigned j = 0; j != Relocs.size(); ++j) 424 if (Relocs[j].first >= Sections[SectIdx].Address + Inst.Address && 425 Relocs[j].first < Sections[SectIdx].Address + Inst.Address + 426 Inst.Size) { 427 outs() << "\t# " 428 << MachOObj->getStringAtIndex( 429 UnsortedSymbols[Relocs[j].second].StringIndex) 430 << ' '; 431 DumpAddress(UnsortedSymbols[Relocs[j].second].Value, Sections, 432 MachOObj.get(), outs()); 433 } 434 uint64_t targ = InstrAnalysis->evaluateBranch(Inst.Inst, 435 Inst.Address, 436 Inst.Size); 437 if (targ != -1ULL) 438 DumpAddress(targ, Sections, MachOObj.get(), outs()); 439 440 outs() << '\n'; 441 } 442 } 443 444 // Start a new dot file. 445 std::string Error; 446 raw_fd_ostream Out((f.getName().str() + ".dot").c_str(), Error); 447 if (!Error.empty()) { 448 errs() << "llvm-objdump: warning: " << Error << '\n'; 449 continue; 450 } 451 452 Out << "digraph " << f.getName() << " {\n"; 453 Out << "graph [ rankdir = \"LR\" ];\n"; 454 for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { 455 bool hasPreds = false; 456 // Only print blocks that have predecessors. 457 // FIXME: Slow. 458 for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe; 459 ++pi) 460 if (pi->second.contains(i->first)) { 461 hasPreds = true; 462 break; 463 } 464 465 if (!hasPreds && i != f.begin()) 466 continue; 467 468 Out << '"' << i->first << "\" [ label=\"<a>"; 469 // Print instructions. 470 for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie; 471 ++ii) { 472 // Escape special chars and print the instruction in mnemonic form. 473 std::string Str; 474 raw_string_ostream OS(Str); 475 IP->printInst(&i->second.getInsts()[ii].Inst, OS, ""); 476 Out << DOT::EscapeString(OS.str()) << '|'; 477 } 478 Out << "<o>\" shape=\"record\" ];\n"; 479 480 // Add edges. 481 for (MCBasicBlock::succ_iterator si = i->second.succ_begin(), 482 se = i->second.succ_end(); si != se; ++si) 483 Out << i->first << ":o -> " << *si <<":a\n"; 484 } 485 Out << "}\n"; 486 } 487 } 488 } 489} 490