MachODump.cpp revision ef99356dfebb96f6f90efb912c2877214bad060e
1//===-- MachODump.cpp - Object file dumping utility for llvm --------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the MachO-specific dumper for llvm-objdump. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm-objdump.h" 15#include "llvm/ADT/OwningPtr.h" 16#include "llvm/ADT/STLExtras.h" 17#include "llvm/ADT/StringExtras.h" 18#include "llvm/ADT/Triple.h" 19#include "llvm/DebugInfo/DIContext.h" 20#include "llvm/MC/MCAsmInfo.h" 21#include "llvm/MC/MCDisassembler.h" 22#include "llvm/MC/MCInst.h" 23#include "llvm/MC/MCInstPrinter.h" 24#include "llvm/MC/MCInstrAnalysis.h" 25#include "llvm/MC/MCInstrDesc.h" 26#include "llvm/MC/MCInstrInfo.h" 27#include "llvm/MC/MCRegisterInfo.h" 28#include "llvm/MC/MCSubtargetInfo.h" 29#include "llvm/Object/MachO.h" 30#include "llvm/Support/Casting.h" 31#include "llvm/Support/CommandLine.h" 32#include "llvm/Support/Debug.h" 33#include "llvm/Support/Format.h" 34#include "llvm/Support/GraphWriter.h" 35#include "llvm/Support/MachO.h" 36#include "llvm/Support/MemoryBuffer.h" 37#include "llvm/Support/TargetRegistry.h" 38#include "llvm/Support/TargetSelect.h" 39#include "llvm/Support/raw_ostream.h" 40#include "llvm/Support/system_error.h" 41#include <algorithm> 42#include <cstring> 43using namespace llvm; 44using namespace object; 45 46static cl::opt<bool> 47 UseDbg("g", cl::desc("Print line information from debug info if available")); 48 49static cl::opt<std::string> 50 DSYMFile("dsym", cl::desc("Use .dSYM file for debug info")); 51 52static const Target *GetTarget(const MachOObjectFile *MachOObj) { 53 // Figure out the target triple. 54 if (TripleName.empty()) { 55 llvm::Triple TT("unknown-unknown-unknown"); 56 TT.setArch(Triple::ArchType(MachOObj->getArch())); 57 TripleName = TT.str(); 58 } 59 60 // Get the target specific parser. 61 std::string Error; 62 const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); 63 if (TheTarget) 64 return TheTarget; 65 66 errs() << "llvm-objdump: error: unable to get target for '" << TripleName 67 << "', see --version and --triple.\n"; 68 return 0; 69} 70 71struct SymbolSorter { 72 bool operator()(const SymbolRef &A, const SymbolRef &B) { 73 SymbolRef::Type AType, BType; 74 A.getType(AType); 75 B.getType(BType); 76 77 uint64_t AAddr, BAddr; 78 if (AType != SymbolRef::ST_Function) 79 AAddr = 0; 80 else 81 A.getAddress(AAddr); 82 if (BType != SymbolRef::ST_Function) 83 BAddr = 0; 84 else 85 B.getAddress(BAddr); 86 return AAddr < BAddr; 87 } 88}; 89 90static void 91getSectionsAndSymbols(const macho::Header Header, 92 MachOObjectFile *MachOObj, 93 std::vector<SectionRef> &Sections, 94 std::vector<SymbolRef> &Symbols, 95 SmallVectorImpl<uint64_t> &FoundFns) { 96 error_code ec; 97 for (symbol_iterator SI = MachOObj->begin_symbols(), 98 SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec)) 99 Symbols.push_back(*SI); 100 101 for (section_iterator SI = MachOObj->begin_sections(), 102 SE = MachOObj->end_sections(); SI != SE; SI.increment(ec)) { 103 SectionRef SR = *SI; 104 StringRef SectName; 105 SR.getName(SectName); 106 Sections.push_back(*SI); 107 } 108 109 MachOObjectFile::LoadCommandInfo Command = 110 MachOObj->getFirstLoadCommandInfo(); 111 for (unsigned i = 0; ; ++i) { 112 if (Command.C.Type == macho::LCT_FunctionStarts) { 113 // We found a function starts segment, parse the addresses for later 114 // consumption. 115 macho::LinkeditDataLoadCommand LLC = 116 MachOObj->getLinkeditDataLoadCommand(Command); 117 118 MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns); 119 } 120 121 if (i == Header.NumLoadCommands - 1) 122 break; 123 else 124 Command = MachOObj->getNextLoadCommandInfo(Command); 125 } 126} 127 128static void DisassembleInputMachO2(StringRef Filename, 129 MachOObjectFile *MachOOF); 130 131void llvm::DisassembleInputMachO(StringRef Filename) { 132 OwningPtr<MemoryBuffer> Buff; 133 134 if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) { 135 errs() << "llvm-objdump: " << Filename << ": " << ec.message() << "\n"; 136 return; 137 } 138 139 OwningPtr<MachOObjectFile> MachOOF(static_cast<MachOObjectFile*>( 140 ObjectFile::createMachOObjectFile(Buff.take()))); 141 142 DisassembleInputMachO2(Filename, MachOOF.get()); 143} 144 145static void DisassembleInputMachO2(StringRef Filename, 146 MachOObjectFile *MachOOF) { 147 const Target *TheTarget = GetTarget(MachOOF); 148 if (!TheTarget) { 149 // GetTarget prints out stuff. 150 return; 151 } 152 OwningPtr<const MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo()); 153 OwningPtr<MCInstrAnalysis> 154 InstrAnalysis(TheTarget->createMCInstrAnalysis(InstrInfo.get())); 155 156 // Set up disassembler. 157 OwningPtr<const MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName)); 158 OwningPtr<const MCAsmInfo> AsmInfo( 159 TheTarget->createMCAsmInfo(*MRI, TripleName)); 160 OwningPtr<const MCSubtargetInfo> 161 STI(TheTarget->createMCSubtargetInfo(TripleName, "", "")); 162 OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI)); 163 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 164 OwningPtr<MCInstPrinter> 165 IP(TheTarget->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *InstrInfo, 166 *MRI, *STI)); 167 168 if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) { 169 errs() << "error: couldn't initialize disassembler for target " 170 << TripleName << '\n'; 171 return; 172 } 173 174 outs() << '\n' << Filename << ":\n\n"; 175 176 macho::Header Header = MachOOF->getHeader(); 177 178 // FIXME: FoundFns isn't used anymore. Using symbols/LC_FUNCTION_STARTS to 179 // determine function locations will eventually go in MCObjectDisassembler. 180 // FIXME: Using the -cfg command line option, this code used to be able to 181 // annotate relocations with the referenced symbol's name, and if this was 182 // inside a __[cf]string section, the data it points to. This is now replaced 183 // by the upcoming MCSymbolizer, which needs the appropriate setup done above. 184 std::vector<SectionRef> Sections; 185 std::vector<SymbolRef> Symbols; 186 SmallVector<uint64_t, 8> FoundFns; 187 188 getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns); 189 190 // Make a copy of the unsorted symbol list. FIXME: duplication 191 std::vector<SymbolRef> UnsortedSymbols(Symbols); 192 // Sort the symbols by address, just in case they didn't come in that way. 193 std::sort(Symbols.begin(), Symbols.end(), SymbolSorter()); 194 195#ifndef NDEBUG 196 raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls(); 197#else 198 raw_ostream &DebugOut = nulls(); 199#endif 200 201 OwningPtr<DIContext> diContext; 202 ObjectFile *DbgObj = MachOOF; 203 // Try to find debug info and set up the DIContext for it. 204 if (UseDbg) { 205 // A separate DSym file path was specified, parse it as a macho file, 206 // get the sections and supply it to the section name parsing machinery. 207 if (!DSYMFile.empty()) { 208 OwningPtr<MemoryBuffer> Buf; 209 if (error_code ec = MemoryBuffer::getFileOrSTDIN(DSYMFile.c_str(), Buf)) { 210 errs() << "llvm-objdump: " << Filename << ": " << ec.message() << '\n'; 211 return; 212 } 213 DbgObj = ObjectFile::createMachOObjectFile(Buf.take()); 214 } 215 216 // Setup the DIContext 217 diContext.reset(DIContext::getDWARFContext(DbgObj)); 218 } 219 220 for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) { 221 222 bool SectIsText = false; 223 Sections[SectIdx].isText(SectIsText); 224 if (SectIsText == false) 225 continue; 226 227 StringRef SectName; 228 if (Sections[SectIdx].getName(SectName) || 229 SectName != "__text") 230 continue; // Skip non-text sections 231 232 DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl(); 233 234 StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR); 235 if (SegmentName != "__TEXT") 236 continue; 237 238 StringRef Bytes; 239 Sections[SectIdx].getContents(Bytes); 240 StringRefMemoryObject memoryObject(Bytes); 241 bool symbolTableWorked = false; 242 243 // Parse relocations. 244 std::vector<std::pair<uint64_t, SymbolRef> > Relocs; 245 error_code ec; 246 for (relocation_iterator RI = Sections[SectIdx].begin_relocations(), 247 RE = Sections[SectIdx].end_relocations(); RI != RE; RI.increment(ec)) { 248 uint64_t RelocOffset, SectionAddress; 249 RI->getOffset(RelocOffset); 250 Sections[SectIdx].getAddress(SectionAddress); 251 RelocOffset -= SectionAddress; 252 253 SymbolRef RelocSym; 254 RI->getSymbol(RelocSym); 255 256 Relocs.push_back(std::make_pair(RelocOffset, RelocSym)); 257 } 258 array_pod_sort(Relocs.begin(), Relocs.end()); 259 260 // Disassemble symbol by symbol. 261 for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) { 262 StringRef SymName; 263 Symbols[SymIdx].getName(SymName); 264 265 SymbolRef::Type ST; 266 Symbols[SymIdx].getType(ST); 267 if (ST != SymbolRef::ST_Function) 268 continue; 269 270 // Make sure the symbol is defined in this section. 271 bool containsSym = false; 272 Sections[SectIdx].containsSymbol(Symbols[SymIdx], containsSym); 273 if (!containsSym) 274 continue; 275 276 // Start at the address of the symbol relative to the section's address. 277 uint64_t SectionAddress = 0; 278 uint64_t Start = 0; 279 Sections[SectIdx].getAddress(SectionAddress); 280 Symbols[SymIdx].getAddress(Start); 281 Start -= SectionAddress; 282 283 // Stop disassembling either at the beginning of the next symbol or at 284 // the end of the section. 285 bool containsNextSym = false; 286 uint64_t NextSym = 0; 287 uint64_t NextSymIdx = SymIdx+1; 288 while (Symbols.size() > NextSymIdx) { 289 SymbolRef::Type NextSymType; 290 Symbols[NextSymIdx].getType(NextSymType); 291 if (NextSymType == SymbolRef::ST_Function) { 292 Sections[SectIdx].containsSymbol(Symbols[NextSymIdx], 293 containsNextSym); 294 Symbols[NextSymIdx].getAddress(NextSym); 295 NextSym -= SectionAddress; 296 break; 297 } 298 ++NextSymIdx; 299 } 300 301 uint64_t SectSize; 302 Sections[SectIdx].getSize(SectSize); 303 uint64_t End = containsNextSym ? NextSym : SectSize; 304 uint64_t Size; 305 306 symbolTableWorked = true; 307 308 outs() << SymName << ":\n"; 309 DILineInfo lastLine; 310 for (uint64_t Index = Start; Index < End; Index += Size) { 311 MCInst Inst; 312 313 if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, 314 DebugOut, nulls())) { 315 uint64_t SectAddress = 0; 316 Sections[SectIdx].getAddress(SectAddress); 317 outs() << format("%8" PRIx64 ":\t", SectAddress + Index); 318 319 DumpBytes(StringRef(Bytes.data() + Index, Size)); 320 IP->printInst(&Inst, outs(), ""); 321 322 // Print debug info. 323 if (diContext) { 324 DILineInfo dli = 325 diContext->getLineInfoForAddress(SectAddress + Index); 326 // Print valid line info if it changed. 327 if (dli != lastLine && dli.getLine() != 0) 328 outs() << "\t## " << dli.getFileName() << ':' 329 << dli.getLine() << ':' << dli.getColumn(); 330 lastLine = dli; 331 } 332 outs() << "\n"; 333 } else { 334 errs() << "llvm-objdump: warning: invalid instruction encoding\n"; 335 if (Size == 0) 336 Size = 1; // skip illegible bytes 337 } 338 } 339 } 340 if (!symbolTableWorked) { 341 // Reading the symbol table didn't work, disassemble the whole section. 342 uint64_t SectAddress; 343 Sections[SectIdx].getAddress(SectAddress); 344 uint64_t SectSize; 345 Sections[SectIdx].getSize(SectSize); 346 uint64_t InstSize; 347 for (uint64_t Index = 0; Index < SectSize; Index += InstSize) { 348 MCInst Inst; 349 350 if (DisAsm->getInstruction(Inst, InstSize, memoryObject, Index, 351 DebugOut, nulls())) { 352 outs() << format("%8" PRIx64 ":\t", SectAddress + Index); 353 DumpBytes(StringRef(Bytes.data() + Index, InstSize)); 354 IP->printInst(&Inst, outs(), ""); 355 outs() << "\n"; 356 } else { 357 errs() << "llvm-objdump: warning: invalid instruction encoding\n"; 358 if (InstSize == 0) 359 InstSize = 1; // skip illegible bytes 360 } 361 } 362 } 363 } 364} 365