MachODump.cpp revision 6c1202c459ffa6d693ad92fa84e43902bc780bca
1//===-- MachODump.cpp - Object file dumping utility for llvm --------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the MachO-specific dumper for llvm-objdump. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm-objdump.h" 15#include "llvm/ADT/OwningPtr.h" 16#include "llvm/ADT/STLExtras.h" 17#include "llvm/ADT/StringExtras.h" 18#include "llvm/ADT/Triple.h" 19#include "llvm/DebugInfo/DIContext.h" 20#include "llvm/MC/MCAsmInfo.h" 21#include "llvm/MC/MCDisassembler.h" 22#include "llvm/MC/MCInst.h" 23#include "llvm/MC/MCInstPrinter.h" 24#include "llvm/MC/MCInstrAnalysis.h" 25#include "llvm/MC/MCInstrDesc.h" 26#include "llvm/MC/MCInstrInfo.h" 27#include "llvm/MC/MCRegisterInfo.h" 28#include "llvm/MC/MCSubtargetInfo.h" 29#include "llvm/Object/MachO.h" 30#include "llvm/Support/Casting.h" 31#include "llvm/Support/CommandLine.h" 32#include "llvm/Support/Debug.h" 33#include "llvm/Support/Format.h" 34#include "llvm/Support/GraphWriter.h" 35#include "llvm/Support/MachO.h" 36#include "llvm/Support/MemoryBuffer.h" 37#include "llvm/Support/TargetRegistry.h" 38#include "llvm/Support/TargetSelect.h" 39#include "llvm/Support/raw_ostream.h" 40#include "llvm/Support/system_error.h" 41#include <algorithm> 42#include <cstring> 43using namespace llvm; 44using namespace object; 45 46static cl::opt<bool> 47 UseDbg("g", cl::desc("Print line information from debug info if available")); 48 49static cl::opt<std::string> 50 DSYMFile("dsym", cl::desc("Use .dSYM file for debug info")); 51 52static const Target *GetTarget(const MachOObjectFile *MachOObj) { 53 // Figure out the target triple. 54 if (TripleName.empty()) { 55 llvm::Triple TT("unknown-unknown-unknown"); 56 TT.setArch(Triple::ArchType(MachOObj->getArch())); 57 TripleName = TT.str(); 58 } 59 60 // Get the target specific parser. 61 std::string Error; 62 const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); 63 if (TheTarget) 64 return TheTarget; 65 66 errs() << "llvm-objdump: error: unable to get target for '" << TripleName 67 << "', see --version and --triple.\n"; 68 return 0; 69} 70 71struct SymbolSorter { 72 bool operator()(const SymbolRef &A, const SymbolRef &B) { 73 SymbolRef::Type AType, BType; 74 A.getType(AType); 75 B.getType(BType); 76 77 uint64_t AAddr, BAddr; 78 if (AType != SymbolRef::ST_Function) 79 AAddr = 0; 80 else 81 A.getAddress(AAddr); 82 if (BType != SymbolRef::ST_Function) 83 BAddr = 0; 84 else 85 B.getAddress(BAddr); 86 return AAddr < BAddr; 87 } 88}; 89 90static void 91getSectionsAndSymbols(const macho::Header Header, 92 MachOObjectFile *MachOObj, 93 std::vector<SectionRef> &Sections, 94 std::vector<SymbolRef> &Symbols, 95 SmallVectorImpl<uint64_t> &FoundFns) { 96 error_code ec; 97 for (symbol_iterator SI = MachOObj->begin_symbols(), 98 SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec)) 99 Symbols.push_back(*SI); 100 101 for (section_iterator SI = MachOObj->begin_sections(), 102 SE = MachOObj->end_sections(); SI != SE; SI.increment(ec)) { 103 SectionRef SR = *SI; 104 StringRef SectName; 105 SR.getName(SectName); 106 Sections.push_back(*SI); 107 } 108 109 MachOObjectFile::LoadCommandInfo Command = 110 MachOObj->getFirstLoadCommandInfo(); 111 for (unsigned i = 0; ; ++i) { 112 if (Command.C.Type == macho::LCT_FunctionStarts) { 113 // We found a function starts segment, parse the addresses for later 114 // consumption. 115 macho::LinkeditDataLoadCommand LLC = 116 MachOObj->getLinkeditDataLoadCommand(Command); 117 118 MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns); 119 } 120 121 if (i == Header.NumLoadCommands - 1) 122 break; 123 else 124 Command = MachOObj->getNextLoadCommandInfo(Command); 125 } 126} 127 128static void DisassembleInputMachO2(StringRef Filename, 129 MachOObjectFile *MachOOF); 130 131void llvm::DisassembleInputMachO(StringRef Filename) { 132 OwningPtr<MemoryBuffer> Buff; 133 134 if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) { 135 errs() << "llvm-objdump: " << Filename << ": " << ec.message() << "\n"; 136 return; 137 } 138 139 OwningPtr<MachOObjectFile> MachOOF(static_cast<MachOObjectFile*>( 140 ObjectFile::createMachOObjectFile(Buff.take()))); 141 142 DisassembleInputMachO2(Filename, MachOOF.get()); 143} 144 145static void DisassembleInputMachO2(StringRef Filename, 146 MachOObjectFile *MachOOF) { 147 const Target *TheTarget = GetTarget(MachOOF); 148 if (!TheTarget) { 149 // GetTarget prints out stuff. 150 return; 151 } 152 OwningPtr<const MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo()); 153 OwningPtr<MCInstrAnalysis> 154 InstrAnalysis(TheTarget->createMCInstrAnalysis(InstrInfo.get())); 155 156 // Set up disassembler. 157 OwningPtr<const MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName)); 158 OwningPtr<const MCAsmInfo> AsmInfo( 159 TheTarget->createMCAsmInfo(*MRI, TripleName)); 160 OwningPtr<const MCSubtargetInfo> 161 STI(TheTarget->createMCSubtargetInfo(TripleName, "", "")); 162 OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI)); 163 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 164 OwningPtr<MCInstPrinter> 165 IP(TheTarget->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *InstrInfo, 166 *MRI, *STI)); 167 168 if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) { 169 errs() << "error: couldn't initialize disassembler for target " 170 << TripleName << '\n'; 171 return; 172 } 173 174 outs() << '\n' << Filename << ":\n\n"; 175 176 macho::Header Header = MachOOF->getHeader(); 177 178 // FIXME: FoundFns isn't used anymore. Using symbols/LC_FUNCTION_STARTS to 179 // determine function locations will eventually go in MCObjectDisassembler. 180 // FIXME: Using the -cfg command line option, this code used to be able to 181 // annotate relocations with the referenced symbol's name, and if this was 182 // inside a __[cf]string section, the data it points to. This is now replaced 183 // by the upcoming MCSymbolizer, which needs the appropriate setup done above. 184 std::vector<SectionRef> Sections; 185 std::vector<SymbolRef> Symbols; 186 SmallVector<uint64_t, 8> FoundFns; 187 188 getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns); 189 190 // Make a copy of the unsorted symbol list. FIXME: duplication 191 std::vector<SymbolRef> UnsortedSymbols(Symbols); 192 // Sort the symbols by address, just in case they didn't come in that way. 193 std::sort(Symbols.begin(), Symbols.end(), SymbolSorter()); 194 195#ifndef NDEBUG 196 raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls(); 197#else 198 raw_ostream &DebugOut = nulls(); 199#endif 200 201 OwningPtr<DIContext> diContext; 202 ObjectFile *DbgObj = MachOOF; 203 // Try to find debug info and set up the DIContext for it. 204 if (UseDbg) { 205 // A separate DSym file path was specified, parse it as a macho file, 206 // get the sections and supply it to the section name parsing machinery. 207 if (!DSYMFile.empty()) { 208 OwningPtr<MemoryBuffer> Buf; 209 if (error_code ec = MemoryBuffer::getFileOrSTDIN(DSYMFile.c_str(), Buf)) { 210 errs() << "llvm-objdump: " << Filename << ": " << ec.message() << '\n'; 211 return; 212 } 213 DbgObj = ObjectFile::createMachOObjectFile(Buf.take()); 214 } 215 216 // Setup the DIContext 217 diContext.reset(DIContext::getDWARFContext(DbgObj)); 218 } 219 220 for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) { 221 222 bool SectIsText = false; 223 Sections[SectIdx].isText(SectIsText); 224 if (SectIsText == false) 225 continue; 226 227 StringRef SectName; 228 if (Sections[SectIdx].getName(SectName) || 229 SectName != "__text") 230 continue; // Skip non-text sections 231 232 DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl(); 233 234 StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR); 235 if (SegmentName != "__TEXT") 236 continue; 237 238 StringRef Bytes; 239 Sections[SectIdx].getContents(Bytes); 240 StringRefMemoryObject memoryObject(Bytes); 241 bool symbolTableWorked = false; 242 243 // Parse relocations. 244 std::vector<std::pair<uint64_t, SymbolRef> > Relocs; 245 error_code ec; 246 for (relocation_iterator RI = Sections[SectIdx].begin_relocations(), 247 RE = Sections[SectIdx].end_relocations(); RI != RE; RI.increment(ec)) { 248 uint64_t RelocOffset, SectionAddress; 249 RI->getOffset(RelocOffset); 250 Sections[SectIdx].getAddress(SectionAddress); 251 RelocOffset -= SectionAddress; 252 253 symbol_iterator RelocSym = RI->getSymbol(); 254 255 Relocs.push_back(std::make_pair(RelocOffset, *RelocSym)); 256 } 257 array_pod_sort(Relocs.begin(), Relocs.end()); 258 259 // Disassemble symbol by symbol. 260 for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) { 261 StringRef SymName; 262 Symbols[SymIdx].getName(SymName); 263 264 SymbolRef::Type ST; 265 Symbols[SymIdx].getType(ST); 266 if (ST != SymbolRef::ST_Function) 267 continue; 268 269 // Make sure the symbol is defined in this section. 270 bool containsSym = false; 271 Sections[SectIdx].containsSymbol(Symbols[SymIdx], containsSym); 272 if (!containsSym) 273 continue; 274 275 // Start at the address of the symbol relative to the section's address. 276 uint64_t SectionAddress = 0; 277 uint64_t Start = 0; 278 Sections[SectIdx].getAddress(SectionAddress); 279 Symbols[SymIdx].getAddress(Start); 280 Start -= SectionAddress; 281 282 // Stop disassembling either at the beginning of the next symbol or at 283 // the end of the section. 284 bool containsNextSym = false; 285 uint64_t NextSym = 0; 286 uint64_t NextSymIdx = SymIdx+1; 287 while (Symbols.size() > NextSymIdx) { 288 SymbolRef::Type NextSymType; 289 Symbols[NextSymIdx].getType(NextSymType); 290 if (NextSymType == SymbolRef::ST_Function) { 291 Sections[SectIdx].containsSymbol(Symbols[NextSymIdx], 292 containsNextSym); 293 Symbols[NextSymIdx].getAddress(NextSym); 294 NextSym -= SectionAddress; 295 break; 296 } 297 ++NextSymIdx; 298 } 299 300 uint64_t SectSize; 301 Sections[SectIdx].getSize(SectSize); 302 uint64_t End = containsNextSym ? NextSym : SectSize; 303 uint64_t Size; 304 305 symbolTableWorked = true; 306 307 outs() << SymName << ":\n"; 308 DILineInfo lastLine; 309 for (uint64_t Index = Start; Index < End; Index += Size) { 310 MCInst Inst; 311 312 if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, 313 DebugOut, nulls())) { 314 uint64_t SectAddress = 0; 315 Sections[SectIdx].getAddress(SectAddress); 316 outs() << format("%8" PRIx64 ":\t", SectAddress + Index); 317 318 DumpBytes(StringRef(Bytes.data() + Index, Size)); 319 IP->printInst(&Inst, outs(), ""); 320 321 // Print debug info. 322 if (diContext) { 323 DILineInfo dli = 324 diContext->getLineInfoForAddress(SectAddress + Index); 325 // Print valid line info if it changed. 326 if (dli != lastLine && dli.getLine() != 0) 327 outs() << "\t## " << dli.getFileName() << ':' 328 << dli.getLine() << ':' << dli.getColumn(); 329 lastLine = dli; 330 } 331 outs() << "\n"; 332 } else { 333 errs() << "llvm-objdump: warning: invalid instruction encoding\n"; 334 if (Size == 0) 335 Size = 1; // skip illegible bytes 336 } 337 } 338 } 339 if (!symbolTableWorked) { 340 // Reading the symbol table didn't work, disassemble the whole section. 341 uint64_t SectAddress; 342 Sections[SectIdx].getAddress(SectAddress); 343 uint64_t SectSize; 344 Sections[SectIdx].getSize(SectSize); 345 uint64_t InstSize; 346 for (uint64_t Index = 0; Index < SectSize; Index += InstSize) { 347 MCInst Inst; 348 349 if (DisAsm->getInstruction(Inst, InstSize, memoryObject, Index, 350 DebugOut, nulls())) { 351 outs() << format("%8" PRIx64 ":\t", SectAddress + Index); 352 DumpBytes(StringRef(Bytes.data() + Index, InstSize)); 353 IP->printInst(&Inst, outs(), ""); 354 outs() << "\n"; 355 } else { 356 errs() << "llvm-objdump: warning: invalid instruction encoding\n"; 357 if (InstSize == 0) 358 InstSize = 1; // skip illegible bytes 359 } 360 } 361 } 362 } 363} 364