1ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===// 2ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha// 3ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha// The LLVM Compiler Infrastructure 4ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha// 5ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha// This file is distributed under the University of Illinois Open Source 6ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha// License. See LICENSE.TXT for details. 7ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha// 8ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha//===----------------------------------------------------------------------===// 9ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 10ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#include "llvm/MC/MCObjectDisassembler.h" 11ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#include "llvm/ADT/SetVector.h" 1205a81020d970dc0fe8ae29e484d06aae619a4ca4Ahmed Bougacha#include "llvm/ADT/SmallPtrSet.h" 13ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#include "llvm/ADT/StringExtras.h" 14ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#include "llvm/ADT/StringRef.h" 15ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#include "llvm/ADT/Twine.h" 16cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines#include "llvm/MC/MCAnalysis/MCAtom.h" 17cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines#include "llvm/MC/MCAnalysis/MCFunction.h" 18cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines#include "llvm/MC/MCAnalysis/MCModule.h" 19ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#include "llvm/MC/MCDisassembler.h" 20ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#include "llvm/MC/MCInstrAnalysis.h" 210f4a5ba24e680f5193792822c9dd066bfccdfc2dAhmed Bougacha#include "llvm/MC/MCObjectSymbolizer.h" 220e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha#include "llvm/Object/MachO.h" 23ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#include "llvm/Object/ObjectFile.h" 240e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha#include "llvm/Support/Debug.h" 250e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha#include "llvm/Support/MachO.h" 26ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#include "llvm/Support/MemoryObject.h" 27ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#include "llvm/Support/StringRefMemoryObject.h" 28ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#include "llvm/Support/raw_ostream.h" 29ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha#include <map> 30ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 31ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougachausing namespace llvm; 32ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougachausing namespace object; 33ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 34dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "mc" 35dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 36ef99356dfebb96f6f90efb912c2877214bad060eAhmed BougachaMCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj, 37ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha const MCDisassembler &Dis, 38ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha const MCInstrAnalysis &MIA) 39dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines : Obj(Obj), Dis(Dis), MIA(MIA), MOS(nullptr) {} 40ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 410a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougachauint64_t MCObjectDisassembler::getEntrypoint() { 4236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (const SymbolRef &Symbol : Obj.symbols()) { 430a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha StringRef Name; 4436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Symbol.getName(Name); 450a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha if (Name == "main" || Name == "_main") { 460a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha uint64_t Entrypoint; 4736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Symbol.getAddress(Entrypoint); 48484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha return getEffectiveLoadAddr(Entrypoint); 490a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha } 500a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha } 510a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha return 0; 520a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha} 530a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha 540a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed BougachaArrayRef<uint64_t> MCObjectDisassembler::getStaticInitFunctions() { 550a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha return ArrayRef<uint64_t>(); 560a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha} 570a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha 580a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed BougachaArrayRef<uint64_t> MCObjectDisassembler::getStaticExitFunctions() { 590a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha return ArrayRef<uint64_t>(); 600a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha} 610a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha 62f176482752fbea3139394e280adfb10270dd3aacAhmed BougachaMemoryObject *MCObjectDisassembler::getRegionFor(uint64_t Addr) { 63f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // FIXME: Keep track of object sections. 64f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha return FallbackRegion.get(); 65f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha} 66f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 67484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougachauint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) { 68484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha return Addr; 69484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha} 70484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha 71484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougachauint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) { 72484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha return Addr; 73484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha} 74484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha 750a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed BougachaMCModule *MCObjectDisassembler::buildEmptyModule() { 76ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCModule *Module = new MCModule; 770a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha Module->Entrypoint = getEntrypoint(); 780a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha return Module; 790a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha} 800a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha 810a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed BougachaMCModule *MCObjectDisassembler::buildModule(bool withCFG) { 820a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha MCModule *Module = buildEmptyModule(); 830a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha 84ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha buildSectionAtoms(Module); 85ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (withCFG) 86ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha buildCFG(Module); 87ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha return Module; 88ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha} 89ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 90ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougachavoid MCObjectDisassembler::buildSectionAtoms(MCModule *Module) { 9136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (const SectionRef &Section : Obj.sections()) { 9236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool isText; 9336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Section.isText(isText); 9436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool isData; 9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Section.isData(isData); 96ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (!isData && !isText) 97ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha continue; 98ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines uint64_t StartAddr; 10036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Section.getAddress(StartAddr); 10136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines uint64_t SecSize; 10236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Section.getSize(SecSize); 103ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize) 104ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha continue; 105484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha StartAddr = getEffectiveLoadAddr(StartAddr); 106ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines StringRef Contents; 10836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Section.getContents(Contents); 1090a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha StringRefMemoryObject memoryObject(Contents, StartAddr); 110ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 111ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha // We don't care about things like non-file-backed sections yet. 112ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (Contents.size() != SecSize || !SecSize) 113ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha continue; 114ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha uint64_t EndAddr = StartAddr + SecSize - 1; 115ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 11636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines StringRef SecName; 11736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Section.getName(SecName); 118ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 119ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (isText) { 120dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MCTextAtom *Text = nullptr; 121dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MCDataAtom *InvalidData = nullptr; 12246937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha 123ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha uint64_t InstSize; 124ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha for (uint64_t Index = 0; Index < SecSize; Index += InstSize) { 12546937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha const uint64_t CurAddr = StartAddr + Index; 126ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCInst Inst; 12746937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(), 12846937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha nulls())) { 12946937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha if (!Text) { 13046937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha Text = Module->createTextAtom(CurAddr, CurAddr); 13146937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha Text->setName(SecName); 13246937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha } 133ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha Text->addInst(Inst, InstSize); 134dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines InvalidData = nullptr; 13546937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha } else { 13688e1e103de334d2aec9d261bc4264d3916ba410dRafael Espindola assert(InstSize && "getInstruction() consumed no bytes"); 13746937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha if (!InvalidData) { 138dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Text = nullptr; 13988e1e103de334d2aec9d261bc4264d3916ba410dRafael Espindola InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1); 14046937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha } 14188e1e103de334d2aec9d261bc4264d3916ba410dRafael Espindola for (uint64_t I = 0; I < InstSize; ++I) 14288e1e103de334d2aec9d261bc4264d3916ba410dRafael Espindola InvalidData->addData(Contents[Index+I]); 14346937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha } 144ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } 145ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } else { 146ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr); 147ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha Data->setName(SecName); 148ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha for (uint64_t Index = 0; Index < SecSize; ++Index) 149ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha Data->addData(Contents[Index]); 150ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } 151ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } 152ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha} 153ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 154ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougachanamespace { 155ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha struct BBInfo; 15605a81020d970dc0fe8ae29e484d06aae619a4ca4Ahmed Bougacha typedef SmallPtrSet<BBInfo*, 2> BBInfoSetTy; 157ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 158ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha struct BBInfo { 159ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCTextAtom *Atom; 160ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCBasicBlock *BB; 161ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha BBInfoSetTy Succs; 162ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha BBInfoSetTy Preds; 163f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MCObjectDisassembler::AddressSetTy SuccAddrs; 164ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 165dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BBInfo() : Atom(nullptr), BB(nullptr) {} 16646937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha 167ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha void addSucc(BBInfo &Succ) { 168ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha Succs.insert(&Succ); 169ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha Succ.Preds.insert(this); 170ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } 171ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha }; 172ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha} 173ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 174f176482752fbea3139394e280adfb10270dd3aacAhmed Bougachastatic void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V) { 175f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha std::sort(V.begin(), V.end()); 176f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha V.erase(std::unique(V.begin(), V.end()), V.end()); 177f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha} 178f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 179ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougachavoid MCObjectDisassembler::buildCFG(MCModule *Module) { 180ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy; 181ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha BBInfoByAddrTy BBInfos; 182ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha AddressSetTy Splits; 183ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha AddressSetTy Calls; 184ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 18536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (const SymbolRef &Symbol : Obj.symbols()) { 1860a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha SymbolRef::Type SymType; 18736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Symbol.getType(SymType); 1880a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha if (SymType == SymbolRef::ST_Function) { 1890a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha uint64_t SymAddr; 19036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Symbol.getAddress(SymAddr); 191484a6eb9cc22db9c78bb93969bc0341c19e7739eAhmed Bougacha SymAddr = getEffectiveLoadAddr(SymAddr); 19205a81020d970dc0fe8ae29e484d06aae619a4ca4Ahmed Bougacha Calls.push_back(SymAddr); 19305a81020d970dc0fe8ae29e484d06aae619a4ca4Ahmed Bougacha Splits.push_back(SymAddr); 1940a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha } 1950a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha } 1960a30cccd493e3bc82a5771ca15326f7cc8b6cb8cAhmed Bougacha 197ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha assert(Module->func_begin() == Module->func_end() 198ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha && "Module already has a CFG!"); 199ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 200ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha // First, determine the basic block boundaries and call targets. 201ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha for (MCModule::atom_iterator AI = Module->atom_begin(), 202ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha AE = Module->atom_end(); 203ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha AI != AE; ++AI) { 204ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI); 205ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (!TA) continue; 20605a81020d970dc0fe8ae29e484d06aae619a4ca4Ahmed Bougacha Calls.push_back(TA->getBeginAddr()); 2077ab184a2a1cbf5b5b340d663e07550659438ed7cAhmed Bougacha BBInfos[TA->getBeginAddr()].Atom = TA; 208ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end(); 209ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha II != IE; ++II) { 210ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (MIA.isTerminator(II->Inst)) 21105a81020d970dc0fe8ae29e484d06aae619a4ca4Ahmed Bougacha Splits.push_back(II->Address + II->Size); 212ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha uint64_t Target; 213ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) { 214ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (MIA.isCall(II->Inst)) 21505a81020d970dc0fe8ae29e484d06aae619a4ca4Ahmed Bougacha Calls.push_back(Target); 21605a81020d970dc0fe8ae29e484d06aae619a4ca4Ahmed Bougacha Splits.push_back(Target); 217ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } 218ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } 219ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } 220ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 221f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha RemoveDupsFromAddressVector(Splits); 222f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha RemoveDupsFromAddressVector(Calls); 22305a81020d970dc0fe8ae29e484d06aae619a4ca4Ahmed Bougacha 224ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha // Split text atoms into basic block atoms. 225ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end(); 226ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha SI != SE; ++SI) { 227ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCAtom *A = Module->findAtomContaining(*SI); 228ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (!A) continue; 229ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCTextAtom *TA = cast<MCTextAtom>(A); 230ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (TA->getBeginAddr() == *SI) 231ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha continue; 232ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCTextAtom *NewAtom = TA->split(*SI); 233ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom; 234ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha StringRef BBName = TA->getName(); 235ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha BBName = BBName.substr(0, BBName.find_last_of(':')); 236ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha NewAtom->setName((BBName + ":" + utohexstr(*SI)).str()); 237ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } 238ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 239ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha // Compute succs/preds. 240ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha for (MCModule::atom_iterator AI = Module->atom_begin(), 241ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha AE = Module->atom_end(); 242ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha AI != AE; ++AI) { 243ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI); 244ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (!TA) continue; 245ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha BBInfo &CurBB = BBInfos[TA->getBeginAddr()]; 246ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha const MCDecodedInst &LI = TA->back(); 247ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (MIA.isBranch(LI.Inst)) { 248ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha uint64_t Target; 249ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target)) 250ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha CurBB.addSucc(BBInfos[Target]); 251ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (MIA.isConditionalBranch(LI.Inst)) 252ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha CurBB.addSucc(BBInfos[LI.Address + LI.Size]); 253ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } else if (!MIA.isTerminator(LI.Inst)) 254ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha CurBB.addSucc(BBInfos[LI.Address + LI.Size]); 255ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } 256ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 257ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 258ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha // Create functions and basic blocks. 259ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end(); 260ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha CI != CE; ++CI) { 261ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha BBInfo &BBI = BBInfos[*CI]; 262ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (!BBI.Atom) continue; 263ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 264ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName()); 265ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 266ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha // Create MCBBs. 267ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha SmallSetVector<BBInfo*, 16> Worklist; 268ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha Worklist.insert(&BBI); 26946937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha for (size_t wi = 0; wi < Worklist.size(); ++wi) { 27046937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha BBInfo *BBI = Worklist[wi]; 271ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (!BBI->Atom) 272ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha continue; 273ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha BBI->BB = &MCFN.createBlock(*BBI->Atom); 274ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha // Add all predecessors and successors to the worklist. 275ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); 276ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha SI != SE; ++SI) 277ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha Worklist.insert(*SI); 278ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); 279ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha PI != PE; ++PI) 280ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha Worklist.insert(*PI); 281ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } 282ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha 283ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha // Set preds/succs. 28446937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha for (size_t wi = 0; wi < Worklist.size(); ++wi) { 28546937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha BBInfo *BBI = Worklist[wi]; 286ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha MCBasicBlock *MCBB = BBI->BB; 287ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha if (!MCBB) 288ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha continue; 289ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); 29046937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha SI != SE; ++SI) 29146937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha if ((*SI)->BB) 29246937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha MCBB->addSuccessor((*SI)->BB); 293ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); 29446937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha PI != PE; ++PI) 29546937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha if ((*PI)->BB) 29646937278fad5e47178b0c5f5e062eba71644231eAhmed Bougacha MCBB->addPredecessor((*PI)->BB); 297ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } 298ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha } 299ef99356dfebb96f6f90efb912c2877214bad060eAhmed Bougacha} 3000e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 301f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// Basic idea of the disassembly + discovery: 302f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// 303f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// start with the wanted address, insert it in the worklist 304f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// while worklist not empty, take next address in the worklist: 305f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// - check if atom exists there 306f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// - if middle of atom: 307f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// - split basic blocks referencing the atom 308f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// - look for an already encountered BBInfo (using a map<atom, bbinfo>) 309f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// - if there is, split it (new one, fallthrough, move succs, etc..) 310f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// - if start of atom: nothing else to do 311f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// - if no atom: create new atom and new bbinfo 312f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// - look at the last instruction in the atom, add succs to worklist 313f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// for all elements in the worklist: 314f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// - create basic block, update preds/succs, etc.. 315f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha// 316f176482752fbea3139394e280adfb10270dd3aacAhmed BougachaMCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN, 317f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha uint64_t BBBeginAddr, 318f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha AddressSetTy &CallTargets, 319f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha AddressSetTy &TailCallTargets) { 320f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy; 321f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha typedef SmallSetVector<uint64_t, 16> AddrWorklistTy; 322f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BBInfoByAddrTy BBInfos; 323f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha AddrWorklistTy Worklist; 324f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 325f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha Worklist.insert(BBBeginAddr); 326f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha for (size_t wi = 0; wi < Worklist.size(); ++wi) { 327f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha const uint64_t BeginAddr = Worklist[wi]; 328f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BBInfo *BBI = &BBInfos[BeginAddr]; 329f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 330f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MCTextAtom *&TA = BBI->Atom; 331f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha assert(!TA && "Discovered basic block already has an associated atom!"); 332f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 333f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // Look for an atom at BeginAddr. 334f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (MCAtom *A = Module->findAtomContaining(BeginAddr)) { 335f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // FIXME: We don't care about mixed atoms, see above. 336f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha TA = cast<MCTextAtom>(A); 337f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 338f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // The found atom doesn't begin at BeginAddr, we have to split it. 339f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (TA->getBeginAddr() != BeginAddr) { 340f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // FIXME: Handle overlapping atoms: middle-starting instructions, etc.. 341f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MCTextAtom *NewTA = TA->split(BeginAddr); 342f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 343f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // Look for an already encountered basic block that needs splitting 344f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr()); 345f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (It != BBInfos.end() && It->second.Atom) { 346f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BBI->SuccAddrs = It->second.SuccAddrs; 347f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha It->second.SuccAddrs.clear(); 348f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha It->second.SuccAddrs.push_back(BeginAddr); 349f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 350f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha TA = NewTA; 351f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 352f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BBI->Atom = TA; 353f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } else { 354f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // If we didn't find an atom, then we have to disassemble to create one! 355f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 356f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MemoryObject *Region = getRegionFor(BeginAddr); 357f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (!Region) 358f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha llvm_unreachable(("Couldn't find suitable region for disassembly at " + 359f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha utostr(BeginAddr)).c_str()); 360f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 361f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha uint64_t InstSize; 362f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha uint64_t EndAddr = Region->getBase() + Region->getExtent(); 363f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 364f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // We want to stop before the next atom and have a fallthrough to it. 365f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (MCTextAtom *NextAtom = 366f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha cast_or_null<MCTextAtom>(Module->findFirstAtomAfter(BeginAddr))) 367f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha EndAddr = std::min(EndAddr, NextAtom->getBeginAddr()); 368f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 369f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) { 370f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MCInst Inst; 371f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(), 372f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha nulls())) { 373f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (!TA) 374f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha TA = Module->createTextAtom(Addr, Addr); 375f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha TA->addInst(Inst, InstSize); 376f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } else { 377f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // We don't care about splitting mixed atoms either. 378f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha llvm_unreachable("Couldn't disassemble instruction in atom."); 379f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 380f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 381f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha uint64_t BranchTarget; 382f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) { 383f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (MIA.isCall(Inst)) 384f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha CallTargets.push_back(BranchTarget); 385f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 386f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 387f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (MIA.isTerminator(Inst)) 388f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha break; 389f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 390f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BBI->Atom = TA; 391f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 392f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 393f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha assert(TA && "Couldn't disassemble atom, none was created!"); 394f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha assert(TA->begin() != TA->end() && "Empty atom!"); 395f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 396f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MemoryObject *Region = getRegionFor(TA->getBeginAddr()); 397f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha assert(Region && "Couldn't find region for already disassembled code!"); 398f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha uint64_t EndRegion = Region->getBase() + Region->getExtent(); 399f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 400f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // Now we have a basic block atom, add successors. 401f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // Add the fallthrough block. 402f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if ((MIA.isConditionalBranch(TA->back().Inst) || 403f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha !MIA.isTerminator(TA->back().Inst)) && 404f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha (TA->getEndAddr() + 1 < EndRegion)) { 405f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BBI->SuccAddrs.push_back(TA->getEndAddr() + 1); 406f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha Worklist.insert(TA->getEndAddr() + 1); 407f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 408f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 409f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // If the terminator is a branch, add the target block. 410f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (MIA.isBranch(TA->back().Inst)) { 411f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha uint64_t BranchTarget; 412f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address, 413f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha TA->back().Size, BranchTarget)) { 414f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha StringRef ExtFnName; 415f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (MOS) 416f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha ExtFnName = 417f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget)); 418f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (!ExtFnName.empty()) { 419f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha TailCallTargets.push_back(BranchTarget); 420f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha CallTargets.push_back(BranchTarget); 421f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } else { 422f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BBI->SuccAddrs.push_back(BranchTarget); 423f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha Worklist.insert(BranchTarget); 424f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 425f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 426f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 427f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 428f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 429f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { 430f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha const uint64_t BeginAddr = Worklist[wi]; 431f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BBInfo *BBI = &BBInfos[BeginAddr]; 432f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 433f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha assert(BBI->Atom && "Found a basic block without an associated atom!"); 434f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 435f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // Look for a basic block at BeginAddr. 436f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BBI->BB = MCFN->find(BeginAddr); 437f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (BBI->BB) { 438f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // FIXME: check that the succs/preds are the same 439f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha continue; 440f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 441f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // If there was none, we have to create one from the atom. 442f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BBI->BB = &MCFN->createBlock(*BBI->Atom); 443f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 444f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 445f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { 446f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha const uint64_t BeginAddr = Worklist[wi]; 447f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BBInfo *BBI = &BBInfos[BeginAddr]; 448f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MCBasicBlock *BB = BBI->BB; 449f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 450f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha RemoveDupsFromAddressVector(BBI->SuccAddrs); 451f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(), 452f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha SE = BBI->SuccAddrs.end(); 453f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha SE != SE; ++SI) { 454f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MCBasicBlock *Succ = BBInfos[*SI].BB; 455f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha BB->addSuccessor(Succ); 456f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha Succ->addPredecessor(BB); 457f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 458f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 459f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 460f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha assert(BBInfos[Worklist[0]].BB && 461f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha "No basic block created at requested address?"); 462f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 463f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha return BBInfos[Worklist[0]].BB; 464f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha} 465f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 466f176482752fbea3139394e280adfb10270dd3aacAhmed BougachaMCFunction * 467f176482752fbea3139394e280adfb10270dd3aacAhmed BougachaMCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr, 468f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha AddressSetTy &CallTargets, 469f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha AddressSetTy &TailCallTargets) { 470f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // First, check if this is an external function. 471f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha StringRef ExtFnName; 472f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (MOS) 473f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha ExtFnName = MOS->findExternalFunctionAt(getOriginalLoadAddr(BeginAddr)); 474f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if (!ExtFnName.empty()) 475f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha return Module->createFunction(ExtFnName); 476f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 477f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // If it's not, look for an existing function. 478f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha for (MCModule::func_iterator FI = Module->func_begin(), 479f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha FE = Module->func_end(); 480f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha FI != FE; ++FI) { 481f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if ((*FI)->empty()) 482f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha continue; 483f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // FIXME: MCModule should provide a findFunctionByAddr() 484f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha if ((*FI)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr) 485dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return FI->get(); 486f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha } 487f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 488f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha // Finally, just create a new one. 489f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha MCFunction *MCFN = Module->createFunction(""); 490f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha getBBAt(Module, MCFN, BeginAddr, CallTargets, TailCallTargets); 491f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha return MCFN; 492f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha} 493f176482752fbea3139394e280adfb10270dd3aacAhmed Bougacha 4940e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha// MachO MCObjectDisassembler implementation. 4950e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 4960e83b902834530da4670ad8416cf44afba9b4111Ahmed BougachaMCMachOObjectDisassembler::MCMachOObjectDisassembler( 4970e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha const MachOObjectFile &MOOF, const MCDisassembler &Dis, 4980e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha const MCInstrAnalysis &MIA, uint64_t VMAddrSlide, 4990e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha uint64_t HeaderLoadAddress) 5000e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF), 5010e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) { 5020e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 50336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (const SectionRef &Section : MOOF.sections()) { 5040e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha StringRef Name; 50536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Section.getName(Name); 5060e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha // FIXME: We should use the S_ section type instead of the name. 5070e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha if (Name == "__mod_init_func") { 5080e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha DEBUG(dbgs() << "Found __mod_init_func section!\n"); 50936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Section.getContents(ModInitContents); 5100e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha } else if (Name == "__mod_exit_func") { 5110e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha DEBUG(dbgs() << "Found __mod_exit_func section!\n"); 51236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Section.getContents(ModExitContents); 5130e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha } 5140e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha } 5150e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha} 5160e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 5170e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha// FIXME: Only do the translations for addresses actually inside the object. 5180e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougachauint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) { 5190e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha return Addr + VMAddrSlide; 5200e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha} 5210e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 5220e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougachauint64_t 5230e83b902834530da4670ad8416cf44afba9b4111Ahmed BougachaMCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) { 5240e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha return EffectiveAddr - VMAddrSlide; 5250e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha} 5260e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 5270e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougachauint64_t MCMachOObjectDisassembler::getEntrypoint() { 5280e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha uint64_t EntryFileOffset = 0; 5290e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 5300e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha // Look for LC_MAIN. 5310e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha { 5325510728d28bb1ee04abc32da3d21b7df12948053Charles Davis uint32_t LoadCommandCount = MOOF.getHeader().ncmds; 5330e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo(); 5340e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha for (unsigned I = 0;; ++I) { 5355510728d28bb1ee04abc32da3d21b7df12948053Charles Davis if (Load.C.cmd == MachO::LC_MAIN) { 5360e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha EntryFileOffset = 5370e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha ((const MachO::entry_point_command *)Load.Ptr)->entryoff; 5380e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha break; 5390e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha } 5400e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 5410e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha if (I == LoadCommandCount - 1) 5420e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha break; 5430e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha else 5440e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha Load = MOOF.getNextLoadCommandInfo(Load); 5450e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha } 5460e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha } 5470e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 5480e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha // If we didn't find anything, default to the common implementation. 5490e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha // FIXME: Maybe we could also look at LC_UNIXTHREAD and friends? 5500e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha if (EntryFileOffset) 5510e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha return MCObjectDisassembler::getEntrypoint(); 5520e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 5530e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha return EntryFileOffset + HeaderLoadAddress; 5540e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha} 5550e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 5560e83b902834530da4670ad8416cf44afba9b4111Ahmed BougachaArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticInitFunctions() { 5570e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha // FIXME: We only handle 64bit mach-o 5580e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha assert(MOOF.is64Bit()); 5590e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 5600e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha size_t EntrySize = 8; 5610e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha size_t EntryCount = ModInitContents.size() / EntrySize; 5620e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha return ArrayRef<uint64_t>( 5630e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha reinterpret_cast<const uint64_t *>(ModInitContents.data()), EntryCount); 5640e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha} 5650e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 5660e83b902834530da4670ad8416cf44afba9b4111Ahmed BougachaArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticExitFunctions() { 5670e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha // FIXME: We only handle 64bit mach-o 5680e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha assert(MOOF.is64Bit()); 5690e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha 5700e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha size_t EntrySize = 8; 5710e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha size_t EntryCount = ModExitContents.size() / EntrySize; 5720e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha return ArrayRef<uint64_t>( 5730e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha reinterpret_cast<const uint64_t *>(ModExitContents.data()), EntryCount); 5740e83b902834530da4670ad8416cf44afba9b4111Ahmed Bougacha} 575