1685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer//===-- MCFunction.cpp ----------------------------------------------------===// 2685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// 3685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// The LLVM Compiler Infrastructure 4685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// 5685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// This file is distributed under the University of Illinois Open Source 6685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// License. See LICENSE.TXT for details. 7685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// 8685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer//===----------------------------------------------------------------------===// 9685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// 10685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// This file defines the algorithm to break down a region of machine code 11685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// into basic blocks and try to reconstruct a CFG from it. 12685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// 13685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer//===----------------------------------------------------------------------===// 14685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer 15685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "MCFunction.h" 16685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/ADT/STLExtras.h" 17685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/MC/MCDisassembler.h" 18685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/MC/MCInst.h" 19685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/MC/MCInstPrinter.h" 2041ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer#include "llvm/MC/MCInstrAnalysis.h" 21685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/MC/MCInstrDesc.h" 22685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/MC/MCInstrInfo.h" 23685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/Support/MemoryObject.h" 24685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/Support/raw_ostream.h" 25685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/Support/system_error.h" 26685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include <set> 27685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramerusing namespace llvm; 28685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer 29685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin KramerMCFunction 30685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin KramerMCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, 31685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer const MemoryObject &Region, uint64_t Start, 3241ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer uint64_t End, const MCInstrAnalysis *Ana, 330b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer raw_ostream &DebugOut, 340b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer SmallVectorImpl<uint64_t> &Calls) { 350b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer std::vector<MCDecodedInst> Instructions; 36685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer std::set<uint64_t> Splits; 37685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer Splits.insert(Start); 38685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer uint64_t Size; 39685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer 400b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer MCFunction f(Name); 410b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer 420b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer { 430b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer DenseSet<uint64_t> VisitedInsts; 440b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer SmallVector<uint64_t, 16> WorkList; 450b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer WorkList.push_back(Start); 46685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer // Disassemble code and gather basic block split points. 470b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer while (!WorkList.empty()) { 480b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer uint64_t Index = WorkList.pop_back_val(); 490b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer if (VisitedInsts.find(Index) != VisitedInsts.end()) 50a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer continue; // Already visited this location. 51685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer 520b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer for (;Index < End; Index += Size) { 53a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer VisitedInsts.insert(Index); 540b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer 55a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer MCInst Inst; 560b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){ 57a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer Instructions.push_back(MCDecodedInst(Index, Size, Inst)); 580b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer if (Ana->isBranch(Inst)) { 590b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); 60a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer if (targ != -1ULL && targ == Index+Size) 61a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer continue; // Skip nop jumps. 62a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer 63a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer // If we could determine the branch target, make a note to start a 64a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer // new basic block there and add the target to the worklist. 650b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer if (targ != -1ULL) { 660b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer Splits.insert(targ); 670b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer WorkList.push_back(targ); 680b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer WorkList.push_back(Index+Size); 690b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer } 700b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer Splits.insert(Index+Size); 710b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer break; 720b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer } else if (Ana->isReturn(Inst)) { 73a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer // Return instruction. This basic block ends here. 740b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer Splits.insert(Index+Size); 750b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer break; 760b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer } else if (Ana->isCall(Inst)) { 770b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); 78a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer // Add the call to the call list if the destination is known. 79a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer if (targ != -1ULL && targ != Index+Size) 800b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer Calls.push_back(targ); 81685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer } 820b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer } else { 830b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer errs().write_hex(Index) << ": warning: invalid instruction encoding\n"; 840b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer if (Size == 0) 850b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer Size = 1; // skip illegible bytes 860b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer } 87685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer } 880b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer } 89685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer } 90685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer 91a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer // Make sure the instruction list is sorted. 920b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer std::sort(Instructions.begin(), Instructions.end()); 93685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer 94a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer // Create basic blocks. 95685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer unsigned ii = 0, ie = Instructions.size(); 96685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer for (std::set<uint64_t>::iterator spi = Splits.begin(), 970b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer spe = llvm::prior(Splits.end()); spi != spe; ++spi) { 98685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer MCBasicBlock BB; 990b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer uint64_t BlockEnd = *llvm::next(spi); 100685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer // Add instructions to the BB. 101685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer for (; ii != ie; ++ii) { 102685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer if (Instructions[ii].Address < *spi || 103685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer Instructions[ii].Address >= BlockEnd) 104685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer break; 105685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer BB.addInst(Instructions[ii]); 106685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer } 107685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer f.addBlock(*spi, BB); 108685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer } 109685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer 1100b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer std::sort(f.Blocks.begin(), f.Blocks.end()); 1110b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer 112685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer // Calculate successors of each block. 113685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { 114a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second); 115685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer if (BB.getInsts().empty()) continue; 116685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer const MCDecodedInst &Inst = BB.getInsts().back(); 117685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer 11841ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer if (Ana->isBranch(Inst.Inst)) { 11941ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size); 12041ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer if (targ == -1ULL) { 121685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer // Indirect branch. Bail and add all blocks of the function as a 122685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer // successor. 123685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) 1240b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer BB.addSucc(i->first); 12541ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer } else if (targ != Inst.Address+Inst.Size) 1260b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer BB.addSucc(targ); 12741ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer // Conditional branches can also fall through to the next block. 12841ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e) 1290b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer BB.addSucc(llvm::next(i)->first); 130685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer } else { 131685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer // No branch. Fall through to the next block. 13241ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e) 1330b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer BB.addSucc(llvm::next(i)->first); 134685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer } 135685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer } 136685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer 137685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer return f; 138685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer} 139