1//===-- MCFunction.cpp ----------------------------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the algorithm to break down a region of machine code 11// into basic blocks and try to reconstruct a CFG from it. 12// 13//===----------------------------------------------------------------------===// 14 15#include "MCFunction.h" 16#include "llvm/ADT/STLExtras.h" 17#include "llvm/MC/MCDisassembler.h" 18#include "llvm/MC/MCInst.h" 19#include "llvm/MC/MCInstPrinter.h" 20#include "llvm/MC/MCInstrAnalysis.h" 21#include "llvm/MC/MCInstrDesc.h" 22#include "llvm/MC/MCInstrInfo.h" 23#include "llvm/Support/MemoryObject.h" 24#include "llvm/Support/raw_ostream.h" 25#include "llvm/Support/system_error.h" 26#include <set> 27using namespace llvm; 28 29MCFunction 30MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, 31 const MemoryObject &Region, uint64_t Start, 32 uint64_t End, const MCInstrAnalysis *Ana, 33 raw_ostream &DebugOut, 34 SmallVectorImpl<uint64_t> &Calls) { 35 std::vector<MCDecodedInst> Instructions; 36 std::set<uint64_t> Splits; 37 Splits.insert(Start); 38 uint64_t Size; 39 40 MCFunction f(Name); 41 42 { 43 DenseSet<uint64_t> VisitedInsts; 44 SmallVector<uint64_t, 16> WorkList; 45 WorkList.push_back(Start); 46 // Disassemble code and gather basic block split points. 47 while (!WorkList.empty()) { 48 uint64_t Index = WorkList.pop_back_val(); 49 if (VisitedInsts.find(Index) != VisitedInsts.end()) 50 continue; // Already visited this location. 51 52 for (;Index < End; Index += Size) { 53 VisitedInsts.insert(Index); 54 55 MCInst Inst; 56 if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){ 57 Instructions.push_back(MCDecodedInst(Index, Size, Inst)); 58 if (Ana->isBranch(Inst)) { 59 uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); 60 if (targ != -1ULL && targ == Index+Size) 61 continue; // Skip nop jumps. 62 63 // If we could determine the branch target, make a note to start a 64 // new basic block there and add the target to the worklist. 65 if (targ != -1ULL) { 66 Splits.insert(targ); 67 WorkList.push_back(targ); 68 WorkList.push_back(Index+Size); 69 } 70 Splits.insert(Index+Size); 71 break; 72 } else if (Ana->isReturn(Inst)) { 73 // Return instruction. This basic block ends here. 74 Splits.insert(Index+Size); 75 break; 76 } else if (Ana->isCall(Inst)) { 77 uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); 78 // Add the call to the call list if the destination is known. 79 if (targ != -1ULL && targ != Index+Size) 80 Calls.push_back(targ); 81 } 82 } else { 83 errs().write_hex(Index) << ": warning: invalid instruction encoding\n"; 84 if (Size == 0) 85 Size = 1; // skip illegible bytes 86 } 87 } 88 } 89 } 90 91 // Make sure the instruction list is sorted. 92 std::sort(Instructions.begin(), Instructions.end()); 93 94 // Create basic blocks. 95 unsigned ii = 0, ie = Instructions.size(); 96 for (std::set<uint64_t>::iterator spi = Splits.begin(), 97 spe = llvm::prior(Splits.end()); spi != spe; ++spi) { 98 MCBasicBlock BB; 99 uint64_t BlockEnd = *llvm::next(spi); 100 // Add instructions to the BB. 101 for (; ii != ie; ++ii) { 102 if (Instructions[ii].Address < *spi || 103 Instructions[ii].Address >= BlockEnd) 104 break; 105 BB.addInst(Instructions[ii]); 106 } 107 f.addBlock(*spi, BB); 108 } 109 110 std::sort(f.Blocks.begin(), f.Blocks.end()); 111 112 // Calculate successors of each block. 113 for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { 114 MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second); 115 if (BB.getInsts().empty()) continue; 116 const MCDecodedInst &Inst = BB.getInsts().back(); 117 118 if (Ana->isBranch(Inst.Inst)) { 119 uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size); 120 if (targ == -1ULL) { 121 // Indirect branch. Bail and add all blocks of the function as a 122 // successor. 123 for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) 124 BB.addSucc(i->first); 125 } else if (targ != Inst.Address+Inst.Size) 126 BB.addSucc(targ); 127 // Conditional branches can also fall through to the next block. 128 if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e) 129 BB.addSucc(llvm::next(i)->first); 130 } else { 131 // No branch. Fall through to the next block. 132 if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e) 133 BB.addSucc(llvm::next(i)->first); 134 } 135 } 136 137 return f; 138} 139