1685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer//===-- MCFunction.cpp ----------------------------------------------------===//
2685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer//
3685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer//                     The LLVM Compiler Infrastructure
4685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer//
5685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// This file is distributed under the University of Illinois Open Source
6685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// License. See LICENSE.TXT for details.
7685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer//
8685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer//===----------------------------------------------------------------------===//
9685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer//
10685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// This file defines the algorithm to break down a region of machine code
11685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer// into basic blocks and try to reconstruct a CFG from it.
12685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer//
13685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer//===----------------------------------------------------------------------===//
14685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer
15685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "MCFunction.h"
16685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/ADT/STLExtras.h"
17685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/MC/MCDisassembler.h"
18685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/MC/MCInst.h"
19685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/MC/MCInstPrinter.h"
2041ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer#include "llvm/MC/MCInstrAnalysis.h"
21685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/MC/MCInstrDesc.h"
22685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/MC/MCInstrInfo.h"
23685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/Support/MemoryObject.h"
24685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/Support/raw_ostream.h"
25685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include "llvm/Support/system_error.h"
26685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer#include <set>
27685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramerusing namespace llvm;
28685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer
29685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin KramerMCFunction
30685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin KramerMCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
31685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer                                 const MemoryObject &Region, uint64_t Start,
3241ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer                                 uint64_t End, const MCInstrAnalysis *Ana,
330b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer                                 raw_ostream &DebugOut,
340b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer                                 SmallVectorImpl<uint64_t> &Calls) {
350b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer  std::vector<MCDecodedInst> Instructions;
36685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer  std::set<uint64_t> Splits;
37685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer  Splits.insert(Start);
38685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer  uint64_t Size;
39685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer
400b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer  MCFunction f(Name);
410b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer
420b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer  {
430b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer  DenseSet<uint64_t> VisitedInsts;
440b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer  SmallVector<uint64_t, 16> WorkList;
450b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer  WorkList.push_back(Start);
46685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer  // Disassemble code and gather basic block split points.
470b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer  while (!WorkList.empty()) {
480b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer    uint64_t Index = WorkList.pop_back_val();
490b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer    if (VisitedInsts.find(Index) != VisitedInsts.end())
50a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer      continue; // Already visited this location.
51685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer
520b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer    for (;Index < End; Index += Size) {
53a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer      VisitedInsts.insert(Index);
540b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer
55a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer      MCInst Inst;
560b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer      if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){
57a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer        Instructions.push_back(MCDecodedInst(Index, Size, Inst));
580b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer        if (Ana->isBranch(Inst)) {
590b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer          uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
60a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer          if (targ != -1ULL && targ == Index+Size)
61a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer            continue; // Skip nop jumps.
62a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer
63a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer          // If we could determine the branch target, make a note to start a
64a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer          // new basic block there and add the target to the worklist.
650b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer          if (targ != -1ULL) {
660b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer            Splits.insert(targ);
670b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer            WorkList.push_back(targ);
680b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer            WorkList.push_back(Index+Size);
690b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer          }
700b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer          Splits.insert(Index+Size);
710b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer          break;
720b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer        } else if (Ana->isReturn(Inst)) {
73a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer          // Return instruction. This basic block ends here.
740b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer          Splits.insert(Index+Size);
750b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer          break;
760b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer        } else if (Ana->isCall(Inst)) {
770b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer          uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
78a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer          // Add the call to the call list if the destination is known.
79a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer          if (targ != -1ULL && targ != Index+Size)
800b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer            Calls.push_back(targ);
81685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer        }
820b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer      } else {
830b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer        errs().write_hex(Index) << ": warning: invalid instruction encoding\n";
840b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer        if (Size == 0)
850b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer          Size = 1; // skip illegible bytes
860b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer      }
87685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer    }
880b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer  }
89685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer  }
90685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer
91a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer  // Make sure the instruction list is sorted.
920b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer  std::sort(Instructions.begin(), Instructions.end());
93685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer
94a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer  // Create basic blocks.
95685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer  unsigned ii = 0, ie = Instructions.size();
96685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer  for (std::set<uint64_t>::iterator spi = Splits.begin(),
970b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer       spe = llvm::prior(Splits.end()); spi != spe; ++spi) {
98685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer    MCBasicBlock BB;
990b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer    uint64_t BlockEnd = *llvm::next(spi);
100685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer    // Add instructions to the BB.
101685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer    for (; ii != ie; ++ii) {
102685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer      if (Instructions[ii].Address < *spi ||
103685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer          Instructions[ii].Address >= BlockEnd)
104685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer        break;
105685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer      BB.addInst(Instructions[ii]);
106685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer    }
107685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer    f.addBlock(*spi, BB);
108685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer  }
109685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer
1100b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer  std::sort(f.Blocks.begin(), f.Blocks.end());
1110b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer
112685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer  // Calculate successors of each block.
113685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer  for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
114a894c8e34453493a9d3fb2ffbbc21151c3965b63Benjamin Kramer    MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second);
115685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer    if (BB.getInsts().empty()) continue;
116685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer    const MCDecodedInst &Inst = BB.getInsts().back();
117685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer
11841ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer    if (Ana->isBranch(Inst.Inst)) {
11941ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer      uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size);
12041ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer      if (targ == -1ULL) {
121685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer        // Indirect branch. Bail and add all blocks of the function as a
122685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer        // successor.
123685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer        for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i)
1240b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer          BB.addSucc(i->first);
12541ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer      } else if (targ != Inst.Address+Inst.Size)
1260b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer        BB.addSucc(targ);
12741ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer      // Conditional branches can also fall through to the next block.
12841ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer      if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e)
1290b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer        BB.addSucc(llvm::next(i)->first);
130685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer    } else {
131685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer      // No branch. Fall through to the next block.
13241ab14b725c8f2bb3e54553d0d7d96ff184786b1Benjamin Kramer      if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e)
1330b8b771e9f2f251460a6f200c45efe9d55640d60Benjamin Kramer        BB.addSucc(llvm::next(i)->first);
134685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer    }
135685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer  }
136685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer
137685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer  return f;
138685a2501b20baf688f6cc087f4b92bbafcd8028eBenjamin Kramer}
139