SelectionDAGISel.cpp revision 06a248c9b398049d41cf6dd1a3f9eecc75603401
1//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by the LLVM research group and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements the SelectionDAGISel class.
11//
12//===----------------------------------------------------------------------===//
13
14#define DEBUG_TYPE "isel"
15#include "llvm/CodeGen/SelectionDAGISel.h"
16#include "llvm/CodeGen/ScheduleDAG.h"
17#include "llvm/CallingConv.h"
18#include "llvm/Constants.h"
19#include "llvm/DerivedTypes.h"
20#include "llvm/Function.h"
21#include "llvm/GlobalVariable.h"
22#include "llvm/InlineAsm.h"
23#include "llvm/Instructions.h"
24#include "llvm/Intrinsics.h"
25#include "llvm/CodeGen/IntrinsicLowering.h"
26#include "llvm/CodeGen/MachineDebugInfo.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineFrameInfo.h"
29#include "llvm/CodeGen/MachineInstrBuilder.h"
30#include "llvm/CodeGen/SelectionDAG.h"
31#include "llvm/CodeGen/SSARegMap.h"
32#include "llvm/Target/MRegisterInfo.h"
33#include "llvm/Target/TargetData.h"
34#include "llvm/Target/TargetFrameInfo.h"
35#include "llvm/Target/TargetInstrInfo.h"
36#include "llvm/Target/TargetLowering.h"
37#include "llvm/Target/TargetMachine.h"
38#include "llvm/Transforms/Utils/BasicBlockUtils.h"
39#include "llvm/Support/CommandLine.h"
40#include "llvm/Support/MathExtras.h"
41#include "llvm/Support/Debug.h"
42#include <map>
43#include <set>
44#include <iostream>
45using namespace llvm;
46
#ifndef NDEBUG
// Debug-only visualization flags.  When enabled, a viewer window pops up
// showing the DAG either as it is selected (-view-isel-dags) or as it is
// scheduled (-view-sched-dags).  In NDEBUG builds these collapse to constant
// false so the guarded calls compile away entirely.
static cl::opt<bool>
ViewISelDAGs("view-isel-dags", cl::Hidden,
          cl::desc("Pop up a window to show isel dags as they are selected"));
static cl::opt<bool>
ViewSchedDAGs("view-sched-dags", cl::Hidden,
          cl::desc("Pop up a window to show sched dags as they are processed"));
#else
static const bool ViewISelDAGs = 0;
static const bool ViewSchedDAGs = 0;
#endif
58
namespace {
  // -sched: selects which scheduling heuristic runs after instruction
  // selection.  Defaults to whatever style the target prefers.
  cl::opt<SchedHeuristics>
  ISHeuristic(
    "sched",
    cl::desc("Choose scheduling style"),
    cl::init(defaultScheduling),
    cl::values(
      clEnumValN(defaultScheduling, "default",
                 "Target preferred scheduling style"),
      clEnumValN(noScheduling, "none",
                 "No scheduling: breadth first sequencing"),
      clEnumValN(simpleScheduling, "simple",
                 "Simple two pass scheduling: minimize critical path "
                 "and maximize processor utilization"),
      clEnumValN(simpleNoItinScheduling, "simple-noitin",
                 "Simple two pass scheduling: Same as simple "
                 "except using generic latency"),
      clEnumValN(listSchedulingBURR, "list-burr",
                 "Bottom up register reduction list scheduling"),
      clEnumValEnd));
} // namespace
80
81
82namespace llvm {
83  //===--------------------------------------------------------------------===//
84  /// FunctionLoweringInfo - This contains information that is global to a
85  /// function that is used when lowering a region of the function.
86  class FunctionLoweringInfo {
87  public:
88    TargetLowering &TLI;
89    Function &Fn;
90    MachineFunction &MF;
91    SSARegMap *RegMap;
92
93    FunctionLoweringInfo(TargetLowering &TLI, Function &Fn,MachineFunction &MF);
94
95    /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
96    std::map<const BasicBlock*, MachineBasicBlock *> MBBMap;
97
98    /// ValueMap - Since we emit code for the function a basic block at a time,
99    /// we must remember which virtual registers hold the values for
100    /// cross-basic-block values.
101    std::map<const Value*, unsigned> ValueMap;
102
103    /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
104    /// the entry block.  This allows the allocas to be efficiently referenced
105    /// anywhere in the function.
106    std::map<const AllocaInst*, int> StaticAllocaMap;
107
108    unsigned MakeReg(MVT::ValueType VT) {
109      return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
110    }
111
112    unsigned CreateRegForValue(const Value *V) {
113      MVT::ValueType VT = TLI.getValueType(V->getType());
114      // The common case is that we will only create one register for this
115      // value.  If we have that case, create and return the virtual register.
116      unsigned NV = TLI.getNumElements(VT);
117      if (NV == 1) {
118        // If we are promoting this value, pick the next largest supported type.
119        return MakeReg(TLI.getTypeToTransformTo(VT));
120      }
121
122      // If this value is represented with multiple target registers, make sure
123      // to create enough consequtive registers of the right (smaller) type.
124      unsigned NT = VT-1;  // Find the type to use.
125      while (TLI.getNumElements((MVT::ValueType)NT) != 1)
126        --NT;
127
128      unsigned R = MakeReg((MVT::ValueType)NT);
129      for (unsigned i = 1; i != NV; ++i)
130        MakeReg((MVT::ValueType)NT);
131      return R;
132    }
133
134    unsigned InitializeRegForValue(const Value *V) {
135      unsigned &R = ValueMap[V];
136      assert(R == 0 && "Already initialized this value register!");
137      return R = CreateRegForValue(V);
138    }
139  };
140}
141
142/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
143/// PHI nodes or outside of the basic block that defines it.
144static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
145  if (isa<PHINode>(I)) return true;
146  BasicBlock *BB = I->getParent();
147  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
148    if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
149      return true;
150  return false;
151}
152
153/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
154/// entry block, return true.
155static bool isOnlyUsedInEntryBlock(Argument *A) {
156  BasicBlock *Entry = A->getParent()->begin();
157  for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
158    if (cast<Instruction>(*UI)->getParent() != Entry)
159      return false;  // Use not in entry block.
160  return true;
161}
162
/// FunctionLoweringInfo constructor - Set up all per-function lowering state:
/// vregs for cross-block arguments, static stack objects for fixed entry-block
/// allocas, vregs for cross-block instruction values, and one
/// MachineBasicBlock (with empty PHI instructions) per LLVM basic block.
FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli,
                                           Function &fn, MachineFunction &mf)
    : TLI(tli), Fn(fn), MF(mf), RegMap(MF.getSSARegMap()) {

  // Create a vreg for each argument register that is not dead and is used
  // outside of the entry block for the function.
  for (Function::arg_iterator AI = Fn.arg_begin(), E = Fn.arg_end();
       AI != E; ++AI)
    if (!isOnlyUsedInEntryBlock(AI))
      InitializeRegForValue(AI);

  // Initialize the mapping of values to registers.  This is only set up for
  // instruction values that are used outside of the block that defines
  // them.
  // Note: BB is left pointing at the entry block here; this first scan covers
  // ONLY the entry block, where fixed-size allocas become static stack
  // objects with known frame indices.
  Function::iterator BB = Fn.begin(), EB = Fn.end();
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(AI->getArraySize())) {
        const Type *Ty = AI->getAllocatedType();
        uint64_t TySize = TLI.getTargetData().getTypeSize(Ty);
        // Honor the larger of the type's natural alignment and the alignment
        // explicitly requested on the alloca instruction.
        unsigned Align =
          std::max((unsigned)TLI.getTargetData().getTypeAlignment(Ty),
                   AI->getAlignment());

        // If the alignment of the value is smaller than the size of the value,
        // and if the size of the value is particularly small (<= 8 bytes),
        // round up to the size of the value for potentially better performance.
        //
        // FIXME: This could be made better with a preferred alignment hook in
        // TargetData.  It serves primarily to 8-byte align doubles for X86.
        if (Align < TySize && TySize <= 8) Align = TySize;
        TySize *= CUI->getValue();   // Get total allocated size.
        if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
        StaticAllocaMap[AI] =
          MF.getFrameInfo()->CreateStackObject((unsigned)TySize, Align);
      }

  // Walk every block (continuing from the entry block) and give a vreg to
  // each value that is used across block boundaries, except allocas that
  // already received a static stack slot above.
  for (; BB != EB; ++BB)
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
      if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
        if (!isa<AllocaInst>(I) ||
            !StaticAllocaMap.count(cast<AllocaInst>(I)))
          InitializeRegForValue(I);

  // Create an initial MachineBasicBlock for each LLVM BasicBlock in F.  This
  // also creates the initial PHI MachineInstrs, though none of the input
  // operands are populated.
  for (BB = Fn.begin(), EB = Fn.end(); BB != EB; ++BB) {
    MachineBasicBlock *MBB = new MachineBasicBlock(BB);
    MBBMap[BB] = MBB;
    MF.getBasicBlockList().push_back(MBB);

    // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
    // appropriate.
    PHINode *PN;
    for (BasicBlock::iterator I = BB->begin();
         (PN = dyn_cast<PHINode>(I)); ++I)
      if (!PN->use_empty()) {
        // Multi-register (expanded) values get one PHI per machine register,
        // placed in consecutive vregs starting at PHIReg.
        unsigned NumElements =
          TLI.getNumElements(TLI.getValueType(PN->getType()));
        unsigned PHIReg = ValueMap[PN];
        assert(PHIReg &&"PHI node does not have an assigned virtual register!");
        for (unsigned i = 0; i != NumElements; ++i)
          BuildMI(MBB, TargetInstrInfo::PHI, PN->getNumOperands(), PHIReg+i);
      }
  }
}
230
231
232
233//===----------------------------------------------------------------------===//
234/// SelectionDAGLowering - This is the common target-independent lowering
235/// implementation that is parameterized by a TargetLowering object.
236/// Also, targets can overload any lowering method.
237///
namespace llvm {
class SelectionDAGLowering {
  /// CurMBB - The MachineBasicBlock currently being emitted into; set via
  /// setCurrentBasicBlock before a block's instructions are visited.
  MachineBasicBlock *CurMBB;

  /// NodeMap - Mapping from LLVM values to the SDOperands already computed
  /// for them in this block; filled lazily by getValue and explicitly by
  /// setValue.
  std::map<const Value*, SDOperand> NodeMap;

  /// PendingLoads - Loads are not emitted to the program immediately.  We bunch
  /// them up and then emit token factor nodes when possible.  This allows us to
  /// get simple disambiguation between loads without worrying about alias
  /// analysis.
  std::vector<SDOperand> PendingLoads;

public:
  // TLI - This is information that describes the available target features we
  // need for lowering.  This indicates when operations are unavailable,
  // implemented with a libcall, etc.
  TargetLowering &TLI;
  SelectionDAG &DAG;
  const TargetData &TD;

  /// FuncInfo - Information about the function as a whole.
  ///
  FunctionLoweringInfo &FuncInfo;

  SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
                       FunctionLoweringInfo &funcinfo)
    : TLI(tli), DAG(dag), TD(DAG.getTarget().getTargetData()),
      FuncInfo(funcinfo) {
  }

  /// getRoot - Return the current virtual root of the Selection DAG.
  /// If loads are pending, they are first chained together with a
  /// TokenFactor node, which becomes the new root.
  SDOperand getRoot() {
    if (PendingLoads.empty())
      return DAG.getRoot();

    if (PendingLoads.size() == 1) {
      SDOperand Root = PendingLoads[0];
      DAG.setRoot(Root);
      PendingLoads.clear();
      return Root;
    }

    // Otherwise, we have to make a token factor node.
    SDOperand Root = DAG.getNode(ISD::TokenFactor, MVT::Other, PendingLoads);
    PendingLoads.clear();
    DAG.setRoot(Root);
    return Root;
  }

  /// visit - Dispatch to the visit method for I's opcode.
  void visit(Instruction &I) { visit(I.getOpcode(), I); }

  /// visit - Dispatch on an explicit opcode; also used for ConstantExprs,
  /// which carry an opcode but are not Instructions.
  void visit(unsigned Opcode, User &I) {
    switch (Opcode) {
    default: assert(0 && "Unknown instruction type encountered!");
             abort();
      // Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
    case Instruction::OPCODE:return visit##OPCODE((CLASS&)I);
#include "llvm/Instruction.def"
    }
  }

  void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }

  /// getIntPtrConstant - Return Val as a constant of the target's pointer
  /// type.
  SDOperand getIntPtrConstant(uint64_t Val) {
    return DAG.getConstant(Val, TLI.getPointerTy());
  }

  /// getValue - Return the SDOperand for V, creating it if necessary.
  /// Constants are materialized on the spot (constant expressions are lowered
  /// recursively via visit); static allocas become frame indices; any other
  /// value is copied out of the virtual register FunctionLoweringInfo
  /// assigned to it, with expansion/promotion undone as needed.
  SDOperand getValue(const Value *V) {
    SDOperand &N = NodeMap[V];
    if (N.Val) return N;

    const Type *VTy = V->getType();
    MVT::ValueType VT = TLI.getValueType(VTy);
    if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)))
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
        visit(CE->getOpcode(), *CE);
        assert(N.Val && "visit didn't populate the ValueMap!");
        return N;
      } else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
        return N = DAG.getGlobalAddress(GV, VT);
      } else if (isa<ConstantPointerNull>(C)) {
        return N = DAG.getConstant(0, TLI.getPointerTy());
      } else if (isa<UndefValue>(C)) {
        return N = DAG.getNode(ISD::UNDEF, VT);
      } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
        return N = DAG.getConstantFP(CFP->getValue(), VT);
      } else if (const PackedType *PTy = dyn_cast<PackedType>(VTy)) {
        unsigned NumElements = PTy->getNumElements();
        MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
        MVT::ValueType TVT = MVT::getVectorType(PVT, NumElements);

        // Now that we know the number and type of the elements, push a
        // Constant or ConstantFP node onto the ops list for each element of
        // the packed constant.
        std::vector<SDOperand> Ops;
        if (ConstantPacked *CP = dyn_cast<ConstantPacked>(C)) {
          if (MVT::isFloatingPoint(PVT)) {
            for (unsigned i = 0; i != NumElements; ++i) {
              const ConstantFP *El = cast<ConstantFP>(CP->getOperand(i));
              Ops.push_back(DAG.getConstantFP(El->getValue(), PVT));
            }
          } else {
            for (unsigned i = 0; i != NumElements; ++i) {
              const ConstantIntegral *El =
                cast<ConstantIntegral>(CP->getOperand(i));
              Ops.push_back(DAG.getConstant(El->getRawValue(), PVT));
            }
          }
        } else {
          // A zero-initialized vector: replicate one zero element.
          assert(isa<ConstantAggregateZero>(C) && "Unknown packed constant!");
          SDOperand Op;
          if (MVT::isFloatingPoint(PVT))
            Op = DAG.getConstantFP(0, PVT);
          else
            Op = DAG.getConstant(0, PVT);
          Ops.assign(NumElements, Op);
        }

        // Handle the case where we have a 1-element vector, in which
        // case we want to immediately turn it into a scalar constant.
        if (Ops.size() == 1) {
          return N = Ops[0];
        } else if (TVT != MVT::Other && TLI.isTypeLegal(TVT)) {
          return N = DAG.getNode(ISD::ConstantVec, TVT, Ops);
        } else {
          // If the packed type isn't legal, then create a ConstantVec node with
          // generic Vector type instead.
          return N = DAG.getNode(ISD::ConstantVec, MVT::Vector, Ops);
        }
      } else {
        // Canonicalize all constant ints to be unsigned.
        return N = DAG.getConstant(cast<ConstantIntegral>(C)->getRawValue(),VT);
      }

    // Fixed-size entry-block allocas are static stack objects; reference
    // them through their frame index.
    if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
      std::map<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end())
        return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
    }

    // Otherwise the value must live in a virtual register assigned by
    // FunctionLoweringInfo (it is defined in another block).
    std::map<const Value*, unsigned>::const_iterator VMI =
      FuncInfo.ValueMap.find(V);
    assert(VMI != FuncInfo.ValueMap.end() && "Value not in map!");

    unsigned InReg = VMI->second;

    // If this type is not legal, make it so now.
    MVT::ValueType DestVT = TLI.getTypeToTransformTo(VT);

    N = DAG.getCopyFromReg(DAG.getEntryNode(), InReg, DestVT);
    if (DestVT < VT) {
      // Source must be expanded.  This input value is actually coming from the
      // register pair VMI->second and VMI->second+1.
      N = DAG.getNode(ISD::BUILD_PAIR, VT, N,
                      DAG.getCopyFromReg(DAG.getEntryNode(), InReg+1, DestVT));
    } else {
      if (DestVT > VT) { // Promotion case
        if (MVT::isFloatingPoint(VT))
          N = DAG.getNode(ISD::FP_ROUND, VT, N);
        else
          N = DAG.getNode(ISD::TRUNCATE, VT, N);
      }
    }

    return N;
  }

  /// setValue - Record NewN as the SDOperand for V.  Asserts that V has not
  /// already been assigned a node in this block.
  const SDOperand &setValue(const Value *V, SDOperand NewN) {
    SDOperand &N = NodeMap[V];
    assert(N.Val == 0 && "Already set a value for this node!");
    return N = NewN;
  }

  unsigned GetAvailableRegister(bool OutReg, bool InReg,
                                const std::vector<unsigned> &RegChoices,
                                std::set<unsigned> &OutputRegs,
                                std::set<unsigned> &InputRegs);

  // Terminator instructions.
  void visitRet(ReturnInst &I);
  void visitBr(BranchInst &I);
  void visitUnreachable(UnreachableInst &I) { /* noop */ }

  // These all get lowered before this pass.
  void visitExtractElement(ExtractElementInst &I) { assert(0 && "TODO"); }
  void visitInsertElement(InsertElementInst &I) { assert(0 && "TODO"); }
  void visitSwitch(SwitchInst &I) { assert(0 && "TODO"); }
  void visitInvoke(InvokeInst &I) { assert(0 && "TODO"); }
  void visitUnwind(UnwindInst &I) { assert(0 && "TODO"); }

  // Binary and shift operators; each picks the int/FP/vector opcode by type.
  void visitBinary(User &I, unsigned IntOp, unsigned FPOp, unsigned VecOp);
  void visitShift(User &I, unsigned Opcode);
  void visitAdd(User &I) {
    visitBinary(I, ISD::ADD, ISD::FADD, ISD::VADD);
  }
  void visitSub(User &I);
  void visitMul(User &I) {
    visitBinary(I, ISD::MUL, ISD::FMUL, ISD::VMUL);
  }
  void visitDiv(User &I) {
    const Type *Ty = I.getType();
    visitBinary(I, Ty->isSigned() ? ISD::SDIV : ISD::UDIV, ISD::FDIV, 0);
  }
  void visitRem(User &I) {
    const Type *Ty = I.getType();
    visitBinary(I, Ty->isSigned() ? ISD::SREM : ISD::UREM, ISD::FREM, 0);
  }
  void visitAnd(User &I) { visitBinary(I, ISD::AND, 0, 0); }
  void visitOr (User &I) { visitBinary(I, ISD::OR,  0, 0); }
  void visitXor(User &I) { visitBinary(I, ISD::XOR, 0, 0); }
  void visitShl(User &I) { visitShift(I, ISD::SHL); }
  void visitShr(User &I) {
    visitShift(I, I.getType()->isUnsigned() ? ISD::SRL : ISD::SRA);
  }

  // Comparisons; the signed/unsigned condition code is chosen by operand type.
  void visitSetCC(User &I, ISD::CondCode SignedOpc, ISD::CondCode UnsignedOpc);
  void visitSetEQ(User &I) { visitSetCC(I, ISD::SETEQ, ISD::SETEQ); }
  void visitSetNE(User &I) { visitSetCC(I, ISD::SETNE, ISD::SETNE); }
  void visitSetLE(User &I) { visitSetCC(I, ISD::SETLE, ISD::SETULE); }
  void visitSetGE(User &I) { visitSetCC(I, ISD::SETGE, ISD::SETUGE); }
  void visitSetLT(User &I) { visitSetCC(I, ISD::SETLT, ISD::SETULT); }
  void visitSetGT(User &I) { visitSetCC(I, ISD::SETGT, ISD::SETUGT); }

  void visitGetElementPtr(User &I);
  void visitCast(User &I);
  void visitSelect(User &I);

  // Memory and call instructions.
  void visitMalloc(MallocInst &I);
  void visitFree(FreeInst &I);
  void visitAlloca(AllocaInst &I);
  void visitLoad(LoadInst &I);
  void visitStore(StoreInst &I);
  void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
  void visitCall(CallInst &I);
  void visitInlineAsm(CallInst &I);
  const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);

  void visitVAStart(CallInst &I);
  void visitVAArg(VAArgInst &I);
  void visitVAEnd(CallInst &I);
  void visitVACopy(CallInst &I);
  void visitFrameReturnAddress(CallInst &I, bool isFrameAddress);

  void visitMemIntrinsic(CallInst &I, unsigned Op);

  void visitUserOp1(Instruction &I) {
    assert(0 && "UserOp1 should not exist at instruction selection time!");
    abort();
  }
  void visitUserOp2(Instruction &I) {
    assert(0 && "UserOp2 should not exist at instruction selection time!");
    abort();
  }
};
} // end namespace llvm
499
500void SelectionDAGLowering::visitRet(ReturnInst &I) {
501  if (I.getNumOperands() == 0) {
502    DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other, getRoot()));
503    return;
504  }
505  std::vector<SDOperand> NewValues;
506  NewValues.push_back(getRoot());
507  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
508    SDOperand RetOp = getValue(I.getOperand(i));
509
510    // If this is an integer return value, we need to promote it ourselves to
511    // the full width of a register, since LegalizeOp will use ANY_EXTEND rather
512    // than sign/zero.
513    if (MVT::isInteger(RetOp.getValueType()) &&
514        RetOp.getValueType() < MVT::i64) {
515      MVT::ValueType TmpVT;
516      if (TLI.getTypeAction(MVT::i32) == TargetLowering::Promote)
517        TmpVT = TLI.getTypeToTransformTo(MVT::i32);
518      else
519        TmpVT = MVT::i32;
520
521      if (I.getOperand(i)->getType()->isSigned())
522        RetOp = DAG.getNode(ISD::SIGN_EXTEND, TmpVT, RetOp);
523      else
524        RetOp = DAG.getNode(ISD::ZERO_EXTEND, TmpVT, RetOp);
525    }
526    NewValues.push_back(RetOp);
527  }
528  DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other, NewValues));
529}
530
531void SelectionDAGLowering::visitBr(BranchInst &I) {
532  // Update machine-CFG edges.
533  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
534
535  // Figure out which block is immediately after the current one.
536  MachineBasicBlock *NextBlock = 0;
537  MachineFunction::iterator BBI = CurMBB;
538  if (++BBI != CurMBB->getParent()->end())
539    NextBlock = BBI;
540
541  if (I.isUnconditional()) {
542    // If this is not a fall-through branch, emit the branch.
543    if (Succ0MBB != NextBlock)
544      DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
545                              DAG.getBasicBlock(Succ0MBB)));
546  } else {
547    MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
548
549    SDOperand Cond = getValue(I.getCondition());
550    if (Succ1MBB == NextBlock) {
551      // If the condition is false, fall through.  This means we should branch
552      // if the condition is true to Succ #0.
553      DAG.setRoot(DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(),
554                              Cond, DAG.getBasicBlock(Succ0MBB)));
555    } else if (Succ0MBB == NextBlock) {
556      // If the condition is true, fall through.  This means we should branch if
557      // the condition is false to Succ #1.  Invert the condition first.
558      SDOperand True = DAG.getConstant(1, Cond.getValueType());
559      Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True);
560      DAG.setRoot(DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(),
561                              Cond, DAG.getBasicBlock(Succ1MBB)));
562    } else {
563      std::vector<SDOperand> Ops;
564      Ops.push_back(getRoot());
565      Ops.push_back(Cond);
566      Ops.push_back(DAG.getBasicBlock(Succ0MBB));
567      Ops.push_back(DAG.getBasicBlock(Succ1MBB));
568      DAG.setRoot(DAG.getNode(ISD::BRCONDTWOWAY, MVT::Other, Ops));
569    }
570  }
571}
572
573void SelectionDAGLowering::visitSub(User &I) {
574  // -0.0 - X --> fneg
575  if (I.getType()->isFloatingPoint()) {
576    if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
577      if (CFP->isExactlyValue(-0.0)) {
578        SDOperand Op2 = getValue(I.getOperand(1));
579        setValue(&I, DAG.getNode(ISD::FNEG, Op2.getValueType(), Op2));
580        return;
581      }
582  }
583  visitBinary(I, ISD::SUB, ISD::FSUB, ISD::VSUB);
584}
585
586void SelectionDAGLowering::visitBinary(User &I, unsigned IntOp, unsigned FPOp,
587                                       unsigned VecOp) {
588  const Type *Ty = I.getType();
589  SDOperand Op1 = getValue(I.getOperand(0));
590  SDOperand Op2 = getValue(I.getOperand(1));
591
592  if (Ty->isIntegral()) {
593    setValue(&I, DAG.getNode(IntOp, Op1.getValueType(), Op1, Op2));
594  } else if (Ty->isFloatingPoint()) {
595    setValue(&I, DAG.getNode(FPOp, Op1.getValueType(), Op1, Op2));
596  } else {
597    const PackedType *PTy = cast<PackedType>(Ty);
598    unsigned NumElements = PTy->getNumElements();
599    MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
600    MVT::ValueType TVT = MVT::getVectorType(PVT, NumElements);
601
602    // Immediately scalarize packed types containing only one element, so that
603    // the Legalize pass does not have to deal with them.  Similarly, if the
604    // abstract vector is going to turn into one that the target natively
605    // supports, generate that type now so that Legalize doesn't have to deal
606    // with that either.  These steps ensure that Legalize only has to handle
607    // vector types in its Expand case.
608    unsigned Opc = MVT::isFloatingPoint(PVT) ? FPOp : IntOp;
609    if (NumElements == 1) {
610      setValue(&I, DAG.getNode(Opc, PVT, Op1, Op2));
611    } else if (TVT != MVT::Other && TLI.isTypeLegal(TVT)) {
612      setValue(&I, DAG.getNode(Opc, TVT, Op1, Op2));
613    } else {
614      SDOperand Num = DAG.getConstant(NumElements, MVT::i32);
615      SDOperand Typ = DAG.getValueType(PVT);
616      setValue(&I, DAG.getNode(VecOp, MVT::Vector, Op1, Op2, Num, Typ));
617    }
618  }
619}
620
621void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
622  SDOperand Op1 = getValue(I.getOperand(0));
623  SDOperand Op2 = getValue(I.getOperand(1));
624
625  Op2 = DAG.getNode(ISD::ANY_EXTEND, TLI.getShiftAmountTy(), Op2);
626
627  setValue(&I, DAG.getNode(Opcode, Op1.getValueType(), Op1, Op2));
628}
629
630void SelectionDAGLowering::visitSetCC(User &I,ISD::CondCode SignedOpcode,
631                                      ISD::CondCode UnsignedOpcode) {
632  SDOperand Op1 = getValue(I.getOperand(0));
633  SDOperand Op2 = getValue(I.getOperand(1));
634  ISD::CondCode Opcode = SignedOpcode;
635  if (I.getOperand(0)->getType()->isUnsigned())
636    Opcode = UnsignedOpcode;
637  setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Opcode));
638}
639
640void SelectionDAGLowering::visitSelect(User &I) {
641  SDOperand Cond     = getValue(I.getOperand(0));
642  SDOperand TrueVal  = getValue(I.getOperand(1));
643  SDOperand FalseVal = getValue(I.getOperand(2));
644  setValue(&I, DAG.getNode(ISD::SELECT, TrueVal.getValueType(), Cond,
645                           TrueVal, FalseVal));
646}
647
648void SelectionDAGLowering::visitCast(User &I) {
649  SDOperand N = getValue(I.getOperand(0));
650  MVT::ValueType SrcTy = TLI.getValueType(I.getOperand(0)->getType());
651  MVT::ValueType DestTy = TLI.getValueType(I.getType());
652
653  if (N.getValueType() == DestTy) {
654    setValue(&I, N);  // noop cast.
655  } else if (DestTy == MVT::i1) {
656    // Cast to bool is a comparison against zero, not truncation to zero.
657    SDOperand Zero = isInteger(SrcTy) ? DAG.getConstant(0, N.getValueType()) :
658                                       DAG.getConstantFP(0.0, N.getValueType());
659    setValue(&I, DAG.getSetCC(MVT::i1, N, Zero, ISD::SETNE));
660  } else if (isInteger(SrcTy)) {
661    if (isInteger(DestTy)) {        // Int -> Int cast
662      if (DestTy < SrcTy)   // Truncating cast?
663        setValue(&I, DAG.getNode(ISD::TRUNCATE, DestTy, N));
664      else if (I.getOperand(0)->getType()->isSigned())
665        setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, DestTy, N));
666      else
667        setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestTy, N));
668    } else {                        // Int -> FP cast
669      if (I.getOperand(0)->getType()->isSigned())
670        setValue(&I, DAG.getNode(ISD::SINT_TO_FP, DestTy, N));
671      else
672        setValue(&I, DAG.getNode(ISD::UINT_TO_FP, DestTy, N));
673    }
674  } else {
675    assert(isFloatingPoint(SrcTy) && "Unknown value type!");
676    if (isFloatingPoint(DestTy)) {  // FP -> FP cast
677      if (DestTy < SrcTy)   // Rounding cast?
678        setValue(&I, DAG.getNode(ISD::FP_ROUND, DestTy, N));
679      else
680        setValue(&I, DAG.getNode(ISD::FP_EXTEND, DestTy, N));
681    } else {                        // FP -> Int cast.
682      if (I.getType()->isSigned())
683        setValue(&I, DAG.getNode(ISD::FP_TO_SINT, DestTy, N));
684      else
685        setValue(&I, DAG.getNode(ISD::FP_TO_UINT, DestTy, N));
686    }
687  }
688}
689
690void SelectionDAGLowering::visitGetElementPtr(User &I) {
691  SDOperand N = getValue(I.getOperand(0));
692  const Type *Ty = I.getOperand(0)->getType();
693  const Type *UIntPtrTy = TD.getIntPtrType();
694
695  for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
696       OI != E; ++OI) {
697    Value *Idx = *OI;
698    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
699      unsigned Field = cast<ConstantUInt>(Idx)->getValue();
700      if (Field) {
701        // N = N + Offset
702        uint64_t Offset = TD.getStructLayout(StTy)->MemberOffsets[Field];
703        N = DAG.getNode(ISD::ADD, N.getValueType(), N,
704                        getIntPtrConstant(Offset));
705      }
706      Ty = StTy->getElementType(Field);
707    } else {
708      Ty = cast<SequentialType>(Ty)->getElementType();
709
710      // If this is a constant subscript, handle it quickly.
711      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
712        if (CI->getRawValue() == 0) continue;
713
714        uint64_t Offs;
715        if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(CI))
716          Offs = (int64_t)TD.getTypeSize(Ty)*CSI->getValue();
717        else
718          Offs = TD.getTypeSize(Ty)*cast<ConstantUInt>(CI)->getValue();
719        N = DAG.getNode(ISD::ADD, N.getValueType(), N, getIntPtrConstant(Offs));
720        continue;
721      }
722
723      // N = N + Idx * ElementSize;
724      uint64_t ElementSize = TD.getTypeSize(Ty);
725      SDOperand IdxN = getValue(Idx);
726
727      // If the index is smaller or larger than intptr_t, truncate or extend
728      // it.
729      if (IdxN.getValueType() < N.getValueType()) {
730        if (Idx->getType()->isSigned())
731          IdxN = DAG.getNode(ISD::SIGN_EXTEND, N.getValueType(), IdxN);
732        else
733          IdxN = DAG.getNode(ISD::ZERO_EXTEND, N.getValueType(), IdxN);
734      } else if (IdxN.getValueType() > N.getValueType())
735        IdxN = DAG.getNode(ISD::TRUNCATE, N.getValueType(), IdxN);
736
737      // If this is a multiply by a power of two, turn it into a shl
738      // immediately.  This is a very common case.
739      if (isPowerOf2_64(ElementSize)) {
740        unsigned Amt = Log2_64(ElementSize);
741        IdxN = DAG.getNode(ISD::SHL, N.getValueType(), IdxN,
742                           DAG.getConstant(Amt, TLI.getShiftAmountTy()));
743        N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
744        continue;
745      }
746
747      SDOperand Scale = getIntPtrConstant(ElementSize);
748      IdxN = DAG.getNode(ISD::MUL, N.getValueType(), IdxN, Scale);
749      N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
750    }
751  }
752  setValue(&I, N);
753}
754
755void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
756  // If this is a fixed sized alloca in the entry block of the function,
757  // allocate it statically on the stack.
758  if (FuncInfo.StaticAllocaMap.count(&I))
759    return;   // getValue will auto-populate this.
760
761  const Type *Ty = I.getAllocatedType();
762  uint64_t TySize = TLI.getTargetData().getTypeSize(Ty);
763  unsigned Align = std::max((unsigned)TLI.getTargetData().getTypeAlignment(Ty),
764                            I.getAlignment());
765
766  SDOperand AllocSize = getValue(I.getArraySize());
767  MVT::ValueType IntPtr = TLI.getPointerTy();
768  if (IntPtr < AllocSize.getValueType())
769    AllocSize = DAG.getNode(ISD::TRUNCATE, IntPtr, AllocSize);
770  else if (IntPtr > AllocSize.getValueType())
771    AllocSize = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, AllocSize);
772
773  AllocSize = DAG.getNode(ISD::MUL, IntPtr, AllocSize,
774                          getIntPtrConstant(TySize));
775
776  // Handle alignment.  If the requested alignment is less than or equal to the
777  // stack alignment, ignore it and round the size of the allocation up to the
778  // stack alignment size.  If the size is greater than the stack alignment, we
779  // note this in the DYNAMIC_STACKALLOC node.
780  unsigned StackAlign =
781    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
782  if (Align <= StackAlign) {
783    Align = 0;
784    // Add SA-1 to the size.
785    AllocSize = DAG.getNode(ISD::ADD, AllocSize.getValueType(), AllocSize,
786                            getIntPtrConstant(StackAlign-1));
787    // Mask out the low bits for alignment purposes.
788    AllocSize = DAG.getNode(ISD::AND, AllocSize.getValueType(), AllocSize,
789                            getIntPtrConstant(~(uint64_t)(StackAlign-1)));
790  }
791
792  std::vector<MVT::ValueType> VTs;
793  VTs.push_back(AllocSize.getValueType());
794  VTs.push_back(MVT::Other);
795  std::vector<SDOperand> Ops;
796  Ops.push_back(getRoot());
797  Ops.push_back(AllocSize);
798  Ops.push_back(getIntPtrConstant(Align));
799  SDOperand DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, VTs, Ops);
800  DAG.setRoot(setValue(&I, DSA).getValue(1));
801
802  // Inform the Frame Information that we have just allocated a variable-sized
803  // object.
804  CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject();
805}
806
807/// getStringValue - Turn an LLVM constant pointer that eventually points to a
808/// global into a string value.  Return an empty string if we can't do it.
809///
810static std::string getStringValue(Value *V, unsigned Offset = 0) {
811  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
812    if (GV->hasInitializer() && isa<ConstantArray>(GV->getInitializer())) {
813      ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
814      if (Init->isString()) {
815        std::string Result = Init->getAsString();
816        if (Offset < Result.size()) {
817          // If we are pointing INTO The string, erase the beginning...
818          Result.erase(Result.begin(), Result.begin()+Offset);
819
820          // Take off the null terminator, and any string fragments after it.
821          std::string::size_type NullPos = Result.find_first_of((char)0);
822          if (NullPos != std::string::npos)
823            Result.erase(Result.begin()+NullPos, Result.end());
824          return Result;
825        }
826      }
827    }
828  } else if (Constant *C = dyn_cast<Constant>(V)) {
829    if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
830      return getStringValue(GV, Offset);
831    else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
832      if (CE->getOpcode() == Instruction::GetElementPtr) {
833        // Turn a gep into the specified offset.
834        if (CE->getNumOperands() == 3 &&
835            cast<Constant>(CE->getOperand(1))->isNullValue() &&
836            isa<ConstantInt>(CE->getOperand(2))) {
837          return getStringValue(CE->getOperand(0),
838                   Offset+cast<ConstantInt>(CE->getOperand(2))->getRawValue());
839        }
840      }
841    }
842  }
843  return "";
844}
845
846void SelectionDAGLowering::visitLoad(LoadInst &I) {
847  SDOperand Ptr = getValue(I.getOperand(0));
848
849  SDOperand Root;
850  if (I.isVolatile())
851    Root = getRoot();
852  else {
853    // Do not serialize non-volatile loads against each other.
854    Root = DAG.getRoot();
855  }
856
857  const Type *Ty = I.getType();
858  SDOperand L;
859
860  if (const PackedType *PTy = dyn_cast<PackedType>(Ty)) {
861    unsigned NumElements = PTy->getNumElements();
862    MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
863    MVT::ValueType TVT = MVT::getVectorType(PVT, NumElements);
864
865    // Immediately scalarize packed types containing only one element, so that
866    // the Legalize pass does not have to deal with them.
867    if (NumElements == 1) {
868      L = DAG.getLoad(PVT, Root, Ptr, DAG.getSrcValue(I.getOperand(0)));
869    } else if (TVT != MVT::Other && TLI.isTypeLegal(TVT)) {
870      L = DAG.getLoad(TVT, Root, Ptr, DAG.getSrcValue(I.getOperand(0)));
871    } else {
872      L = DAG.getVecLoad(NumElements, PVT, Root, Ptr,
873                         DAG.getSrcValue(I.getOperand(0)));
874    }
875  } else {
876    L = DAG.getLoad(TLI.getValueType(Ty), Root, Ptr,
877                    DAG.getSrcValue(I.getOperand(0)));
878  }
879  setValue(&I, L);
880
881  if (I.isVolatile())
882    DAG.setRoot(L.getValue(1));
883  else
884    PendingLoads.push_back(L.getValue(1));
885}
886
887
888void SelectionDAGLowering::visitStore(StoreInst &I) {
889  Value *SrcV = I.getOperand(0);
890  SDOperand Src = getValue(SrcV);
891  SDOperand Ptr = getValue(I.getOperand(1));
892  DAG.setRoot(DAG.getNode(ISD::STORE, MVT::Other, getRoot(), Src, Ptr,
893                          DAG.getSrcValue(I.getOperand(1))));
894}
895
896/// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
897/// we want to emit this as a call to a named external function, return the name
898/// otherwise lower it and return null.
899const char *
900SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
901  switch (Intrinsic) {
902  case Intrinsic::vastart:  visitVAStart(I); return 0;
903  case Intrinsic::vaend:    visitVAEnd(I); return 0;
904  case Intrinsic::vacopy:   visitVACopy(I); return 0;
905  case Intrinsic::returnaddress: visitFrameReturnAddress(I, false); return 0;
906  case Intrinsic::frameaddress:  visitFrameReturnAddress(I, true); return 0;
907  case Intrinsic::setjmp:
908    return "_setjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
909    break;
910  case Intrinsic::longjmp:
911    return "_longjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
912    break;
913  case Intrinsic::memcpy:  visitMemIntrinsic(I, ISD::MEMCPY); return 0;
914  case Intrinsic::memset:  visitMemIntrinsic(I, ISD::MEMSET); return 0;
915  case Intrinsic::memmove: visitMemIntrinsic(I, ISD::MEMMOVE); return 0;
916
917  case Intrinsic::readport:
918  case Intrinsic::readio: {
919    std::vector<MVT::ValueType> VTs;
920    VTs.push_back(TLI.getValueType(I.getType()));
921    VTs.push_back(MVT::Other);
922    std::vector<SDOperand> Ops;
923    Ops.push_back(getRoot());
924    Ops.push_back(getValue(I.getOperand(1)));
925    SDOperand Tmp = DAG.getNode(Intrinsic == Intrinsic::readport ?
926                                ISD::READPORT : ISD::READIO, VTs, Ops);
927
928    setValue(&I, Tmp);
929    DAG.setRoot(Tmp.getValue(1));
930    return 0;
931  }
932  case Intrinsic::writeport:
933  case Intrinsic::writeio:
934    DAG.setRoot(DAG.getNode(Intrinsic == Intrinsic::writeport ?
935                            ISD::WRITEPORT : ISD::WRITEIO, MVT::Other,
936                            getRoot(), getValue(I.getOperand(1)),
937                            getValue(I.getOperand(2))));
938    return 0;
939
940  case Intrinsic::dbg_stoppoint: {
941    if (TLI.getTargetMachine().getIntrinsicLowering().EmitDebugFunctions())
942      return "llvm_debugger_stop";
943
944    MachineDebugInfo *DebugInfo = DAG.getMachineDebugInfo();
945    if (DebugInfo &&  DebugInfo->Verify(I.getOperand(4))) {
946      std::vector<SDOperand> Ops;
947
948      // Input Chain
949      Ops.push_back(getRoot());
950
951      // line number
952      Ops.push_back(getValue(I.getOperand(2)));
953
954      // column
955      Ops.push_back(getValue(I.getOperand(3)));
956
957      DebugInfoDesc *DD = DebugInfo->getDescFor(I.getOperand(4));
958      assert(DD && "Not a debug information descriptor");
959      CompileUnitDesc *CompileUnit = dyn_cast<CompileUnitDesc>(DD);
960      assert(CompileUnit && "Not a compile unit");
961      Ops.push_back(DAG.getString(CompileUnit->getFileName()));
962      Ops.push_back(DAG.getString(CompileUnit->getDirectory()));
963
964      if (Ops.size() == 5)  // Found filename/workingdir.
965        DAG.setRoot(DAG.getNode(ISD::LOCATION, MVT::Other, Ops));
966    }
967
968    setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
969    return 0;
970  }
971  case Intrinsic::dbg_region_start:
972    if (TLI.getTargetMachine().getIntrinsicLowering().EmitDebugFunctions())
973      return "llvm_dbg_region_start";
974    if (I.getType() != Type::VoidTy)
975      setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
976    return 0;
977  case Intrinsic::dbg_region_end:
978    if (TLI.getTargetMachine().getIntrinsicLowering().EmitDebugFunctions())
979      return "llvm_dbg_region_end";
980    if (I.getType() != Type::VoidTy)
981      setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
982    return 0;
983  case Intrinsic::dbg_func_start:
984    if (TLI.getTargetMachine().getIntrinsicLowering().EmitDebugFunctions())
985      return "llvm_dbg_subprogram";
986    if (I.getType() != Type::VoidTy)
987      setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
988    return 0;
989  case Intrinsic::dbg_declare:
990    if (I.getType() != Type::VoidTy)
991      setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
992    return 0;
993
994  case Intrinsic::isunordered_f32:
995  case Intrinsic::isunordered_f64:
996    setValue(&I, DAG.getSetCC(MVT::i1,getValue(I.getOperand(1)),
997                              getValue(I.getOperand(2)), ISD::SETUO));
998    return 0;
999
1000  case Intrinsic::sqrt_f32:
1001  case Intrinsic::sqrt_f64:
1002    setValue(&I, DAG.getNode(ISD::FSQRT,
1003                             getValue(I.getOperand(1)).getValueType(),
1004                             getValue(I.getOperand(1))));
1005    return 0;
1006  case Intrinsic::pcmarker: {
1007    SDOperand Tmp = getValue(I.getOperand(1));
1008    DAG.setRoot(DAG.getNode(ISD::PCMARKER, MVT::Other, getRoot(), Tmp));
1009    return 0;
1010  }
1011  case Intrinsic::readcyclecounter: {
1012    std::vector<MVT::ValueType> VTs;
1013    VTs.push_back(MVT::i64);
1014    VTs.push_back(MVT::Other);
1015    std::vector<SDOperand> Ops;
1016    Ops.push_back(getRoot());
1017    SDOperand Tmp = DAG.getNode(ISD::READCYCLECOUNTER, VTs, Ops);
1018    setValue(&I, Tmp);
1019    DAG.setRoot(Tmp.getValue(1));
1020    return 0;
1021  }
1022  case Intrinsic::bswap_i16:
1023  case Intrinsic::bswap_i32:
1024  case Intrinsic::bswap_i64:
1025    setValue(&I, DAG.getNode(ISD::BSWAP,
1026                             getValue(I.getOperand(1)).getValueType(),
1027                             getValue(I.getOperand(1))));
1028    return 0;
1029  case Intrinsic::cttz_i8:
1030  case Intrinsic::cttz_i16:
1031  case Intrinsic::cttz_i32:
1032  case Intrinsic::cttz_i64:
1033    setValue(&I, DAG.getNode(ISD::CTTZ,
1034                             getValue(I.getOperand(1)).getValueType(),
1035                             getValue(I.getOperand(1))));
1036    return 0;
1037  case Intrinsic::ctlz_i8:
1038  case Intrinsic::ctlz_i16:
1039  case Intrinsic::ctlz_i32:
1040  case Intrinsic::ctlz_i64:
1041    setValue(&I, DAG.getNode(ISD::CTLZ,
1042                             getValue(I.getOperand(1)).getValueType(),
1043                             getValue(I.getOperand(1))));
1044    return 0;
1045  case Intrinsic::ctpop_i8:
1046  case Intrinsic::ctpop_i16:
1047  case Intrinsic::ctpop_i32:
1048  case Intrinsic::ctpop_i64:
1049    setValue(&I, DAG.getNode(ISD::CTPOP,
1050                             getValue(I.getOperand(1)).getValueType(),
1051                             getValue(I.getOperand(1))));
1052    return 0;
1053  case Intrinsic::stacksave: {
1054    std::vector<MVT::ValueType> VTs;
1055    VTs.push_back(TLI.getPointerTy());
1056    VTs.push_back(MVT::Other);
1057    std::vector<SDOperand> Ops;
1058    Ops.push_back(getRoot());
1059    SDOperand Tmp = DAG.getNode(ISD::STACKSAVE, VTs, Ops);
1060    setValue(&I, Tmp);
1061    DAG.setRoot(Tmp.getValue(1));
1062    return 0;
1063  }
1064  case Intrinsic::stackrestore: {
1065    SDOperand Tmp = getValue(I.getOperand(1));
1066    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, MVT::Other, getRoot(), Tmp));
1067    return 0;
1068  }
1069  case Intrinsic::prefetch:
1070    // FIXME: Currently discarding prefetches.
1071    return 0;
1072  default:
1073    std::cerr << I;
1074    assert(0 && "This intrinsic is not implemented yet!");
1075    return 0;
1076  }
1077}
1078
1079
1080void SelectionDAGLowering::visitCall(CallInst &I) {
1081  const char *RenameFn = 0;
1082  if (Function *F = I.getCalledFunction()) {
1083    if (F->isExternal())
1084      if (unsigned IID = F->getIntrinsicID()) {
1085        RenameFn = visitIntrinsicCall(I, IID);
1086        if (!RenameFn)
1087          return;
1088      } else {    // Not an LLVM intrinsic.
1089        const std::string &Name = F->getName();
1090        if (Name[0] == 'f' && (Name == "fabs" || Name == "fabsf")) {
1091          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
1092              I.getOperand(1)->getType()->isFloatingPoint() &&
1093              I.getType() == I.getOperand(1)->getType()) {
1094            SDOperand Tmp = getValue(I.getOperand(1));
1095            setValue(&I, DAG.getNode(ISD::FABS, Tmp.getValueType(), Tmp));
1096            return;
1097          }
1098        } else if (Name[0] == 's' && (Name == "sin" || Name == "sinf")) {
1099          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
1100              I.getOperand(1)->getType()->isFloatingPoint() &&
1101              I.getType() == I.getOperand(1)->getType()) {
1102            SDOperand Tmp = getValue(I.getOperand(1));
1103            setValue(&I, DAG.getNode(ISD::FSIN, Tmp.getValueType(), Tmp));
1104            return;
1105          }
1106        } else if (Name[0] == 'c' && (Name == "cos" || Name == "cosf")) {
1107          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
1108              I.getOperand(1)->getType()->isFloatingPoint() &&
1109              I.getType() == I.getOperand(1)->getType()) {
1110            SDOperand Tmp = getValue(I.getOperand(1));
1111            setValue(&I, DAG.getNode(ISD::FCOS, Tmp.getValueType(), Tmp));
1112            return;
1113          }
1114        }
1115      }
1116  } else if (isa<InlineAsm>(I.getOperand(0))) {
1117    visitInlineAsm(I);
1118    return;
1119  }
1120
1121  SDOperand Callee;
1122  if (!RenameFn)
1123    Callee = getValue(I.getOperand(0));
1124  else
1125    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
1126  std::vector<std::pair<SDOperand, const Type*> > Args;
1127  Args.reserve(I.getNumOperands());
1128  for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
1129    Value *Arg = I.getOperand(i);
1130    SDOperand ArgNode = getValue(Arg);
1131    Args.push_back(std::make_pair(ArgNode, Arg->getType()));
1132  }
1133
1134  const PointerType *PT = cast<PointerType>(I.getCalledValue()->getType());
1135  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
1136
1137  std::pair<SDOperand,SDOperand> Result =
1138    TLI.LowerCallTo(getRoot(), I.getType(), FTy->isVarArg(), I.getCallingConv(),
1139                    I.isTailCall(), Callee, Args, DAG);
1140  if (I.getType() != Type::VoidTy)
1141    setValue(&I, Result.first);
1142  DAG.setRoot(Result.second);
1143}
1144
1145/// GetAvailableRegister - Pick a register from RegChoices that is available
1146/// for input and/or output as specified by isOutReg/isInReg.  If an allocatable
1147/// register is found, it is returned and added to the specified set of used
1148/// registers.  If not, zero is returned.
1149unsigned SelectionDAGLowering::
1150GetAvailableRegister(bool isOutReg, bool isInReg,
1151                     const std::vector<unsigned> &RegChoices,
1152                     std::set<unsigned> &OutputRegs,
1153                     std::set<unsigned> &InputRegs) {
1154  const MRegisterInfo *MRI = DAG.getTarget().getRegisterInfo();
1155  MachineFunction &MF = *CurMBB->getParent();
1156  for (unsigned i = 0, e = RegChoices.size(); i != e; ++i) {
1157    unsigned Reg = RegChoices[i];
1158    // See if this register is available.
1159    if (isOutReg && OutputRegs.count(Reg)) continue;  // Already used.
1160    if (isInReg  && InputRegs.count(Reg)) continue;  // Already used.
1161
1162    // Check to see if this register is allocatable (i.e. don't give out the
1163    // stack pointer).
1164    bool Found = false;
1165    for (MRegisterInfo::regclass_iterator RC = MRI->regclass_begin(),
1166         E = MRI->regclass_end(); !Found && RC != E; ++RC) {
1167      // NOTE: This isn't ideal.  In particular, this might allocate the
1168      // frame pointer in functions that need it (due to them not being taken
1169      // out of allocation, because a variable sized allocation hasn't been seen
1170      // yet).  This is a slight code pessimization, but should still work.
1171      for (TargetRegisterClass::iterator I = (*RC)->allocation_order_begin(MF),
1172           E = (*RC)->allocation_order_end(MF); I != E; ++I)
1173        if (*I == Reg) {
1174          Found = true;
1175          break;
1176        }
1177    }
1178    if (!Found) continue;
1179
1180    // Okay, this register is good, return it.
1181    if (isOutReg) OutputRegs.insert(Reg);  // Mark used.
1182    if (isInReg)  InputRegs.insert(Reg);   // Mark used.
1183    return Reg;
1184  }
1185  return 0;
1186}
1187
1188/// visitInlineAsm - Handle a call to an InlineAsm object.
1189///
1190void SelectionDAGLowering::visitInlineAsm(CallInst &I) {
1191  InlineAsm *IA = cast<InlineAsm>(I.getOperand(0));
1192
1193  SDOperand AsmStr = DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
1194                                                 MVT::Other);
1195
1196  // Note, we treat inline asms both with and without side-effects as the same.
1197  // If an inline asm doesn't have side effects and doesn't access memory, we
1198  // could not choose to not chain it.
1199  bool hasSideEffects = IA->hasSideEffects();
1200
1201  std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
1202
1203  /// AsmNodeOperands - A list of pairs.  The first element is a register, the
1204  /// second is a bitfield where bit #0 is set if it is a use and bit #1 is set
1205  /// if it is a def of that register.
1206  std::vector<SDOperand> AsmNodeOperands;
1207  AsmNodeOperands.push_back(SDOperand());  // reserve space for input chain
1208  AsmNodeOperands.push_back(AsmStr);
1209
1210  SDOperand Chain = getRoot();
1211  SDOperand Flag;
1212
1213  // Loop over all of the inputs, copying the operand values into the
1214  // appropriate registers and processing the output regs.
1215  unsigned RetValReg = 0;
1216  std::vector<std::pair<unsigned, Value*> > IndirectStoresToEmit;
1217  unsigned OpNum = 1;
1218  bool FoundOutputConstraint = false;
1219
1220  // We fully assign registers here at isel time.  This is not optimal, but
1221  // should work.  For register classes that correspond to LLVM classes, we
1222  // could let the LLVM RA do its thing, but we currently don't.  Do a prepass
1223  // over the constraints, collecting fixed registers that we know we can't use.
1224  std::set<unsigned> OutputRegs, InputRegs;
1225  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
1226    assert(Constraints[i].Codes.size() == 1 && "Only handles one code so far!");
1227    std::string &ConstraintCode = Constraints[i].Codes[0];
1228
1229    std::vector<unsigned> Regs =
1230      TLI.getRegForInlineAsmConstraint(ConstraintCode);
1231    if (Regs.size() != 1) continue;  // Not assigned a fixed reg.
1232    unsigned TheReg = Regs[0];
1233
1234    switch (Constraints[i].Type) {
1235    case InlineAsm::isOutput:
1236      // We can't assign any other output to this register.
1237      OutputRegs.insert(TheReg);
1238      // If this is an early-clobber output, it cannot be assigned to the same
1239      // value as the input reg.
1240      if (Constraints[i].isEarlyClobber || Constraints[i].hasMatchingInput)
1241        InputRegs.insert(TheReg);
1242      break;
1243    case InlineAsm::isClobber:
1244      // Clobbered regs cannot be used as inputs or outputs.
1245      InputRegs.insert(TheReg);
1246      OutputRegs.insert(TheReg);
1247      break;
1248    case InlineAsm::isInput:
1249      // We can't assign any other input to this register.
1250      InputRegs.insert(TheReg);
1251      break;
1252    }
1253  }
1254
1255  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
1256    assert(Constraints[i].Codes.size() == 1 && "Only handles one code so far!");
1257    std::string &ConstraintCode = Constraints[i].Codes[0];
1258    switch (Constraints[i].Type) {
1259    case InlineAsm::isOutput: {
1260      // Copy the output from the appropriate register.
1261      std::vector<unsigned> Regs =
1262        TLI.getRegForInlineAsmConstraint(ConstraintCode);
1263
1264      // Find a regsister that we can use.
1265      unsigned DestReg;
1266      if (Regs.size() == 1)
1267        DestReg = Regs[0];
1268      else {
1269        bool UsesInputRegister = false;
1270        // If this is an early-clobber output, or if there is an input
1271        // constraint that matches this, we need to reserve the input register
1272        // so no other inputs allocate to it.
1273        if (Constraints[i].isEarlyClobber || Constraints[i].hasMatchingInput)
1274          UsesInputRegister = true;
1275        DestReg = GetAvailableRegister(true, UsesInputRegister,
1276                                       Regs, OutputRegs, InputRegs);
1277      }
1278
1279      assert(DestReg && "Couldn't allocate output reg!");
1280
1281      const Type *OpTy;
1282      if (!Constraints[i].isIndirectOutput) {
1283        assert(!FoundOutputConstraint &&
1284               "Cannot have multiple output constraints yet!");
1285        FoundOutputConstraint = true;
1286        assert(I.getType() != Type::VoidTy && "Bad inline asm!");
1287
1288        RetValReg = DestReg;
1289        OpTy = I.getType();
1290      } else {
1291        IndirectStoresToEmit.push_back(std::make_pair(DestReg,
1292                                                      I.getOperand(OpNum)));
1293        OpTy = I.getOperand(OpNum)->getType();
1294        OpTy = cast<PointerType>(OpTy)->getElementType();
1295        OpNum++;  // Consumes a call operand.
1296      }
1297
1298      // Add information to the INLINEASM node to know that this register is
1299      // set.
1300      AsmNodeOperands.push_back(DAG.getRegister(DestReg,
1301                                                TLI.getValueType(OpTy)));
1302      AsmNodeOperands.push_back(DAG.getConstant(2, MVT::i32)); // ISDEF
1303
1304      break;
1305    }
1306    case InlineAsm::isInput: {
1307      Value *Operand = I.getOperand(OpNum);
1308      const Type *OpTy = Operand->getType();
1309      OpNum++;  // Consumes a call operand.
1310
1311      unsigned SrcReg;
1312      SDOperand ResOp;
1313      unsigned ResOpType;
1314      SDOperand InOperandVal = getValue(Operand);
1315
1316      if (isdigit(ConstraintCode[0])) {    // Matching constraint?
1317        // If this is required to match an output register we have already set,
1318        // just use its register.
1319        unsigned OperandNo = atoi(ConstraintCode.c_str());
1320        SrcReg = cast<RegisterSDNode>(AsmNodeOperands[OperandNo*2+2])->getReg();
1321        ResOp = DAG.getRegister(SrcReg, TLI.getValueType(OpTy));
1322        ResOpType = 1;
1323
1324        Chain = DAG.getCopyToReg(Chain, SrcReg, InOperandVal, Flag);
1325        Flag = Chain.getValue(1);
1326      } else {
1327        TargetLowering::ConstraintType CTy = TargetLowering::C_RegisterClass;
1328        if (ConstraintCode.size() == 1)   // not a physreg name.
1329          CTy = TLI.getConstraintType(ConstraintCode[0]);
1330
1331        switch (CTy) {
1332        default: assert(0 && "Unknown constraint type! FAIL!");
1333        case TargetLowering::C_RegisterClass: {
1334          // Copy the input into the appropriate register.
1335          std::vector<unsigned> Regs =
1336            TLI.getRegForInlineAsmConstraint(ConstraintCode);
1337          if (Regs.size() == 1)
1338            SrcReg = Regs[0];
1339          else
1340            SrcReg = GetAvailableRegister(false, true, Regs,
1341                                          OutputRegs, InputRegs);
1342          // FIXME: should be match fail.
1343          assert(SrcReg && "Wasn't able to allocate register!");
1344          Chain = DAG.getCopyToReg(Chain, SrcReg, InOperandVal, Flag);
1345          Flag = Chain.getValue(1);
1346
1347          ResOp = DAG.getRegister(SrcReg, TLI.getValueType(OpTy));
1348          ResOpType = 1;
1349          break;
1350        }
1351        case TargetLowering::C_Other:
1352          if (!TLI.isOperandValidForConstraint(InOperandVal, ConstraintCode[0]))
1353            assert(0 && "MATCH FAIL!");
1354          ResOp = InOperandVal;
1355          ResOpType = 3;
1356          break;
1357        }
1358      }
1359
1360      // Add information to the INLINEASM node to know about this input.
1361      AsmNodeOperands.push_back(ResOp);
1362      AsmNodeOperands.push_back(DAG.getConstant(ResOpType, MVT::i32));
1363      break;
1364    }
1365    case InlineAsm::isClobber:
1366      // Nothing to do.
1367      break;
1368    }
1369  }
1370
1371  // Finish up input operands.
1372  AsmNodeOperands[0] = Chain;
1373  if (Flag.Val) AsmNodeOperands.push_back(Flag);
1374
1375  std::vector<MVT::ValueType> VTs;
1376  VTs.push_back(MVT::Other);
1377  VTs.push_back(MVT::Flag);
1378  Chain = DAG.getNode(ISD::INLINEASM, VTs, AsmNodeOperands);
1379  Flag = Chain.getValue(1);
1380
1381  // If this asm returns a register value, copy the result from that register
1382  // and set it as the value of the call.
1383  if (RetValReg) {
1384    SDOperand Val = DAG.getCopyFromReg(Chain, RetValReg,
1385                                       TLI.getValueType(I.getType()), Flag);
1386    Chain = Val.getValue(1);
1387    Flag  = Val.getValue(2);
1388    setValue(&I, Val);
1389  }
1390
1391  std::vector<std::pair<SDOperand, Value*> > StoresToEmit;
1392
1393  // Process indirect outputs, first output all of the flagged copies out of
1394  // physregs.
1395  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
1396    Value *Ptr = IndirectStoresToEmit[i].second;
1397    const Type *Ty = cast<PointerType>(Ptr->getType())->getElementType();
1398    SDOperand Val = DAG.getCopyFromReg(Chain, IndirectStoresToEmit[i].first,
1399                                       TLI.getValueType(Ty), Flag);
1400    Chain = Val.getValue(1);
1401    Flag  = Val.getValue(2);
1402    StoresToEmit.push_back(std::make_pair(Val, Ptr));
1403  }
1404
1405  // Emit the non-flagged stores from the physregs.
1406  std::vector<SDOperand> OutChains;
1407  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i)
1408    OutChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1409                                    StoresToEmit[i].first,
1410                                    getValue(StoresToEmit[i].second),
1411                                    DAG.getSrcValue(StoresToEmit[i].second)));
1412  if (!OutChains.empty())
1413    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains);
1414  DAG.setRoot(Chain);
1415}
1416
1417
1418void SelectionDAGLowering::visitMalloc(MallocInst &I) {
1419  SDOperand Src = getValue(I.getOperand(0));
1420
1421  MVT::ValueType IntPtr = TLI.getPointerTy();
1422
1423  if (IntPtr < Src.getValueType())
1424    Src = DAG.getNode(ISD::TRUNCATE, IntPtr, Src);
1425  else if (IntPtr > Src.getValueType())
1426    Src = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, Src);
1427
1428  // Scale the source by the type size.
1429  uint64_t ElementSize = TD.getTypeSize(I.getType()->getElementType());
1430  Src = DAG.getNode(ISD::MUL, Src.getValueType(),
1431                    Src, getIntPtrConstant(ElementSize));
1432
1433  std::vector<std::pair<SDOperand, const Type*> > Args;
1434  Args.push_back(std::make_pair(Src, TLI.getTargetData().getIntPtrType()));
1435
1436  std::pair<SDOperand,SDOperand> Result =
1437    TLI.LowerCallTo(getRoot(), I.getType(), false, CallingConv::C, true,
1438                    DAG.getExternalSymbol("malloc", IntPtr),
1439                    Args, DAG);
1440  setValue(&I, Result.first);  // Pointers always fit in registers
1441  DAG.setRoot(Result.second);
1442}
1443
1444void SelectionDAGLowering::visitFree(FreeInst &I) {
1445  std::vector<std::pair<SDOperand, const Type*> > Args;
1446  Args.push_back(std::make_pair(getValue(I.getOperand(0)),
1447                                TLI.getTargetData().getIntPtrType()));
1448  MVT::ValueType IntPtr = TLI.getPointerTy();
1449  std::pair<SDOperand,SDOperand> Result =
1450    TLI.LowerCallTo(getRoot(), Type::VoidTy, false, CallingConv::C, true,
1451                    DAG.getExternalSymbol("free", IntPtr), Args, DAG);
1452  DAG.setRoot(Result.second);
1453}
1454
// InsertAtEndOfBasicBlock - This method should be implemented by targets that
// mark instructions with the 'usesCustomDAGSchedInserter' flag.  These
// instructions are special in various ways, which require special support to
// insert.  The specified MachineInstr is created but not inserted into any
// basic blocks, and the scheduler passes ownership of it to this method.
//
// This default implementation always aborts: any target that sets the flag on
// an instruction is required to override it.
MachineBasicBlock *TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                                       MachineBasicBlock *MBB) {
  std::cerr << "If a target marks an instruction with "
               "'usesCustomDAGSchedInserter', it must implement "
               "TargetLowering::InsertAtEndOfBasicBlock!\n";
  abort();
  return 0;  // Not reached; silences missing-return warnings.
}
1468
1469void SelectionDAGLowering::visitVAStart(CallInst &I) {
1470  DAG.setRoot(DAG.getNode(ISD::VASTART, MVT::Other, getRoot(),
1471                          getValue(I.getOperand(1)),
1472                          DAG.getSrcValue(I.getOperand(1))));
1473}
1474
1475void SelectionDAGLowering::visitVAArg(VAArgInst &I) {
1476  SDOperand V = DAG.getVAArg(TLI.getValueType(I.getType()), getRoot(),
1477                             getValue(I.getOperand(0)),
1478                             DAG.getSrcValue(I.getOperand(0)));
1479  setValue(&I, V);
1480  DAG.setRoot(V.getValue(1));
1481}
1482
1483void SelectionDAGLowering::visitVAEnd(CallInst &I) {
1484  DAG.setRoot(DAG.getNode(ISD::VAEND, MVT::Other, getRoot(),
1485                          getValue(I.getOperand(1)),
1486                          DAG.getSrcValue(I.getOperand(1))));
1487}
1488
1489void SelectionDAGLowering::visitVACopy(CallInst &I) {
1490  DAG.setRoot(DAG.getNode(ISD::VACOPY, MVT::Other, getRoot(),
1491                          getValue(I.getOperand(1)),
1492                          getValue(I.getOperand(2)),
1493                          DAG.getSrcValue(I.getOperand(1)),
1494                          DAG.getSrcValue(I.getOperand(2))));
1495}
1496
1497// It is always conservatively correct for llvm.returnaddress and
1498// llvm.frameaddress to return 0.
1499std::pair<SDOperand, SDOperand>
1500TargetLowering::LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain,
1501                                        unsigned Depth, SelectionDAG &DAG) {
1502  return std::make_pair(DAG.getConstant(0, getPointerTy()), Chain);
1503}
1504
/// LowerOperation - Hook for operations the target has marked 'Custom'.  Any
/// target that requests custom lowering must override this; this default
/// implementation always aborts.
SDOperand TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  assert(0 && "LowerOperation not implemented for this target!");
  abort();
  return SDOperand();  // Not reached; silences missing-return warnings.
}
1510
/// CustomPromoteOperation - Hook for operations the target has marked for
/// custom promotion.  Targets using this must override it; the default aborts.
SDOperand TargetLowering::CustomPromoteOperation(SDOperand Op,
                                                 SelectionDAG &DAG) {
  assert(0 && "CustomPromoteOperation not implemented for this target!");
  abort();
  return SDOperand();  // Not reached; silences missing-return warnings.
}
1517
1518void SelectionDAGLowering::visitFrameReturnAddress(CallInst &I, bool isFrame) {
1519  unsigned Depth = (unsigned)cast<ConstantUInt>(I.getOperand(1))->getValue();
1520  std::pair<SDOperand,SDOperand> Result =
1521    TLI.LowerFrameReturnAddress(isFrame, getRoot(), Depth, DAG);
1522  setValue(&I, Result.first);
1523  DAG.setRoot(Result.second);
1524}
1525
1526void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) {
1527  std::vector<SDOperand> Ops;
1528  Ops.push_back(getRoot());
1529  Ops.push_back(getValue(I.getOperand(1)));
1530  Ops.push_back(getValue(I.getOperand(2)));
1531  Ops.push_back(getValue(I.getOperand(3)));
1532  Ops.push_back(getValue(I.getOperand(4)));
1533  DAG.setRoot(DAG.getNode(Op, MVT::Other, Ops));
1534}
1535
1536//===----------------------------------------------------------------------===//
1537// SelectionDAGISel code
1538//===----------------------------------------------------------------------===//
1539
1540unsigned SelectionDAGISel::MakeReg(MVT::ValueType VT) {
1541  return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
1542}
1543
/// getAnalysisUsage - Declare this pass's analysis requirements and effects.
/// Intentionally empty for now: the pass conservatively invalidates all
/// analyses even though its only CFG change is critical-edge splitting.
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
  // FIXME: we only modify the CFG to split critical edges.  This
  // updates dom and loop info.
}
1548
1549
1550/// InsertGEPComputeCode - Insert code into BB to compute Ptr+PtrOffset,
1551/// casting to the type of GEPI.
1552static Value *InsertGEPComputeCode(Value *&V, BasicBlock *BB, Instruction *GEPI,
1553                                   Value *Ptr, Value *PtrOffset) {
1554  if (V) return V;   // Already computed.
1555
1556  BasicBlock::iterator InsertPt;
1557  if (BB == GEPI->getParent()) {
1558    // If insert into the GEP's block, insert right after the GEP.
1559    InsertPt = GEPI;
1560    ++InsertPt;
1561  } else {
1562    // Otherwise, insert at the top of BB, after any PHI nodes
1563    InsertPt = BB->begin();
1564    while (isa<PHINode>(InsertPt)) ++InsertPt;
1565  }
1566
1567  // If Ptr is itself a cast, but in some other BB, emit a copy of the cast into
1568  // BB so that there is only one value live across basic blocks (the cast
1569  // operand).
1570  if (CastInst *CI = dyn_cast<CastInst>(Ptr))
1571    if (CI->getParent() != BB && isa<PointerType>(CI->getOperand(0)->getType()))
1572      Ptr = new CastInst(CI->getOperand(0), CI->getType(), "", InsertPt);
1573
1574  // Add the offset, cast it to the right type.
1575  Ptr = BinaryOperator::createAdd(Ptr, PtrOffset, "", InsertPt);
1576  Ptr = new CastInst(Ptr, GEPI->getType(), "", InsertPt);
1577  return V = Ptr;
1578}
1579
1580
/// OptimizeGEPExpression - Since we are doing basic-block-at-a-time instruction
/// selection, we want to be a bit careful about some things.  In particular, if
/// we have a GEP instruction that is used in a different block than it is
/// defined, the addressing expression of the GEP cannot be folded into loads or
/// stores that use it.  In this case, decompose the GEP into a base-pointer
/// computation (emitted in the defining block) plus a constant offset, and
/// re-emit the "base + constant offset" add in each block that uses it so the
/// add is foldable into that block's loads/stores.
static void OptimizeGEPExpression(GetElementPtrInst *GEPI,
                                  const TargetData &TD) {
  // If this GEP is only used inside the block it is defined in, there is no
  // need to rewrite it.
  bool isUsedOutsideDefBB = false;
  BasicBlock *DefBB = GEPI->getParent();
  for (Value::use_iterator UI = GEPI->use_begin(), E = GEPI->use_end();
       UI != E; ++UI) {
    if (cast<Instruction>(*UI)->getParent() != DefBB) {
      isUsedOutsideDefBB = true;
      break;
    }
  }
  if (!isUsedOutsideDefBB) return;

  // If this GEP has no non-zero constant indices, there is nothing we can do,
  // ignore it.
  bool hasConstantIndex = false;
  for (GetElementPtrInst::op_iterator OI = GEPI->op_begin()+1,
       E = GEPI->op_end(); OI != E; ++OI) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(*OI))
      if (CI->getRawValue()) {    // Non-zero constant index found.
        hasConstantIndex = true;
        break;
      }
  }
  // If this is a GEP &Alloca, 0, 0, forward subst the frame index into uses.
  if (!hasConstantIndex && !isa<AllocaInst>(GEPI->getOperand(0))) return;

  // Otherwise, decompose the GEP instruction into multiplies and adds.  Sum the
  // constant offset (which we now know is non-zero) and deal with it later.
  uint64_t ConstantOffset = 0;
  // All arithmetic is done in the target's integer-sized-like-a-pointer type.
  const Type *UIntPtrTy = TD.getIntPtrType();
  Value *Ptr = new CastInst(GEPI->getOperand(0), UIntPtrTy, "", GEPI);
  const Type *Ty = GEPI->getOperand(0)->getType();

  // Walk the indices, peeling one level of the pointee type per index.
  for (GetElementPtrInst::op_iterator OI = GEPI->op_begin()+1,
       E = GEPI->op_end(); OI != E; ++OI) {
    Value *Idx = *OI;
    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
      // Struct indices are always constant; accumulate the field offset.
      unsigned Field = cast<ConstantUInt>(Idx)->getValue();
      if (Field)
        ConstantOffset += TD.getStructLayout(StTy)->MemberOffsets[Field];
      Ty = StTy->getElementType(Field);
    } else {
      // Array/pointer index: scale by the element size.
      Ty = cast<SequentialType>(Ty)->getElementType();

      // Handle constant subscripts.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->getRawValue() == 0) continue;

        // Signed indices may be negative; the multiply is done in int64_t and
        // wraps correctly into the unsigned accumulator.
        if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(CI))
          ConstantOffset += (int64_t)TD.getTypeSize(Ty)*CSI->getValue();
        else
          ConstantOffset+=TD.getTypeSize(Ty)*cast<ConstantUInt>(CI)->getValue();
        continue;
      }

      // Ptr = Ptr + Idx * ElementSize;

      // Cast Idx to UIntPtrTy if needed.
      Idx = new CastInst(Idx, UIntPtrTy, "", GEPI);

      uint64_t ElementSize = TD.getTypeSize(Ty);
      // Mask off bits that should not be set.
      ElementSize &= ~0ULL >> (64-UIntPtrTy->getPrimitiveSizeInBits());
      Constant *SizeCst = ConstantUInt::get(UIntPtrTy, ElementSize);

      // Multiply by the element size and add to the base.
      Idx = BinaryOperator::createMul(Idx, SizeCst, "", GEPI);
      Ptr = BinaryOperator::createAdd(Ptr, Idx, "", GEPI);
    }
  }

  // Make sure that the offset fits in uintptr_t.
  ConstantOffset &= ~0ULL >> (64-UIntPtrTy->getPrimitiveSizeInBits());
  Constant *PtrOffset = ConstantUInt::get(UIntPtrTy, ConstantOffset);

  // Okay, we have now emitted all of the variable index parts to the BB that
  // the GEP is defined in.  Loop over all of the using instructions, inserting
  // an "add Ptr, ConstantOffset" into each block that uses it and update the
  // instruction to use the newly computed value, making GEPI dead.  When the
  // user is a load or store instruction address, we emit the add into the user
  // block, otherwise we use a canonical version right next to the gep (these
  // won't be foldable as addresses, so we might as well share the computation).

  // Per-block cache so each block materializes the add at most once.
  std::map<BasicBlock*,Value*> InsertedExprs;
  while (!GEPI->use_empty()) {
    Instruction *User = cast<Instruction>(GEPI->use_back());

    // If this use is not foldable into the addressing mode, use a version
    // emitted in the GEP block.  (A store whose *value* operand is the GEP is
    // not foldable either, hence the extra check.)
    Value *NewVal;
    if (!isa<LoadInst>(User) &&
        (!isa<StoreInst>(User) || User->getOperand(0) == GEPI)) {
      NewVal = InsertGEPComputeCode(InsertedExprs[DefBB], DefBB, GEPI,
                                    Ptr, PtrOffset);
    } else {
      // Otherwise, insert the code in the User's block so it can be folded into
      // any users in that block.
      NewVal = InsertGEPComputeCode(InsertedExprs[User->getParent()],
                                    User->getParent(), GEPI,
                                    Ptr, PtrOffset);
    }
    User->replaceUsesOfWith(GEPI, NewVal);
  }

  // Finally, the GEP is dead, remove it.
  GEPI->eraseFromParent();
}
1697
/// runOnFunction - Entry point of the pass: construct the MachineFunction,
/// run the pre-selection IR cleanups (critical-edge splitting and cross-block
/// GEP rewriting), then select each basic block in turn.
bool SelectionDAGISel::runOnFunction(Function &Fn) {
  MachineFunction &MF = MachineFunction::construct(&Fn, TLI.getTargetMachine());
  RegMap = MF.getSSARegMap();
  DEBUG(std::cerr << "\n\n\n=== " << Fn.getName() << "\n");

  // First, split all critical edges for PHI nodes with incoming values that are
  // constants, this way the load of the constant into a vreg will not be placed
  // into MBBs that are used some other way.
  //
  // In this pass we also look for GEP instructions that are used across basic
  // blocks and rewrites them to improve basic-block-at-a-time selection.
  //
  for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
    PHINode *PN;
    BasicBlock::iterator BBI;
    for (BBI = BB->begin(); (PN = dyn_cast<PHINode>(BBI)); ++BBI)
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        if (isa<Constant>(PN->getIncomingValue(i)))
          SplitCriticalEdge(PN->getIncomingBlock(i), BB);

    // NOTE: BBI intentionally carries over from the PHI scan above, so this
    // loop starts at the first non-PHI instruction.  This inner E (a
    // BasicBlock::iterator) shadows the outer Function::iterator E.  BBI is
    // advanced before OptimizeGEPExpression runs because that call may erase
    // the GEP, which would invalidate an iterator pointing at it.
    for (BasicBlock::iterator E = BB->end(); BBI != E; )
      if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(BBI++))
        OptimizeGEPExpression(GEPI, TLI.getTargetData());
  }

  FunctionLoweringInfo FuncInfo(TLI, Fn, MF);

  // Lower and select every basic block of the function.
  for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
    SelectBasicBlock(I, MF, FuncInfo);

  return true;
}
1730
1731
1732SDOperand SelectionDAGISel::
1733CopyValueToVirtualRegister(SelectionDAGLowering &SDL, Value *V, unsigned Reg) {
1734  SDOperand Op = SDL.getValue(V);
1735  assert((Op.getOpcode() != ISD::CopyFromReg ||
1736          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
1737         "Copy from a reg to the same reg!");
1738
1739  // If this type is not legal, we must make sure to not create an invalid
1740  // register use.
1741  MVT::ValueType SrcVT = Op.getValueType();
1742  MVT::ValueType DestVT = TLI.getTypeToTransformTo(SrcVT);
1743  SelectionDAG &DAG = SDL.DAG;
1744  if (SrcVT == DestVT) {
1745    return DAG.getCopyToReg(SDL.getRoot(), Reg, Op);
1746  } else if (SrcVT < DestVT) {
1747    // The src value is promoted to the register.
1748    if (MVT::isFloatingPoint(SrcVT))
1749      Op = DAG.getNode(ISD::FP_EXTEND, DestVT, Op);
1750    else
1751      Op = DAG.getNode(ISD::ANY_EXTEND, DestVT, Op);
1752    return DAG.getCopyToReg(SDL.getRoot(), Reg, Op);
1753  } else  {
1754    // The src value is expanded into multiple registers.
1755    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DestVT,
1756                               Op, DAG.getConstant(0, MVT::i32));
1757    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DestVT,
1758                               Op, DAG.getConstant(1, MVT::i32));
1759    Op = DAG.getCopyToReg(SDL.getRoot(), Reg, Lo);
1760    return DAG.getCopyToReg(Op, Reg+1, Hi);
1761  }
1762}
1763
/// LowerArguments - Called for the entry block only: lower the function's
/// formal arguments via the target hook, record each argument's SDOperand,
/// and copy any argument that is live outside the entry block into the vreg
/// other blocks reference it by.  Also materializes physreg live-in copies
/// and gives the target a chance to emit special entry code.
void SelectionDAGISel::
LowerArguments(BasicBlock *BB, SelectionDAGLowering &SDL,
               std::vector<SDOperand> &UnorderedChains) {
  // If this is the entry block, emit arguments.
  Function &F = *BB->getParent();
  FunctionLoweringInfo &FuncInfo = SDL.FuncInfo;
  SDOperand OldRoot = SDL.DAG.getRoot();
  // Args[i] is the lowered value of the i'th formal argument.
  std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);

  unsigned a = 0;
  for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
       AI != E; ++AI, ++a)
    if (!AI->use_empty()) {
      SDL.setValue(AI, Args[a]);

      // If this argument is live outside of the entry block, insert a copy from
      // wherever we got it to the vreg that other BB's will reference it as.
      if (FuncInfo.ValueMap.count(AI)) {
        SDOperand Copy =
          CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]);
        UnorderedChains.push_back(Copy);
      }
    }

  // Next, if the function has live ins that need to be copied into vregs,
  // emit the copies now, into the top of the block.
  MachineFunction &MF = SDL.DAG.getMachineFunction();
  if (MF.livein_begin() != MF.livein_end()) {
    SSARegMap *RegMap = MF.getSSARegMap();
    const MRegisterInfo &MRI = *MF.getTarget().getRegisterInfo();
    for (MachineFunction::livein_iterator LI = MF.livein_begin(),
         E = MF.livein_end(); LI != E; ++LI)
      if (LI->second)  // Only live-ins that were assigned a vreg.
        MRI.copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
                         LI->first, RegMap->getRegClass(LI->second));
  }

  // Finally, if the target has anything special to do, allow it to do so.
  EmitFunctionEntryCode(F, SDL.DAG.getMachineFunction());
}
1804
1805
/// BuildSelectionDAG - Lower the LLVM basic block LLVMBB into DAG.  Records
/// in PHINodesToUpdate the (machine PHI, vreg) pairs that must be patched up
/// after selection, since expansion may split the block into several MBBs and
/// only the final MBB is known later.  Sets the member BB to the MBB being
/// filled.
void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB,
       std::vector<std::pair<MachineInstr*, unsigned> > &PHINodesToUpdate,
                                    FunctionLoweringInfo &FuncInfo) {
  SelectionDAGLowering SDL(DAG, TLI, FuncInfo);

  // Copy-to-vreg chains collected here and token-factored into the root at
  // the end, so the copies are not artificially ordered with other code.
  std::vector<SDOperand> UnorderedChains;

  // Lower any arguments needed in this block if this is the entry block.
  if (LLVMBB == &LLVMBB->getParent()->front())
    LowerArguments(LLVMBB, SDL, UnorderedChains);

  BB = FuncInfo.MBBMap[LLVMBB];
  SDL.setCurrentBasicBlock(BB);

  // Lower all of the non-terminator instructions.
  for (BasicBlock::iterator I = LLVMBB->begin(), E = --LLVMBB->end();
       I != E; ++I)
    SDL.visit(*I);

  // Ensure that all instructions which are used outside of their defining
  // blocks are available as virtual registers.
  for (BasicBlock::iterator I = LLVMBB->begin(), E = LLVMBB->end(); I != E;++I)
    if (!I->use_empty() && !isa<PHINode>(I)) {
      std::map<const Value*, unsigned>::iterator VMI =FuncInfo.ValueMap.find(I);
      if (VMI != FuncInfo.ValueMap.end())
        UnorderedChains.push_back(
                           CopyValueToVirtualRegister(SDL, I, VMI->second));
    }

  // Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
  // ensure constants are generated when needed.  Remember the virtual registers
  // that need to be added to the Machine PHI nodes as input.  We cannot just
  // directly add them, because expansion might result in multiple MBB's for one
  // BB.  As such, the start of the BB might correspond to a different MBB than
  // the end.
  //

  // Emit constants only once even if used by multiple PHI nodes.
  std::map<Constant*, unsigned> ConstantsOut;

  // Check successor nodes PHI nodes that expect a constant to be available from
  // this block.
  TerminatorInst *TI = LLVMBB->getTerminator();
  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
    BasicBlock *SuccBB = TI->getSuccessor(succ);
    MachineBasicBlock::iterator MBBI = FuncInfo.MBBMap[SuccBB]->begin();
    PHINode *PN;

    // At this point we know that there is a 1-1 correspondence between LLVM PHI
    // nodes and Machine PHI nodes, but the incoming operands have not been
    // emitted yet.
    for (BasicBlock::iterator I = SuccBB->begin();
         (PN = dyn_cast<PHINode>(I)); ++I)
      if (!PN->use_empty()) {
        unsigned Reg;
        Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
        if (Constant *C = dyn_cast<Constant>(PHIOp)) {
          // Constant incoming value: materialize it into a vreg (at most once
          // per constant, via ConstantsOut).
          unsigned &RegOut = ConstantsOut[C];
          if (RegOut == 0) {
            RegOut = FuncInfo.CreateRegForValue(C);
            UnorderedChains.push_back(
                             CopyValueToVirtualRegister(SDL, C, RegOut));
          }
          Reg = RegOut;
        } else {
          Reg = FuncInfo.ValueMap[PHIOp];
          if (Reg == 0) {
            // Only static allocas may legitimately lack a vreg here; give the
            // value one and emit the copy.
            assert(isa<AllocaInst>(PHIOp) &&
                   FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
                   "Didn't codegen value into a register!??");
            Reg = FuncInfo.CreateRegForValue(PHIOp);
            UnorderedChains.push_back(
                             CopyValueToVirtualRegister(SDL, PHIOp, Reg));
          }
        }

        // Remember that this register needs to added to the machine PHI node as
        // the input for this MBB.  Expanded values occupy NumElements
        // consecutive vregs starting at Reg, one machine PHI node each.
        unsigned NumElements =
          TLI.getNumElements(TLI.getValueType(PN->getType()));
        for (unsigned i = 0, e = NumElements; i != e; ++i)
          PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
      }
  }
  ConstantsOut.clear();

  // Turn all of the unordered chains into one factored node.
  if (!UnorderedChains.empty()) {
    SDOperand Root = SDL.getRoot();
    if (Root.getOpcode() != ISD::EntryToken) {
      unsigned i = 0, e = UnorderedChains.size();
      for (; i != e; ++i) {
        assert(UnorderedChains[i].Val->getNumOperands() > 1);
        if (UnorderedChains[i].Val->getOperand(0) == Root)
          break;  // Don't add the root if we already indirectly depend on it.
      }

      if (i == e)
        UnorderedChains.push_back(Root);
    }
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, UnorderedChains));
  }

  // Lower the terminator after the copies are emitted.
  SDL.visit(*LLVMBB->getTerminator());

  // Make sure the root of the DAG is up-to-date.
  DAG.setRoot(SDL.getRoot());
}
1915
1916void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, MachineFunction &MF,
1917                                        FunctionLoweringInfo &FuncInfo) {
1918  SelectionDAG DAG(TLI, MF, getAnalysisToUpdate<MachineDebugInfo>());
1919  CurDAG = &DAG;
1920  std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
1921
1922  // First step, lower LLVM code to some DAG.  This DAG may use operations and
1923  // types that are not supported by the target.
1924  BuildSelectionDAG(DAG, LLVMBB, PHINodesToUpdate, FuncInfo);
1925
1926  // Run the DAG combiner in pre-legalize mode.
1927  DAG.Combine(false);
1928
1929  DEBUG(std::cerr << "Lowered selection DAG:\n");
1930  DEBUG(DAG.dump());
1931
1932  // Second step, hack on the DAG until it only uses operations and types that
1933  // the target supports.
1934  DAG.Legalize();
1935
1936  DEBUG(std::cerr << "Legalized selection DAG:\n");
1937  DEBUG(DAG.dump());
1938
1939  // Run the DAG combiner in post-legalize mode.
1940  DAG.Combine(true);
1941
1942  if (ViewISelDAGs) DAG.viewGraph();
1943
1944  // Third, instruction select all of the operations to machine code, adding the
1945  // code to the MachineBasicBlock.
1946  InstructionSelectBasicBlock(DAG);
1947
1948  DEBUG(std::cerr << "Selected machine code:\n");
1949  DEBUG(BB->dump());
1950
1951  // Next, now that we know what the last MBB the LLVM BB expanded is, update
1952  // PHI nodes in successors.
1953  for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) {
1954    MachineInstr *PHI = PHINodesToUpdate[i].first;
1955    assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
1956           "This is not a machine PHI node that we are updating!");
1957    PHI->addRegOperand(PHINodesToUpdate[i].second);
1958    PHI->addMachineBasicBlockOperand(BB);
1959  }
1960
1961  // Finally, add the CFG edges from the last selected MBB to the successor
1962  // MBBs.
1963  TerminatorInst *TI = LLVMBB->getTerminator();
1964  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
1965    MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[TI->getSuccessor(i)];
1966    BB->addSuccessor(Succ0MBB);
1967  }
1968}
1969
1970//===----------------------------------------------------------------------===//
1971/// ScheduleAndEmitDAG - Pick a safe ordering and emit instructions for each
1972/// target node in the graph.
1973void SelectionDAGISel::ScheduleAndEmitDAG(SelectionDAG &DAG) {
1974  if (ViewSchedDAGs) DAG.viewGraph();
1975  ScheduleDAG *SL = NULL;
1976
1977  switch (ISHeuristic) {
1978  default: assert(0 && "Unrecognized scheduling heuristic");
1979  case defaultScheduling:
1980    if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency)
1981      SL = createSimpleDAGScheduler(noScheduling, DAG, BB);
1982    else /* TargetLowering::SchedulingForRegPressure */
1983      SL = createBURRListDAGScheduler(DAG, BB);
1984    break;
1985  case noScheduling:
1986  case simpleScheduling:
1987  case simpleNoItinScheduling:
1988    SL = createSimpleDAGScheduler(ISHeuristic, DAG, BB);
1989    break;
1990  case listSchedulingBURR:
1991    SL = createBURRListDAGScheduler(DAG, BB);
1992  }
1993  BB = SL->Run();
1994  delete SL;
1995}
1996