SelectionDAGISel.cpp revision 860771d2d86243b65ec16fac6cc57b285078f138
//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the SelectionDAGISel class.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "isel"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineDebugInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
#include <map>
#include <set>
#include <iostream>
#include <algorithm>
using namespace llvm;

#ifndef NDEBUG
static cl::opt<bool>
ViewISelDAGs("view-isel-dags", cl::Hidden,
          cl::desc("Pop up a window to show isel dags as they are selected"));
static cl::opt<bool>
ViewSchedDAGs("view-sched-dags", cl::Hidden,
          cl::desc("Pop up a window to show sched dags as they are processed"));
#else
static const bool ViewISelDAGs = 0;
static const bool ViewSchedDAGs = 0;
#endif

namespace {
  cl::opt<SchedHeuristics>
  ISHeuristic(
    "sched",
    cl::desc("Choose scheduling style"),
    cl::init(defaultScheduling),
    cl::values(
      clEnumValN(defaultScheduling, "default",
                 "Target preferred scheduling style"),
      clEnumValN(noScheduling, "none",
                 "No scheduling: breadth first sequencing"),
      clEnumValN(simpleScheduling, "simple",
                 "Simple two pass scheduling: minimize critical path "
                 "and maximize processor utilization"),
      clEnumValN(simpleNoItinScheduling, "simple-noitin",
                 "Simple two pass scheduling: Same as simple "
                 "except using generic latency"),
      clEnumValN(listSchedulingBURR, "list-burr",
                 "Bottom up register reduction list scheduling"),
      clEnumValEnd));
} // namespace

namespace {
  /// RegsForValue - This struct represents the physical registers that a
  /// particular value is assigned to, and the type information about the
  /// value.  This is needed because a value can be promoted into a larger
  /// register or expanded into multiple registers smaller than the value.
  struct RegsForValue {
    /// Regs - This list holds the register (for legal and promoted values)
    /// or register set (for expanded values) that the value should be assigned
    /// to.
    std::vector<unsigned> Regs;

    /// RegVT - The value type of each register.
    ///
    MVT::ValueType RegVT;

    /// ValueVT - The value type of the LLVM value, which may be promoted from
    /// RegVT or made from merging the two expanded parts.
    MVT::ValueType ValueVT;

    RegsForValue() : RegVT(MVT::Other), ValueVT(MVT::Other) {}

    RegsForValue(unsigned Reg, MVT::ValueType regvt, MVT::ValueType valuevt)
      : RegVT(regvt), ValueVT(valuevt) {
        Regs.push_back(Reg);
    }
    RegsForValue(const std::vector<unsigned> &regs,
                 MVT::ValueType regvt, MVT::ValueType valuevt)
      : Regs(regs), RegVT(regvt), ValueVT(valuevt) {
    }

    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
    /// this value and returns the result as a ValueVT value.  This uses
    /// Chain/Flag as the input and updates them for the output Chain/Flag.
    SDOperand getCopyFromRegs(SelectionDAG &DAG,
                              SDOperand &Chain, SDOperand &Flag) const;

    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
    /// specified value into the registers specified by this object.  This uses
    /// Chain/Flag as the input and updates them for the output Chain/Flag.
    void getCopyToRegs(SDOperand Val, SelectionDAG &DAG,
                       SDOperand &Chain, SDOperand &Flag) const;

    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
    /// operand list.  This adds the code marker and includes the number of
    /// values added into it.
    void AddInlineAsmOperands(unsigned Code, SelectionDAG &DAG,
                              std::vector<SDOperand> &Ops) const;
  };
}
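
// Editorial note (illustrative sketch, not part of the original code): on a
// hypothetical 32-bit target where i64 is not legal, an i64 value is
// "expanded" into two consecutive i32 registers, and the struct would be
// populated roughly as:
//
//   std::vector<unsigned> Parts;      // e.g. two i32 vregs {R0, R1}
//   Parts.push_back(R0);
//   Parts.push_back(R1);
//   RegsForValue RFV(Parts, MVT::i32 /*RegVT*/, MVT::i64 /*ValueVT*/);
//
// For a promoted value (say an i16 held in an i32 register), Regs has a
// single entry and RegVT (MVT::i32) differs from ValueVT (MVT::i16).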

namespace llvm {
  //===--------------------------------------------------------------------===//
  /// FunctionLoweringInfo - This contains information that is global to a
  /// function that is used when lowering a region of the function.
  class FunctionLoweringInfo {
  public:
    TargetLowering &TLI;
    Function &Fn;
    MachineFunction &MF;
    SSARegMap *RegMap;

    FunctionLoweringInfo(TargetLowering &TLI, Function &Fn,MachineFunction &MF);

    /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
    std::map<const BasicBlock*, MachineBasicBlock *> MBBMap;

    /// ValueMap - Since we emit code for the function a basic block at a time,
    /// we must remember which virtual registers hold the values that are
    /// used across basic blocks.
    std::map<const Value*, unsigned> ValueMap;

    /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
    /// the entry block.  This allows the allocas to be efficiently referenced
    /// anywhere in the function.
    std::map<const AllocaInst*, int> StaticAllocaMap;

    unsigned MakeReg(MVT::ValueType VT) {
      return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
    }

    unsigned CreateRegForValue(const Value *V) {
      MVT::ValueType VT = TLI.getValueType(V->getType());
      // The common case is that we will only create one register for this
      // value.  If we have that case, create and return the virtual register.
      unsigned NV = TLI.getNumElements(VT);
      if (NV == 1) {
        // If we are promoting this value, pick the next largest supported type.
        return MakeReg(TLI.getTypeToTransformTo(VT));
      }

      // If this value is represented with multiple target registers, make sure
      // to create enough consecutive registers of the right (smaller) type.
      unsigned NT = VT-1;  // Find the type to use.
      while (TLI.getNumElements((MVT::ValueType)NT) != 1)
        --NT;

      unsigned R = MakeReg((MVT::ValueType)NT);
      for (unsigned i = 1; i != NV; ++i)
        MakeReg((MVT::ValueType)NT);
      return R;
    }
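
    // Worked example (editorial sketch): for an i64 value on a target whose
    // largest legal integer type is i32, getNumElements(MVT::i64) == 2, so
    // the loop above walks down from i64 until getNumElements yields 1 (at
    // i32) and then creates two consecutive i32 virtual registers, returning
    // the first.  The companion register is implicitly R+1, which getValue()
    // later relies on when it reassembles the value with ISD::BUILD_PAIR.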

    unsigned InitializeRegForValue(const Value *V) {
      unsigned &R = ValueMap[V];
      assert(R == 0 && "Already initialized this value register!");
      return R = CreateRegForValue(V);
    }
  };
}

/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
/// PHI nodes or outside of the basic block that defines it.
static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
  if (isa<PHINode>(I)) return true;
  BasicBlock *BB = I->getParent();
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
    if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
      return true;
  return false;
}
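
// Editorial example (illustrative, LLVM 1.x syntax): given
//   bb1:  %x = add int %a, %b
//   bb2:  %y = mul int %x, 2
// the add's result is used outside bb1, so isUsedOutsideOfDefiningBlock
// returns true and %x is given a cross-block virtual register; a value used
// only within its own block stays a purely local SelectionDAG node.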

/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true.
static bool isOnlyUsedInEntryBlock(Argument *A) {
  BasicBlock *Entry = A->getParent()->begin();
  for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
    if (cast<Instruction>(*UI)->getParent() != Entry)
      return false;  // Use not in entry block.
  return true;
}

FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli,
                                           Function &fn, MachineFunction &mf)
    : TLI(tli), Fn(fn), MF(mf), RegMap(MF.getSSARegMap()) {

  // Create a vreg for each argument that is not dead and is used outside
  // of the entry block for the function.
  for (Function::arg_iterator AI = Fn.arg_begin(), E = Fn.arg_end();
       AI != E; ++AI)
    if (!isOnlyUsedInEntryBlock(AI))
      InitializeRegForValue(AI);

  // Initialize the mapping of values to registers.  This is only set up for
  // instruction values that are used outside of the block that defines
  // them.
  Function::iterator BB = Fn.begin(), EB = Fn.end();
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(AI->getArraySize())) {
        const Type *Ty = AI->getAllocatedType();
        uint64_t TySize = TLI.getTargetData().getTypeSize(Ty);
        unsigned Align =
          std::max((unsigned)TLI.getTargetData().getTypeAlignment(Ty),
                   AI->getAlignment());

        // If the alignment of the value is smaller than the size of the value,
        // and if the size of the value is particularly small (<= 8 bytes),
        // round up to the size of the value for potentially better performance.
        //
        // FIXME: This could be made better with a preferred alignment hook in
        // TargetData.  It serves primarily to 8-byte align doubles for X86.
        if (Align < TySize && TySize <= 8) Align = TySize;
        TySize *= CUI->getValue();   // Get total allocated size.
        if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
        StaticAllocaMap[AI] =
          MF.getFrameInfo()->CreateStackObject((unsigned)TySize, Align);
      }
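
  // Worked example for the alignment logic above (editorial sketch): on X86,
  // a 'double' alloca has TySize == 8 but a default alignment of 4.  Since
  // Align (4) < TySize (8) and TySize <= 8, Align is rounded up to 8,
  // producing the 8-byte-aligned stack slot that keeps double loads/stores
  // fast.  An 'alloca double, uint 4' then gets TySize = 8 * 4 = 32 with the
  // same alignment.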

  for (; BB != EB; ++BB)
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
      if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
        if (!isa<AllocaInst>(I) ||
            !StaticAllocaMap.count(cast<AllocaInst>(I)))
          InitializeRegForValue(I);

  // Create an initial MachineBasicBlock for each LLVM BasicBlock in F.  This
  // also creates the initial PHI MachineInstrs, though none of the input
  // operands are populated.
  for (BB = Fn.begin(), EB = Fn.end(); BB != EB; ++BB) {
    MachineBasicBlock *MBB = new MachineBasicBlock(BB);
    MBBMap[BB] = MBB;
    MF.getBasicBlockList().push_back(MBB);

    // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
    // appropriate.
    PHINode *PN;
    for (BasicBlock::iterator I = BB->begin();
         (PN = dyn_cast<PHINode>(I)); ++I)
      if (!PN->use_empty()) {
        unsigned NumElements =
          TLI.getNumElements(TLI.getValueType(PN->getType()));
        unsigned PHIReg = ValueMap[PN];
        assert(PHIReg &&"PHI node does not have an assigned virtual register!");
        for (unsigned i = 0; i != NumElements; ++i)
          BuildMI(MBB, TargetInstrInfo::PHI, PN->getNumOperands(), PHIReg+i);
      }
  }
}



//===----------------------------------------------------------------------===//
/// SelectionDAGLowering - This is the common target-independent lowering
/// implementation that is parameterized by a TargetLowering object.
/// Also, targets can overload any lowering method.
///
namespace llvm {
class SelectionDAGLowering {
  MachineBasicBlock *CurMBB;

  std::map<const Value*, SDOperand> NodeMap;

  /// PendingLoads - Loads are not emitted to the program immediately.  We bunch
  /// them up and then emit token factor nodes when possible.  This allows us to
  /// get simple disambiguation between loads without worrying about alias
  /// analysis.
  std::vector<SDOperand> PendingLoads;

public:
  // TLI - This is information that describes the available target features we
  // need for lowering.  This indicates when operations are unavailable,
  // implemented with a libcall, etc.
  TargetLowering &TLI;
  SelectionDAG &DAG;
  const TargetData &TD;

  /// FuncInfo - Information about the function as a whole.
  ///
  FunctionLoweringInfo &FuncInfo;

  SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
                       FunctionLoweringInfo &funcinfo)
    : TLI(tli), DAG(dag), TD(DAG.getTarget().getTargetData()),
      FuncInfo(funcinfo) {
  }

  /// getRoot - Return the current virtual root of the Selection DAG.
  ///
  SDOperand getRoot() {
    if (PendingLoads.empty())
      return DAG.getRoot();

    if (PendingLoads.size() == 1) {
      SDOperand Root = PendingLoads[0];
      DAG.setRoot(Root);
      PendingLoads.clear();
      return Root;
    }

    // Otherwise, we have to make a token factor node.
    SDOperand Root = DAG.getNode(ISD::TokenFactor, MVT::Other, PendingLoads);
    PendingLoads.clear();
    DAG.setRoot(Root);
    return Root;
  }
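
  // Editorial note (illustrative): if two non-volatile loads L1 and L2 are
  // pending here, the code above emits
  //
  //   SDOperand TF = DAG.getNode(ISD::TokenFactor, MVT::Other, PendingLoads);
  //
  // so neither load is ordered against the other, while anything chained on
  // the new root (a store, call, or return) is ordered after both loads.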

  void visit(Instruction &I) { visit(I.getOpcode(), I); }

  void visit(unsigned Opcode, User &I) {
    switch (Opcode) {
    default: assert(0 && "Unknown instruction type encountered!");
             abort();
      // Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
    case Instruction::OPCODE:return visit##OPCODE((CLASS&)I);
#include "llvm/Instruction.def"
    }
  }

  void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }


  SDOperand getIntPtrConstant(uint64_t Val) {
    return DAG.getConstant(Val, TLI.getPointerTy());
  }

  SDOperand getValue(const Value *V) {
    SDOperand &N = NodeMap[V];
    if (N.Val) return N;

    const Type *VTy = V->getType();
    MVT::ValueType VT = TLI.getValueType(VTy);
    if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)))
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
        visit(CE->getOpcode(), *CE);
        assert(N.Val && "visit didn't populate the NodeMap!");
        return N;
      } else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
        return N = DAG.getGlobalAddress(GV, VT);
      } else if (isa<ConstantPointerNull>(C)) {
        return N = DAG.getConstant(0, TLI.getPointerTy());
      } else if (isa<UndefValue>(C)) {
        return N = DAG.getNode(ISD::UNDEF, VT);
      } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
        return N = DAG.getConstantFP(CFP->getValue(), VT);
      } else if (const PackedType *PTy = dyn_cast<PackedType>(VTy)) {
        unsigned NumElements = PTy->getNumElements();
        MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
        MVT::ValueType TVT = MVT::getVectorType(PVT, NumElements);

        // Now that we know the number and type of the elements, push a
        // Constant or ConstantFP node onto the ops list for each element of
        // the packed constant.
        std::vector<SDOperand> Ops;
        if (ConstantPacked *CP = dyn_cast<ConstantPacked>(C)) {
          if (MVT::isFloatingPoint(PVT)) {
            for (unsigned i = 0; i != NumElements; ++i) {
              const ConstantFP *El = cast<ConstantFP>(CP->getOperand(i));
              Ops.push_back(DAG.getConstantFP(El->getValue(), PVT));
            }
          } else {
            for (unsigned i = 0; i != NumElements; ++i) {
              const ConstantIntegral *El =
                cast<ConstantIntegral>(CP->getOperand(i));
              Ops.push_back(DAG.getConstant(El->getRawValue(), PVT));
            }
          }
        } else {
          assert(isa<ConstantAggregateZero>(C) && "Unknown packed constant!");
          SDOperand Op;
          if (MVT::isFloatingPoint(PVT))
            Op = DAG.getConstantFP(0, PVT);
          else
            Op = DAG.getConstant(0, PVT);
          Ops.assign(NumElements, Op);
        }

        // Handle the case where we have a 1-element vector, in which
        // case we want to immediately turn it into a scalar constant.
        if (Ops.size() == 1) {
          return N = Ops[0];
        } else if (TVT != MVT::Other && TLI.isTypeLegal(TVT)) {
          return N = DAG.getNode(ISD::ConstantVec, TVT, Ops);
        } else {
          // If the packed type isn't legal, then create a ConstantVec node with
          // generic Vector type instead.
          SDOperand Num = DAG.getConstant(NumElements, MVT::i32);
          SDOperand Typ = DAG.getValueType(PVT);
          Ops.insert(Ops.begin(), Typ);
          Ops.insert(Ops.begin(), Num);
          return N = DAG.getNode(ISD::VConstant, MVT::Vector, Ops);
        }
      } else {
        // Canonicalize all constant ints to be unsigned.
        return N = DAG.getConstant(cast<ConstantIntegral>(C)->getRawValue(),VT);
      }

    if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
      std::map<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end())
        return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
    }

    std::map<const Value*, unsigned>::const_iterator VMI =
      FuncInfo.ValueMap.find(V);
    assert(VMI != FuncInfo.ValueMap.end() && "Value not in map!");

    unsigned InReg = VMI->second;

    // If this type is not legal, make it so now.
    MVT::ValueType DestVT = TLI.getTypeToTransformTo(VT);

    N = DAG.getCopyFromReg(DAG.getEntryNode(), InReg, DestVT);
    if (DestVT < VT) {
      // Source must be expanded.  This input value is actually coming from the
      // register pair VMI->second and VMI->second+1.
      N = DAG.getNode(ISD::BUILD_PAIR, VT, N,
                      DAG.getCopyFromReg(DAG.getEntryNode(), InReg+1, DestVT));
    } else {
      if (DestVT > VT) { // Promotion case
        if (MVT::isFloatingPoint(VT))
          N = DAG.getNode(ISD::FP_ROUND, VT, N);
        else
          N = DAG.getNode(ISD::TRUNCATE, VT, N);
      }
    }

    return N;
  }
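
  // Worked example (editorial sketch): for a cross-block i64 value V on a
  // 32-bit target, FuncInfo.ValueMap[V] names the first of two consecutive
  // i32 vregs created by CreateRegForValue, and the code above rebuilds V as
  //
  //   Lo = CopyFromReg(entry, InReg,   i32)
  //   Hi = CopyFromReg(entry, InReg+1, i32)
  //   N  = BUILD_PAIR i64, Lo, Hi
  //
  // whereas a promoted value (e.g. i16 kept in i32) comes back as a TRUNCATE
  // (or FP_ROUND for floating point) of the wider register copy.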

  const SDOperand &setValue(const Value *V, SDOperand NewN) {
    SDOperand &N = NodeMap[V];
    assert(N.Val == 0 && "Already set a value for this node!");
    return N = NewN;
  }

  RegsForValue GetRegistersForValue(const std::string &ConstrCode,
                                    MVT::ValueType VT,
                                    bool OutReg, bool InReg,
                                    std::set<unsigned> &OutputRegs,
                                    std::set<unsigned> &InputRegs);

  // Terminator instructions.
  void visitRet(ReturnInst &I);
  void visitBr(BranchInst &I);
  void visitUnreachable(UnreachableInst &I) { /* noop */ }

  // These all get lowered before this pass.
  void visitExtractElement(ExtractElementInst &I) { assert(0 && "TODO"); }
  void visitInsertElement(InsertElementInst &I) { assert(0 && "TODO"); }
  void visitSwitch(SwitchInst &I) { assert(0 && "TODO"); }
  void visitInvoke(InvokeInst &I) { assert(0 && "TODO"); }
  void visitUnwind(UnwindInst &I) { assert(0 && "TODO"); }

  //
  void visitBinary(User &I, unsigned IntOp, unsigned FPOp, unsigned VecOp);
  void visitShift(User &I, unsigned Opcode);
  void visitAdd(User &I) {
    visitBinary(I, ISD::ADD, ISD::FADD, ISD::VADD);
  }
  void visitSub(User &I);
  void visitMul(User &I) {
    visitBinary(I, ISD::MUL, ISD::FMUL, ISD::VMUL);
  }
  void visitDiv(User &I) {
    const Type *Ty = I.getType();
    visitBinary(I, Ty->isSigned() ? ISD::SDIV : ISD::UDIV, ISD::FDIV, 0);
  }
  void visitRem(User &I) {
    const Type *Ty = I.getType();
    visitBinary(I, Ty->isSigned() ? ISD::SREM : ISD::UREM, ISD::FREM, 0);
  }
  void visitAnd(User &I) { visitBinary(I, ISD::AND, 0, 0); }
  void visitOr (User &I) { visitBinary(I, ISD::OR,  0, 0); }
  void visitXor(User &I) { visitBinary(I, ISD::XOR, 0, 0); }
  void visitShl(User &I) { visitShift(I, ISD::SHL); }
  void visitShr(User &I) {
    visitShift(I, I.getType()->isUnsigned() ? ISD::SRL : ISD::SRA);
  }

  void visitSetCC(User &I, ISD::CondCode SignedOpc, ISD::CondCode UnsignedOpc);
  void visitSetEQ(User &I) { visitSetCC(I, ISD::SETEQ, ISD::SETEQ); }
  void visitSetNE(User &I) { visitSetCC(I, ISD::SETNE, ISD::SETNE); }
  void visitSetLE(User &I) { visitSetCC(I, ISD::SETLE, ISD::SETULE); }
  void visitSetGE(User &I) { visitSetCC(I, ISD::SETGE, ISD::SETUGE); }
  void visitSetLT(User &I) { visitSetCC(I, ISD::SETLT, ISD::SETULT); }
  void visitSetGT(User &I) { visitSetCC(I, ISD::SETGT, ISD::SETUGT); }

  void visitGetElementPtr(User &I);
  void visitCast(User &I);
  void visitSelect(User &I);
  //

  void visitMalloc(MallocInst &I);
  void visitFree(FreeInst &I);
  void visitAlloca(AllocaInst &I);
  void visitLoad(LoadInst &I);
  void visitStore(StoreInst &I);
  void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
  void visitCall(CallInst &I);
  void visitInlineAsm(CallInst &I);
  const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);

  void visitVAStart(CallInst &I);
  void visitVAArg(VAArgInst &I);
  void visitVAEnd(CallInst &I);
  void visitVACopy(CallInst &I);
  void visitFrameReturnAddress(CallInst &I, bool isFrameAddress);

  void visitMemIntrinsic(CallInst &I, unsigned Op);

  void visitUserOp1(Instruction &I) {
    assert(0 && "UserOp1 should not exist at instruction selection time!");
    abort();
  }
  void visitUserOp2(Instruction &I) {
    assert(0 && "UserOp2 should not exist at instruction selection time!");
    abort();
  }
};
} // end namespace llvm

void SelectionDAGLowering::visitRet(ReturnInst &I) {
  if (I.getNumOperands() == 0) {
    DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other, getRoot()));
    return;
  }
  std::vector<SDOperand> NewValues;
  NewValues.push_back(getRoot());
  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
    SDOperand RetOp = getValue(I.getOperand(i));

    // If this is an integer return value, we need to promote it ourselves to
    // the full width of a register, since LegalizeOp will use ANY_EXTEND rather
    // than sign/zero.
    if (MVT::isInteger(RetOp.getValueType()) &&
        RetOp.getValueType() < MVT::i64) {
      MVT::ValueType TmpVT;
      if (TLI.getTypeAction(MVT::i32) == TargetLowering::Promote)
        TmpVT = TLI.getTypeToTransformTo(MVT::i32);
      else
        TmpVT = MVT::i32;

      if (I.getOperand(i)->getType()->isSigned())
        RetOp = DAG.getNode(ISD::SIGN_EXTEND, TmpVT, RetOp);
      else
        RetOp = DAG.getNode(ISD::ZERO_EXTEND, TmpVT, RetOp);
    }
    NewValues.push_back(RetOp);
  }
  DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other, NewValues));
}

void SelectionDAGLowering::visitBr(BranchInst &I) {
  // Update machine-CFG edges.
  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];

  // Figure out which block is immediately after the current one.
  MachineBasicBlock *NextBlock = 0;
  MachineFunction::iterator BBI = CurMBB;
  if (++BBI != CurMBB->getParent()->end())
    NextBlock = BBI;

  if (I.isUnconditional()) {
    // If this is not a fall-through branch, emit the branch.
    if (Succ0MBB != NextBlock)
      DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
                              DAG.getBasicBlock(Succ0MBB)));
  } else {
    MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];

    SDOperand Cond = getValue(I.getCondition());
    if (Succ1MBB == NextBlock) {
      // If the condition is false, fall through.  This means we should branch
      // if the condition is true to Succ #0.
      DAG.setRoot(DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(),
                              Cond, DAG.getBasicBlock(Succ0MBB)));
    } else if (Succ0MBB == NextBlock) {
      // If the condition is true, fall through.  This means we should branch if
      // the condition is false to Succ #1.  Invert the condition first.
      SDOperand True = DAG.getConstant(1, Cond.getValueType());
      Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True);
      DAG.setRoot(DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(),
                              Cond, DAG.getBasicBlock(Succ1MBB)));
    } else {
      std::vector<SDOperand> Ops;
      Ops.push_back(getRoot());
      // If the false case is the current basic block, then this is a self
      // loop.  We do not want to emit "Loop: ... brcond Out; br Loop", as it
      // adds an extra instruction in the loop.  Instead, invert the
      // condition and emit "Loop: ... br!cond Loop; br Out".
      if (CurMBB == Succ1MBB) {
        std::swap(Succ0MBB, Succ1MBB);
        SDOperand True = DAG.getConstant(1, Cond.getValueType());
        Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True);
      }
      Ops.push_back(Cond);
      Ops.push_back(DAG.getBasicBlock(Succ0MBB));
      Ops.push_back(DAG.getBasicBlock(Succ1MBB));
      DAG.setRoot(DAG.getNode(ISD::BRCONDTWOWAY, MVT::Other, Ops));
    }
  }
}
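
// Worked example for the self-loop case above (editorial sketch): a loop
//
//   Loop:  ...  br bool %c, label %Out, label %Loop
//
// would naively become "brcond %c, Out; br Loop", two branches per
// iteration.  Swapping the successors and inverting the condition with
// "xor %c, 1" instead yields a single conditional branch back to Loop on the
// hot path, with the branch to Out taken only on exit.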

void SelectionDAGLowering::visitSub(User &I) {
  // -0.0 - X --> fneg
  if (I.getType()->isFloatingPoint()) {
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
      if (CFP->isExactlyValue(-0.0)) {
        SDOperand Op2 = getValue(I.getOperand(1));
        setValue(&I, DAG.getNode(ISD::FNEG, Op2.getValueType(), Op2));
        return;
      }
  }
  visitBinary(I, ISD::SUB, ISD::FSUB, ISD::VSUB);
}

void SelectionDAGLowering::visitBinary(User &I, unsigned IntOp, unsigned FPOp,
                                       unsigned VecOp) {
  const Type *Ty = I.getType();
  SDOperand Op1 = getValue(I.getOperand(0));
  SDOperand Op2 = getValue(I.getOperand(1));

  if (Ty->isIntegral()) {
    setValue(&I, DAG.getNode(IntOp, Op1.getValueType(), Op1, Op2));
  } else if (Ty->isFloatingPoint()) {
    setValue(&I, DAG.getNode(FPOp, Op1.getValueType(), Op1, Op2));
  } else {
    const PackedType *PTy = cast<PackedType>(Ty);
    unsigned NumElements = PTy->getNumElements();
    MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
    MVT::ValueType TVT = MVT::getVectorType(PVT, NumElements);

    // Immediately scalarize packed types containing only one element, so that
    // the Legalize pass does not have to deal with them.  Similarly, if the
    // abstract vector is going to turn into one that the target natively
    // supports, generate that type now so that Legalize doesn't have to deal
    // with that either.  These steps ensure that Legalize only has to handle
    // vector types in its Expand case.
    unsigned Opc = MVT::isFloatingPoint(PVT) ? FPOp : IntOp;
    if (NumElements == 1) {
      setValue(&I, DAG.getNode(Opc, PVT, Op1, Op2));
    } else if (TVT != MVT::Other &&
               TLI.isTypeLegal(TVT) && TLI.isOperationLegal(Opc, TVT)) {
      setValue(&I, DAG.getNode(Opc, TVT, Op1, Op2));
    } else {
      SDOperand Num = DAG.getConstant(NumElements, MVT::i32);
      SDOperand Typ = DAG.getValueType(PVT);
      setValue(&I, DAG.getNode(VecOp, MVT::Vector, Num, Typ, Op1, Op2));
    }
  }
}
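
// Editorial note (illustrative sketch): for a '<4 x float>' add on a target
// with a legal v4f32 type, TVT would be MVT::v4f32 and the FADD node is
// built on TVT directly.  A '<1 x float>' operand is scalarized to a plain
// f32 node, and something like '<3 x float>', with no matching legal vector
// type, becomes
//
//   VADD Num=3, Typ=f32, Op1, Op2
//
// on the generic MVT::Vector type, leaving Legalize to expand it.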

void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
  SDOperand Op1 = getValue(I.getOperand(0));
  SDOperand Op2 = getValue(I.getOperand(1));

  Op2 = DAG.getNode(ISD::ANY_EXTEND, TLI.getShiftAmountTy(), Op2);

  setValue(&I, DAG.getNode(Opcode, Op1.getValueType(), Op1, Op2));
}

void SelectionDAGLowering::visitSetCC(User &I,ISD::CondCode SignedOpcode,
                                      ISD::CondCode UnsignedOpcode) {
  SDOperand Op1 = getValue(I.getOperand(0));
  SDOperand Op2 = getValue(I.getOperand(1));
  ISD::CondCode Opcode = SignedOpcode;
  if (I.getOperand(0)->getType()->isUnsigned())
    Opcode = UnsignedOpcode;
  setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Opcode));
}

void SelectionDAGLowering::visitSelect(User &I) {
  SDOperand Cond     = getValue(I.getOperand(0));
  SDOperand TrueVal  = getValue(I.getOperand(1));
  SDOperand FalseVal = getValue(I.getOperand(2));
  setValue(&I, DAG.getNode(ISD::SELECT, TrueVal.getValueType(), Cond,
                           TrueVal, FalseVal));
}

void SelectionDAGLowering::visitCast(User &I) {
  SDOperand N = getValue(I.getOperand(0));
  MVT::ValueType SrcTy = TLI.getValueType(I.getOperand(0)->getType());
  MVT::ValueType DestTy = TLI.getValueType(I.getType());

  if (N.getValueType() == DestTy) {
    setValue(&I, N);  // noop cast.
  } else if (DestTy == MVT::i1) {
    // Cast to bool is a comparison against zero, not truncation to zero.
    SDOperand Zero = isInteger(SrcTy) ? DAG.getConstant(0, N.getValueType()) :
                                       DAG.getConstantFP(0.0, N.getValueType());
    setValue(&I, DAG.getSetCC(MVT::i1, N, Zero, ISD::SETNE));
  } else if (isInteger(SrcTy)) {
    if (isInteger(DestTy)) {        // Int -> Int cast
      if (DestTy < SrcTy)   // Truncating cast?
        setValue(&I, DAG.getNode(ISD::TRUNCATE, DestTy, N));
      else if (I.getOperand(0)->getType()->isSigned())
        setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, DestTy, N));
      else
        setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestTy, N));
    } else {                        // Int -> FP cast
      if (I.getOperand(0)->getType()->isSigned())
        setValue(&I, DAG.getNode(ISD::SINT_TO_FP, DestTy, N));
      else
        setValue(&I, DAG.getNode(ISD::UINT_TO_FP, DestTy, N));
    }
  } else {
    assert(isFloatingPoint(SrcTy) && "Unknown value type!");
    if (isFloatingPoint(DestTy)) {  // FP -> FP cast
      if (DestTy < SrcTy)   // Rounding cast?
        setValue(&I, DAG.getNode(ISD::FP_ROUND, DestTy, N));
      else
        setValue(&I, DAG.getNode(ISD::FP_EXTEND, DestTy, N));
    } else {                        // FP -> Int cast.
      if (I.getType()->isSigned())
        setValue(&I, DAG.getNode(ISD::FP_TO_SINT, DestTy, N));
      else
        setValue(&I, DAG.getNode(ISD::FP_TO_UINT, DestTy, N));
    }
  }
}
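
// Worked example (editorial note): 'cast int %x to bool' is not lowered as a
// truncate, which would test only bit 0.  Per the MVT::i1 case above it
// becomes
//
//   setne i1, %x, 0
//
// so any nonzero source value yields true, matching LLVM's cast semantics.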

void SelectionDAGLowering::visitGetElementPtr(User &I) {
  SDOperand N = getValue(I.getOperand(0));
  const Type *Ty = I.getOperand(0)->getType();
  const Type *UIntPtrTy = TD.getIntPtrType();

  for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
       OI != E; ++OI) {
    Value *Idx = *OI;
    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
      unsigned Field = cast<ConstantUInt>(Idx)->getValue();
      if (Field) {
        // N = N + Offset
        uint64_t Offset = TD.getStructLayout(StTy)->MemberOffsets[Field];
        N = DAG.getNode(ISD::ADD, N.getValueType(), N,
                        getIntPtrConstant(Offset));
      }
      Ty = StTy->getElementType(Field);
    } else {
      Ty = cast<SequentialType>(Ty)->getElementType();

      // If this is a constant subscript, handle it quickly.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->getRawValue() == 0) continue;

        uint64_t Offs;
        if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(CI))
          Offs = (int64_t)TD.getTypeSize(Ty)*CSI->getValue();
        else
          Offs = TD.getTypeSize(Ty)*cast<ConstantUInt>(CI)->getValue();
        N = DAG.getNode(ISD::ADD, N.getValueType(), N, getIntPtrConstant(Offs));
        continue;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = TD.getTypeSize(Ty);
      SDOperand IdxN = getValue(Idx);

      // If the index is smaller or larger than intptr_t, truncate or extend
      // it.
      if (IdxN.getValueType() < N.getValueType()) {
        if (Idx->getType()->isSigned())
          IdxN = DAG.getNode(ISD::SIGN_EXTEND, N.getValueType(), IdxN);
        else
          IdxN = DAG.getNode(ISD::ZERO_EXTEND, N.getValueType(), IdxN);
      } else if (IdxN.getValueType() > N.getValueType())
        IdxN = DAG.getNode(ISD::TRUNCATE, N.getValueType(), IdxN);

      // If this is a multiply by a power of two, turn it into a shl
      // immediately.  This is a very common case.
      if (isPowerOf2_64(ElementSize)) {
        unsigned Amt = Log2_64(ElementSize);
        IdxN = DAG.getNode(ISD::SHL, N.getValueType(), IdxN,
                           DAG.getConstant(Amt, TLI.getShiftAmountTy()));
        N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
        continue;
      }

      SDOperand Scale = getIntPtrConstant(ElementSize);
      IdxN = DAG.getNode(ISD::MUL, N.getValueType(), IdxN, Scale);
      N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
    }
  }
  setValue(&I, N);
}
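
// Worked example (editorial sketch): for 'getelementptr double* %p, int %i',
// the element size is 8, a power of two, so the index is scaled with a shift
// rather than a multiply:
//
//   IdxN = shl %i, 3              ; Log2_64(8) == 3
//   N    = add %p, IdxN
//
// A variable index over elements of, say, 12 bytes would instead take the
// MUL-by-Scale path at the bottom of the loop.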

void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
  // If this is a fixed sized alloca in the entry block of the function,
  // allocate it statically on the stack.
  if (FuncInfo.StaticAllocaMap.count(&I))
    return;   // getValue will auto-populate this.

  const Type *Ty = I.getAllocatedType();
  uint64_t TySize = TLI.getTargetData().getTypeSize(Ty);
  unsigned Align = std::max((unsigned)TLI.getTargetData().getTypeAlignment(Ty),
                            I.getAlignment());

  SDOperand AllocSize = getValue(I.getArraySize());
  MVT::ValueType IntPtr = TLI.getPointerTy();
  if (IntPtr < AllocSize.getValueType())
    AllocSize = DAG.getNode(ISD::TRUNCATE, IntPtr, AllocSize);
  else if (IntPtr > AllocSize.getValueType())
    AllocSize = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, AllocSize);

  AllocSize = DAG.getNode(ISD::MUL, IntPtr, AllocSize,
                          getIntPtrConstant(TySize));

  // Handle alignment.  If the requested alignment is less than or equal to
  // the stack alignment, ignore it and round the size of the allocation up
  // to the stack alignment size.  If the requested alignment is greater than
  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
  unsigned StackAlign =
    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
  if (Align <= StackAlign) {
    Align = 0;
    // Add SA-1 to the size.
    AllocSize = DAG.getNode(ISD::ADD, AllocSize.getValueType(), AllocSize,
                            getIntPtrConstant(StackAlign-1));
    // Mask out the low bits for alignment purposes.
    AllocSize = DAG.getNode(ISD::AND, AllocSize.getValueType(), AllocSize,
                            getIntPtrConstant(~(uint64_t)(StackAlign-1)));
  }

  std::vector<MVT::ValueType> VTs;
  VTs.push_back(AllocSize.getValueType());
  VTs.push_back(MVT::Other);
  std::vector<SDOperand> Ops;
  Ops.push_back(getRoot());
  Ops.push_back(AllocSize);
  Ops.push_back(getIntPtrConstant(Align));
  SDOperand DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, VTs, Ops);
  DAG.setRoot(setValue(&I, DSA).getValue(1));

  // Inform the Frame Information that we have just allocated a variable-sized
  // object.
  CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject();
}
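
// Worked example for the rounding above (editorial sketch): with a 16-byte
// stack alignment and a 20-byte request, the ADD/AND pair computes
//
//   (20 + (16-1)) & ~(16-1)  ==  35 & ~15  ==  32
//
// and Align == 0 is passed to DYNAMIC_STACKALLOC, telling the target that
// the stack pointer adjustment alone suffices; only a request exceeding the
// stack alignment carries an explicit alignment operand.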

/// getStringValue - Turn an LLVM constant pointer that eventually points to a
/// global into a string value.  Return an empty string if we can't do it.
///
static std::string getStringValue(GlobalVariable *GV, unsigned Offset = 0) {
  if (GV->hasInitializer() && isa<ConstantArray>(GV->getInitializer())) {
    ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
    if (Init->isString()) {
      std::string Result = Init->getAsString();
      if (Offset < Result.size()) {
        // If we are pointing into the string, erase the beginning.
        Result.erase(Result.begin(), Result.begin()+Offset);
        return Result;
      }
    }
  }
  return "";
}

void SelectionDAGLowering::visitLoad(LoadInst &I) {
  SDOperand Ptr = getValue(I.getOperand(0));

  SDOperand Root;
  if (I.isVolatile())
    Root = getRoot();
  else {
    // Do not serialize non-volatile loads against each other.
    Root = DAG.getRoot();
  }

  const Type *Ty = I.getType();
  SDOperand L;

  if (const PackedType *PTy = dyn_cast<PackedType>(Ty)) {
    unsigned NumElements = PTy->getNumElements();
    MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
    MVT::ValueType TVT = MVT::getVectorType(PVT, NumElements);

    // Immediately scalarize packed types containing only one element, so that
    // the Legalize pass does not have to deal with them.
    if (NumElements == 1) {
      L = DAG.getLoad(PVT, Root, Ptr, DAG.getSrcValue(I.getOperand(0)));
    } else if (TVT != MVT::Other &&
               TLI.isTypeLegal(TVT) && TLI.isOperationLegal(ISD::LOAD, TVT)) {
      L = DAG.getLoad(TVT, Root, Ptr, DAG.getSrcValue(I.getOperand(0)));
    } else {
      L = DAG.getVecLoad(NumElements, PVT, Root, Ptr,
                         DAG.getSrcValue(I.getOperand(0)));
    }
  } else {
    L = DAG.getLoad(TLI.getValueType(Ty), Root, Ptr,
                    DAG.getSrcValue(I.getOperand(0)));
  }
  setValue(&I, L);

  if (I.isVolatile())
    DAG.setRoot(L.getValue(1));
  else
    PendingLoads.push_back(L.getValue(1));
}


void SelectionDAGLowering::visitStore(StoreInst &I) {
  Value *SrcV = I.getOperand(0);
  SDOperand Src = getValue(SrcV);
  SDOperand Ptr = getValue(I.getOperand(1));
  DAG.setRoot(DAG.getNode(ISD::STORE, MVT::Other, getRoot(), Src, Ptr,
                          DAG.getSrcValue(I.getOperand(1))));
}

/// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
/// we want to emit this as a call to a named external function, return the
/// name; otherwise, lower it and return null.
const char *
SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
  switch (Intrinsic) {
  case Intrinsic::vastart:  visitVAStart(I); return 0;
  case Intrinsic::vaend:    visitVAEnd(I); return 0;
  case Intrinsic::vacopy:   visitVACopy(I); return 0;
  case Intrinsic::returnaddress: visitFrameReturnAddress(I, false); return 0;
  case Intrinsic::frameaddress:  visitFrameReturnAddress(I, true); return 0;
  case Intrinsic::setjmp:
    // Pointer arithmetic on the string literal: skip the leading '_' when the
    // target does not use the underscore-prefixed name.
    return "_setjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
  case Intrinsic::longjmp:
    return "_longjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
  case Intrinsic::memcpy:  visitMemIntrinsic(I, ISD::MEMCPY); return 0;
  case Intrinsic::memset:  visitMemIntrinsic(I, ISD::MEMSET); return 0;
  case Intrinsic::memmove: visitMemIntrinsic(I, ISD::MEMMOVE); return 0;

  case Intrinsic::readport:
  case Intrinsic::readio: {
    std::vector<MVT::ValueType> VTs;
    VTs.push_back(TLI.getValueType(I.getType()));
    VTs.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(getRoot());
    Ops.push_back(getValue(I.getOperand(1)));
    SDOperand Tmp = DAG.getNode(Intrinsic == Intrinsic::readport ?
                                ISD::READPORT : ISD::READIO, VTs, Ops);

    setValue(&I, Tmp);
    DAG.setRoot(Tmp.getValue(1));
    return 0;
  }
  case Intrinsic::writeport:
  case Intrinsic::writeio:
    DAG.setRoot(DAG.getNode(Intrinsic == Intrinsic::writeport ?
                            ISD::WRITEPORT : ISD::WRITEIO, MVT::Other,
                            getRoot(), getValue(I.getOperand(1)),
                            getValue(I.getOperand(2))));
    return 0;

  case Intrinsic::dbg_stoppoint: {
    if (TLI.getTargetMachine().getIntrinsicLowering().EmitDebugFunctions())
      return "llvm_debugger_stop";

    MachineDebugInfo *DebugInfo = DAG.getMachineDebugInfo();
    if (DebugInfo &&  DebugInfo->Verify(I.getOperand(4))) {
      std::vector<SDOperand> Ops;

      // Input Chain
      Ops.push_back(getRoot());

      // line number
      Ops.push_back(getValue(I.getOperand(2)));

      // column
      Ops.push_back(getValue(I.getOperand(3)));

      DebugInfoDesc *DD = DebugInfo->getDescFor(I.getOperand(4));
      assert(DD && "Not a debug information descriptor");
      CompileUnitDesc *CompileUnit = dyn_cast<CompileUnitDesc>(DD);
      assert(CompileUnit && "Not a compile unit");
      Ops.push_back(DAG.getString(CompileUnit->getFileName()));
      Ops.push_back(DAG.getString(CompileUnit->getDirectory()));

      if (Ops.size() == 5)  // Found filename/workingdir.
        DAG.setRoot(DAG.getNode(ISD::LOCATION, MVT::Other, Ops));
    }

    setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
    return 0;
  }
  case Intrinsic::dbg_region_start:
    if (TLI.getTargetMachine().getIntrinsicLowering().EmitDebugFunctions())
      return "llvm_dbg_region_start";
    if (I.getType() != Type::VoidTy)
      setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
    return 0;
  case Intrinsic::dbg_region_end:
    if (TLI.getTargetMachine().getIntrinsicLowering().EmitDebugFunctions())
      return "llvm_dbg_region_end";
    if (I.getType() != Type::VoidTy)
      setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
    return 0;
  case Intrinsic::dbg_func_start:
    if (TLI.getTargetMachine().getIntrinsicLowering().EmitDebugFunctions())
      return "llvm_dbg_subprogram";
    if (I.getType() != Type::VoidTy)
      setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
    return 0;
  case Intrinsic::dbg_declare:
    if (I.getType() != Type::VoidTy)
      setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
    return 0;

  case Intrinsic::isunordered_f32:
  case Intrinsic::isunordered_f64:
    setValue(&I, DAG.getSetCC(MVT::i1,getValue(I.getOperand(1)),
                              getValue(I.getOperand(2)), ISD::SETUO));
    return 0;

  case Intrinsic::sqrt_f32:
  case Intrinsic::sqrt_f64:
    setValue(&I, DAG.getNode(ISD::FSQRT,
                             getValue(I.getOperand(1)).getValueType(),
                             getValue(I.getOperand(1))));
    return 0;
  case Intrinsic::pcmarker: {
    SDOperand Tmp = getValue(I.getOperand(1));
    DAG.setRoot(DAG.getNode(ISD::PCMARKER, MVT::Other, getRoot(), Tmp));
    return 0;
  }
  case Intrinsic::readcyclecounter: {
    std::vector<MVT::ValueType> VTs;
    VTs.push_back(MVT::i64);
    VTs.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(getRoot());
    SDOperand Tmp = DAG.getNode(ISD::READCYCLECOUNTER, VTs, Ops);
    setValue(&I, Tmp);
    DAG.setRoot(Tmp.getValue(1));
    return 0;
  }
  case Intrinsic::bswap_i16:
  case Intrinsic::bswap_i32:
  case Intrinsic::bswap_i64:
    setValue(&I, DAG.getNode(ISD::BSWAP,
                             getValue(I.getOperand(1)).getValueType(),
                             getValue(I.getOperand(1))));
    return 0;
  case Intrinsic::cttz_i8:
  case Intrinsic::cttz_i16:
  case Intrinsic::cttz_i32:
  case Intrinsic::cttz_i64:
    setValue(&I, DAG.getNode(ISD::CTTZ,
                             getValue(I.getOperand(1)).getValueType(),
                             getValue(I.getOperand(1))));
    return 0;
  case Intrinsic::ctlz_i8:
  case Intrinsic::ctlz_i16:
  case Intrinsic::ctlz_i32:
  case Intrinsic::ctlz_i64:
    setValue(&I, DAG.getNode(ISD::CTLZ,
                             getValue(I.getOperand(1)).getValueType(),
                             getValue(I.getOperand(1))));
    return 0;
  case Intrinsic::ctpop_i8:
  case Intrinsic::ctpop_i16:
  case Intrinsic::ctpop_i32:
  case Intrinsic::ctpop_i64:
    setValue(&I, DAG.getNode(ISD::CTPOP,
                             getValue(I.getOperand(1)).getValueType(),
                             getValue(I.getOperand(1))));
    return 0;
  case Intrinsic::stacksave: {
    std::vector<MVT::ValueType> VTs;
    VTs.push_back(TLI.getPointerTy());
    VTs.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(getRoot());
    SDOperand Tmp = DAG.getNode(ISD::STACKSAVE, VTs, Ops);
    setValue(&I, Tmp);
    DAG.setRoot(Tmp.getValue(1));
    return 0;
  }
  case Intrinsic::stackrestore: {
    SDOperand Tmp = getValue(I.getOperand(1));
    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, MVT::Other, getRoot(), Tmp));
    return 0;
  }
  case Intrinsic::prefetch:
    // FIXME: Currently discarding prefetches.
    return 0;
  default:
    std::cerr << I;
    assert(0 && "This intrinsic is not implemented yet!");
    return 0;
  }
}


void SelectionDAGLowering::visitCall(CallInst &I) {
  const char *RenameFn = 0;
  if (Function *F = I.getCalledFunction()) {
    if (F->isExternal())
      if (unsigned IID = F->getIntrinsicID()) {
        RenameFn = visitIntrinsicCall(I, IID);
        if (!RenameFn)
          return;
      } else {    // Not an LLVM intrinsic.
        const std::string &Name = F->getName();
        if (Name[0] == 'f' && (Name == "fabs" || Name == "fabsf")) {
          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
              I.getOperand(1)->getType()->isFloatingPoint() &&
              I.getType() == I.getOperand(1)->getType()) {
            SDOperand Tmp = getValue(I.getOperand(1));
            setValue(&I, DAG.getNode(ISD::FABS, Tmp.getValueType(), Tmp));
            return;
          }
        } else if (Name[0] == 's' && (Name == "sin" || Name == "sinf")) {
          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
              I.getOperand(1)->getType()->isFloatingPoint() &&
              I.getType() == I.getOperand(1)->getType()) {
            SDOperand Tmp = getValue(I.getOperand(1));
            setValue(&I, DAG.getNode(ISD::FSIN, Tmp.getValueType(), Tmp));
            return;
          }
        } else if (Name[0] == 'c' && (Name == "cos" || Name == "cosf")) {
          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
              I.getOperand(1)->getType()->isFloatingPoint() &&
              I.getType() == I.getOperand(1)->getType()) {
            SDOperand Tmp = getValue(I.getOperand(1));
            setValue(&I, DAG.getNode(ISD::FCOS, Tmp.getValueType(), Tmp));
            return;
          }
        }
      }
  } else if (isa<InlineAsm>(I.getOperand(0))) {
    visitInlineAsm(I);
    return;
  }

  SDOperand Callee;
  if (!RenameFn)
    Callee = getValue(I.getOperand(0));
  else
    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
  std::vector<std::pair<SDOperand, const Type*> > Args;
  Args.reserve(I.getNumOperands());
  for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
    Value *Arg = I.getOperand(i);
    SDOperand ArgNode = getValue(Arg);
    Args.push_back(std::make_pair(ArgNode, Arg->getType()));
  }

  const PointerType *PT = cast<PointerType>(I.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());

  std::pair<SDOperand,SDOperand> Result =
    TLI.LowerCallTo(getRoot(), I.getType(), FTy->isVarArg(), I.getCallingConv(),
                    I.isTailCall(), Callee, Args, DAG);
  if (I.getType() != Type::VoidTy)
    setValue(&I, Result.first);
  DAG.setRoot(Result.second);
}

SDOperand RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                        SDOperand &Chain, SDOperand &Flag)const{
  SDOperand Val = DAG.getCopyFromReg(Chain, Regs[0], RegVT, Flag);
  Chain = Val.getValue(1);
  Flag  = Val.getValue(2);

  // If the result was expanded, copy from the top part.
  if (Regs.size() > 1) {
    assert(Regs.size() == 2 &&
           "Cannot expand to more than 2 elts yet!");
    SDOperand Hi = DAG.getCopyFromReg(Chain, Regs[1], RegVT, Flag);
    Chain = Hi.getValue(1);   // Thread the chain/flag from the second copy,
    Flag  = Hi.getValue(2);   // not the first, so neither copy is dropped.
    if (DAG.getTargetLoweringInfo().isLittleEndian())
      return DAG.getNode(ISD::BUILD_PAIR, ValueVT, Val, Hi);
    else
      return DAG.getNode(ISD::BUILD_PAIR, ValueVT, Hi, Val);
  }

  // Otherwise, if the return value was promoted, truncate it to the
  // appropriate type.
  if (RegVT == ValueVT)
    return Val;

  if (MVT::isInteger(RegVT))
    return DAG.getNode(ISD::TRUNCATE, ValueVT, Val);
  else
    return DAG.getNode(ISD::FP_ROUND, ValueVT, Val);
}

/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
/// specified value into the registers specified by this object.  This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
void RegsForValue::getCopyToRegs(SDOperand Val, SelectionDAG &DAG,
                                 SDOperand &Chain, SDOperand &Flag) const {
  if (Regs.size() == 1) {
    // If there is a single register and the types differ, this must be
    // a promotion.
    if (RegVT != ValueVT) {
      if (MVT::isInteger(RegVT))
        Val = DAG.getNode(ISD::ANY_EXTEND, RegVT, Val);
      else
        Val = DAG.getNode(ISD::FP_EXTEND, RegVT, Val);
    }
    Chain = DAG.getCopyToReg(Chain, Regs[0], Val, Flag);
    Flag = Chain.getValue(1);
  } else {
    std::vector<unsigned> R(Regs);
    if (!DAG.getTargetLoweringInfo().isLittleEndian())
      std::reverse(R.begin(), R.end());

    for (unsigned i = 0, e = R.size(); i != e; ++i) {
      SDOperand Part = DAG.getNode(ISD::EXTRACT_ELEMENT, RegVT, Val,
                                   DAG.getConstant(i, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, R[i], Part, Flag);
      Flag = Chain.getValue(1);
    }
  }
}
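
// Editorial note (illustrative sketch): copying an expanded i64 value into
// registers {R0, R1} on a little-endian target emits, in order,
//
//   CopyToReg R0, extract_element Val, 0    ; low half
//   CopyToReg R1, extract_element Val, 1    ; high half
//
// with the Flag threaded from each copy into the next so the scheduler keeps
// them glued together; on a big-endian target the register list is reversed
// first, so the high half lands in the first register.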

/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list.  This adds the code marker and includes the number of
/// values added into it.
void RegsForValue::AddInlineAsmOperands(unsigned Code, SelectionDAG &DAG,
                                        std::vector<SDOperand> &Ops) const {
  Ops.push_back(DAG.getConstant(Code | (Regs.size() << 3), MVT::i32));
  for (unsigned i = 0, e = Regs.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(Regs[i], RegVT));
}
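
// Worked example (editorial sketch): for a value expanded into two registers
// with operand-kind code 2, the marker constant is
//
//   2 | (2 << 3)  ==  0x12
//
// i.e. the low 3 bits carry the kind of inline asm operand and the upper
// bits its register count, followed by the register operands themselves.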

/// isAllocatableRegister - If the specified register is safe to allocate,
/// i.e. it isn't a stack pointer or some other special register, return the
/// register class for the register.  Otherwise, return null.
static const TargetRegisterClass *
isAllocatableRegister(unsigned Reg, MachineFunction &MF,
                      const TargetLowering &TLI, const MRegisterInfo *MRI) {
  for (MRegisterInfo::regclass_iterator RCI = MRI->regclass_begin(),
       E = MRI->regclass_end(); RCI != E; ++RCI) {
    const TargetRegisterClass *RC = *RCI;
    // If none of the value types for this register class are valid, we
    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
    bool isLegal = false;
    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
         I != E; ++I) {
      if (TLI.isTypeLegal(*I)) {
        isLegal = true;
        break;
      }
    }

    if (!isLegal) continue;

    // NOTE: This isn't ideal.  In particular, this might allocate the
    // frame pointer in functions that need it (due to them not being taken
    // out of allocation, because a variable sized allocation hasn't been seen
    // yet).  This is a slight code pessimization, but should still work.
    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
         E = RC->allocation_order_end(MF); I != E; ++I)
      if (*I == Reg)
        return RC;
  }
  return 0;
}

RegsForValue SelectionDAGLowering::
GetRegistersForValue(const std::string &ConstrCode,
                     MVT::ValueType VT, bool isOutReg, bool isInReg,
                     std::set<unsigned> &OutputRegs,
                     std::set<unsigned> &InputRegs) {
  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
    TLI.getRegForInlineAsmConstraint(ConstrCode, VT);
  std::vector<unsigned> Regs;

  unsigned NumRegs = VT != MVT::Other ? TLI.getNumElements(VT) : 1;
  MVT::ValueType RegVT;
  MVT::ValueType ValueVT = VT;

  if (PhysReg.first) {
    if (VT == MVT::Other)
      ValueVT = *PhysReg.second->vt_begin();
    RegVT = VT;

    // This is an explicit reference to a physical register.
    Regs.push_back(PhysReg.first);

    // If this is an expanded reference, add the rest of the regs to Regs.
    if (NumRegs != 1) {
      RegVT = *PhysReg.second->vt_begin();
      TargetRegisterClass::iterator I = PhysReg.second->begin();
      TargetRegisterClass::iterator E = PhysReg.second->end();
      for (; *I != PhysReg.first; ++I)
        assert(I != E && "Didn't find reg!");

      // Already added the first reg.
      --NumRegs; ++I;
      for (; NumRegs; --NumRegs, ++I) {
        assert(I != E && "Ran out of registers to allocate!");
        Regs.push_back(*I);
      }
    }
    return RegsForValue(Regs, RegVT, ValueVT);
  }

  // This is a reference to a register class.  Allocate NumRegs consecutive,
  // available, registers from the class.
  std::vector<unsigned> RegClassRegs =
    TLI.getRegClassForInlineAsmConstraint(ConstrCode, VT);

  const MRegisterInfo *MRI = DAG.getTarget().getRegisterInfo();
  MachineFunction &MF = *CurMBB->getParent();
  unsigned NumAllocated = 0;
  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
    unsigned Reg = RegClassRegs[i];
    // See if this register is available.
    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
        (isInReg  && InputRegs.count(Reg))) {    // Already used.
      // Make sure we find consecutive registers.
      NumAllocated = 0;
      continue;
    }

    // Check to see if this register is allocatable (i.e. don't give out the
    // stack pointer).
    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, MRI);
    if (!RC) {
      // Make sure we find consecutive registers.
      NumAllocated = 0;
      continue;
    }

    // Okay, this register is good, we can use it.
    ++NumAllocated;

1363    // If we allocated enough consecutive registers, we are done.
1364    if (NumAllocated == NumRegs) {
1365      unsigned RegStart = (i-NumAllocated)+1;
1366      unsigned RegEnd   = i+1;
1367      // Mark all of the allocated registers used.
1368      for (unsigned i = RegStart; i != RegEnd; ++i) {
1369        unsigned Reg = RegClassRegs[i];
1370        Regs.push_back(Reg);
1371        if (isOutReg) OutputRegs.insert(Reg);    // Mark reg used.
1372        if (isInReg)  InputRegs.insert(Reg);     // Mark reg used.
1373      }
1374
1375      return RegsForValue(Regs, *RC->vt_begin(), VT);
1376    }
1377  }
1378
1379  // Otherwise, we couldn't allocate enough registers for this.
1380  return RegsForValue();
1381}
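    // A sketch of what the loop above does: an i64 operand constrained to a
    // 32-bit register class needs NumRegs == 2, so two *consecutive* registers
    // in the allocation order must be free of entries in OutputRegs/InputRegs;
    // any collision resets NumAllocated and the run starts over.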
1382
1383
1384/// visitInlineAsm - Handle a call to an InlineAsm object.
1385///
1386void SelectionDAGLowering::visitInlineAsm(CallInst &I) {
1387  InlineAsm *IA = cast<InlineAsm>(I.getOperand(0));
1388
1389  SDOperand AsmStr = DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
1390                                                 MVT::Other);
1391
1392  // Note, we treat inline asms both with and without side-effects as the same.
1393  // If an inline asm doesn't have side effects and doesn't access memory, we
1394  // could choose to not chain it, though we currently always chain it.
1395  bool hasSideEffects = IA->hasSideEffects();
1396
1397  std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
1398  std::vector<MVT::ValueType> ConstraintVTs;
1399
1400  /// AsmNodeOperands - The operand list for the INLINEASM node: the chain,
1401  /// the asm string, and then groups of operands, each introduced by a flag
1402  /// word encoding the operand kind and the number of values that follow.
1403  std::vector<SDOperand> AsmNodeOperands;
1404  AsmNodeOperands.push_back(SDOperand());  // reserve space for input chain
1405  AsmNodeOperands.push_back(AsmStr);
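      // Encoding sketch for the operand groups pushed below: each group starts
      // with an i32 constant Kind | (NumVals << 3), where Kind is 1 (REGUSE),
      // 2 (REGDEF), 3 (IMM) or 4 (MEM), followed by NumVals operand values.
      // Announcing a single output register, for example, looks like:
      //   AsmNodeOperands.push_back(
      //     DAG.getConstant(2 /*REGDEF*/ | (1 << 3), MVT::i32));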
1406
1407  SDOperand Chain = getRoot();
1408  SDOperand Flag;
1409
1410  // We fully assign registers here at isel time.  This is not optimal, but
1411  // should work.  For register classes that correspond to LLVM classes, we
1412  // could let the LLVM RA do its thing, but we currently don't.  Do a prepass
1413  // over the constraints, collecting fixed registers that we know we can't use.
1414  std::set<unsigned> OutputRegs, InputRegs;
1415  unsigned OpNum = 1;
1416  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
1417    assert(Constraints[i].Codes.size() == 1 && "Only handles one code so far!");
1418    std::string &ConstraintCode = Constraints[i].Codes[0];
1419
1420    MVT::ValueType OpVT;
1421
1422    // Compute the value type for each operand and add it to ConstraintVTs.
1423    switch (Constraints[i].Type) {
1424    case InlineAsm::isOutput:
1425      if (!Constraints[i].isIndirectOutput) {
1426        assert(I.getType() != Type::VoidTy && "Bad inline asm!");
1427        OpVT = TLI.getValueType(I.getType());
1428      } else {
1429        const Type *OpTy = I.getOperand(OpNum)->getType();
1430        OpVT = TLI.getValueType(cast<PointerType>(OpTy)->getElementType());
1431        OpNum++;  // Consumes a call operand.
1432      }
1433      break;
1434    case InlineAsm::isInput:
1435      OpVT = TLI.getValueType(I.getOperand(OpNum)->getType());
1436      OpNum++;  // Consumes a call operand.
1437      break;
1438    case InlineAsm::isClobber:
1439      OpVT = MVT::Other;
1440      break;
1441    }
1442
1443    ConstraintVTs.push_back(OpVT);
1444
1445    if (TLI.getRegForInlineAsmConstraint(ConstraintCode, OpVT).first == 0)
1446      continue;  // Not assigned a fixed reg.
1447
1448    // Build a list of regs that this operand uses.  This has one element for
1449    // promoted operands and multiple elements for expanded operands.
1450    RegsForValue Regs = GetRegistersForValue(ConstraintCode, OpVT,
1451                                             false, false,
1452                                             OutputRegs, InputRegs);
1453
1454    switch (Constraints[i].Type) {
1455    case InlineAsm::isOutput:
1456      // We can't assign any other output to this register.
1457      OutputRegs.insert(Regs.Regs.begin(), Regs.Regs.end());
1458      // If this is an early-clobber output, it cannot be assigned to the same
1459      // register as an input operand.
1460      if (Constraints[i].isEarlyClobber || Constraints[i].hasMatchingInput)
1461        InputRegs.insert(Regs.Regs.begin(), Regs.Regs.end());
1462      break;
1463    case InlineAsm::isInput:
1464      // We can't assign any other input to this register.
1465      InputRegs.insert(Regs.Regs.begin(), Regs.Regs.end());
1466      break;
1467    case InlineAsm::isClobber:
1468      // Clobbered regs cannot be used as inputs or outputs.
1469      InputRegs.insert(Regs.Regs.begin(), Regs.Regs.end());
1470      OutputRegs.insert(Regs.Regs.begin(), Regs.Regs.end());
1471      break;
1472    }
1473  }
1474
1475  // Loop over all of the inputs, copying the operand values into the
1476  // appropriate registers and processing the output regs.
1477  RegsForValue RetValRegs;
1478  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
1479  OpNum = 1;
1480
1481  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
1482    assert(Constraints[i].Codes.size() == 1 && "Only handles one code so far!");
1483    std::string &ConstraintCode = Constraints[i].Codes[0];
1484
1485    switch (Constraints[i].Type) {
1486    case InlineAsm::isOutput: {
1487      TargetLowering::ConstraintType CTy = TargetLowering::C_RegisterClass;
1488      if (ConstraintCode.size() == 1)   // not a physreg name.
1489        CTy = TLI.getConstraintType(ConstraintCode[0]);
1490
1491      if (CTy == TargetLowering::C_Memory) {
1492        // Memory output.
1493        SDOperand InOperandVal = getValue(I.getOperand(OpNum));
1494
1495        // Check that the operand (the address to store to) isn't a float.
1496        if (!MVT::isInteger(InOperandVal.getValueType()))
1497          assert(0 && "MATCH FAIL!");
1498
1499        if (!Constraints[i].isIndirectOutput)
1500          assert(0 && "MATCH FAIL!");
1501
1502        OpNum++;  // Consumes a call operand.
1503
1504        // Extend/truncate to the right pointer type if needed.
1505        MVT::ValueType PtrType = TLI.getPointerTy();
1506        if (InOperandVal.getValueType() < PtrType)
1507          InOperandVal = DAG.getNode(ISD::ZERO_EXTEND, PtrType, InOperandVal);
1508        else if (InOperandVal.getValueType() > PtrType)
1509          InOperandVal = DAG.getNode(ISD::TRUNCATE, PtrType, InOperandVal);
1510
1511        // Add information to the INLINEASM node to know about this output.
1512        unsigned ResOpType = 4/*MEM*/ | (1 << 3);
1513        AsmNodeOperands.push_back(DAG.getConstant(ResOpType, MVT::i32));
1514        AsmNodeOperands.push_back(InOperandVal);
1515        break;
1516      }
1517
1518      // Otherwise, this is a register output.
1519      assert(CTy == TargetLowering::C_RegisterClass && "Unknown op type!");
1520
1521      // If this is an early-clobber output, or if there is an input
1522      // constraint that matches this, we need to reserve the input register
1523      // so no other inputs allocate to it.
1524      bool UsesInputRegister = false;
1525      if (Constraints[i].isEarlyClobber || Constraints[i].hasMatchingInput)
1526        UsesInputRegister = true;
1527
1528      // Copy the output from the appropriate register.  Find a register that
1529      // we can use.
1530      RegsForValue Regs =
1531        GetRegistersForValue(ConstraintCode, ConstraintVTs[i],
1532                             true, UsesInputRegister,
1533                             OutputRegs, InputRegs);
1534      assert(!Regs.Regs.empty() && "Couldn't allocate output reg!");
1535
1536      if (!Constraints[i].isIndirectOutput) {
1537        assert(RetValRegs.Regs.empty() &&
1538               "Cannot have multiple output constraints yet!");
1539        assert(I.getType() != Type::VoidTy && "Bad inline asm!");
1540        RetValRegs = Regs;
1541      } else {
1542        IndirectStoresToEmit.push_back(std::make_pair(Regs,
1543                                                      I.getOperand(OpNum)));
1544        OpNum++;  // Consumes a call operand.
1545      }
1546
1547      // Add information to the INLINEASM node to know that this register is
1548      // set.
1549      Regs.AddInlineAsmOperands(2 /*REGDEF*/, DAG, AsmNodeOperands);
1550      break;
1551    }
1552    case InlineAsm::isInput: {
1553      SDOperand InOperandVal = getValue(I.getOperand(OpNum));
1554      OpNum++;  // Consumes a call operand.
1555
1556      if (isdigit(ConstraintCode[0])) {    // Matching constraint?
1557        // If this is required to match an output register we have already set,
1558        // just use its register.
1559        unsigned OperandNo = atoi(ConstraintCode.c_str());
1560
1561        // Scan until we find the already-emitted definition of this operand.
1562        // When we find it, create a RegsForValue operand.
1563        unsigned CurOp = 2;  // The first operand.
1564        for (; OperandNo; --OperandNo) {
1565          // Advance to the next operand.
1566          unsigned NumOps =
1567            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getValue();
1568          assert((NumOps & 7) == 2 /*REGDEF*/ &&
1569                 "Skipped past definitions?");
1570          CurOp += (NumOps>>3)+1;
1571        }
1572
1573        unsigned NumOps =
1574          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getValue();
1575        assert((NumOps & 7) == 2 /*REGDEF*/ &&
1576               "Skipped past definitions?");
1577
1578        // Add NumOps>>3 registers to MatchedRegs.
1579        RegsForValue MatchedRegs;
1580        MatchedRegs.ValueVT = InOperandVal.getValueType();
1581        MatchedRegs.RegVT   = AsmNodeOperands[CurOp+1].getValueType();
1582        for (unsigned i = 0, e = NumOps>>3; i != e; ++i) {
1583          unsigned Reg=cast<RegisterSDNode>(AsmNodeOperands[++CurOp])->getReg();
1584          MatchedRegs.Regs.push_back(Reg);
1585        }
1586
1587        // Use MatchedRegs to copy the input value into the matched registers.
1588        MatchedRegs.getCopyToRegs(InOperandVal, DAG, Chain, Flag);
1589        MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, DAG, AsmNodeOperands);
1590        break;
1591      }
1592
1593      TargetLowering::ConstraintType CTy = TargetLowering::C_RegisterClass;
1594      if (ConstraintCode.size() == 1)   // not a physreg name.
1595        CTy = TLI.getConstraintType(ConstraintCode[0]);
1596
1597      if (CTy == TargetLowering::C_Other) {
1598        if (!TLI.isOperandValidForConstraint(InOperandVal, ConstraintCode[0]))
1599          assert(0 && "MATCH FAIL!");
1600
1601        // Add information to the INLINEASM node to know about this input.
1602        unsigned ResOpType = 3 /*IMM*/ | (1 << 3);
1603        AsmNodeOperands.push_back(DAG.getConstant(ResOpType, MVT::i32));
1604        AsmNodeOperands.push_back(InOperandVal);
1605        break;
1606      } else if (CTy == TargetLowering::C_Memory) {
1607        // Memory input.
1608
1609        // Check that the operand isn't a float.
1610        if (!MVT::isInteger(InOperandVal.getValueType()))
1611          assert(0 && "MATCH FAIL!");
1612
1613        // Extend/truncate to the right pointer type if needed.
1614        MVT::ValueType PtrType = TLI.getPointerTy();
1615        if (InOperandVal.getValueType() < PtrType)
1616          InOperandVal = DAG.getNode(ISD::ZERO_EXTEND, PtrType, InOperandVal);
1617        else if (InOperandVal.getValueType() > PtrType)
1618          InOperandVal = DAG.getNode(ISD::TRUNCATE, PtrType, InOperandVal);
1619
1620        // Add information to the INLINEASM node to know about this input.
1621        unsigned ResOpType = 4/*MEM*/ | (1 << 3);
1622        AsmNodeOperands.push_back(DAG.getConstant(ResOpType, MVT::i32));
1623        AsmNodeOperands.push_back(InOperandVal);
1624        break;
1625      }
1626
1627      assert(CTy == TargetLowering::C_RegisterClass && "Unknown op type!");
1628
1629      // Copy the input into the appropriate registers.
1630      RegsForValue InRegs =
1631        GetRegistersForValue(ConstraintCode, ConstraintVTs[i],
1632                             false, true, OutputRegs, InputRegs);
1633      // FIXME: should be match fail.
1634      assert(!InRegs.Regs.empty() && "Couldn't allocate input reg!");
1635
1636      InRegs.getCopyToRegs(InOperandVal, DAG, Chain, Flag);
1637
1638      InRegs.AddInlineAsmOperands(1/*REGUSE*/, DAG, AsmNodeOperands);
1639      break;
1640    }
1641    case InlineAsm::isClobber: {
1642      RegsForValue ClobberedRegs =
1643        GetRegistersForValue(ConstraintCode, MVT::Other, false, false,
1644                             OutputRegs, InputRegs);
1645      // Add the clobbered value to the operand list, so that the register
1646      // allocator is aware that the physreg got clobbered.
1647      if (!ClobberedRegs.Regs.empty())
1648        ClobberedRegs.AddInlineAsmOperands(2/*REGDEF*/, DAG, AsmNodeOperands);
1649      break;
1650    }
1651    }
1652  }
1653
1654  // Finish up input operands.
1655  AsmNodeOperands[0] = Chain;
1656  if (Flag.Val) AsmNodeOperands.push_back(Flag);
1657
1658  std::vector<MVT::ValueType> VTs;
1659  VTs.push_back(MVT::Other);
1660  VTs.push_back(MVT::Flag);
1661  Chain = DAG.getNode(ISD::INLINEASM, VTs, AsmNodeOperands);
1662  Flag = Chain.getValue(1);
1663
1664  // If this asm returns a register value, copy the result from that register
1665  // and set it as the value of the call.
1666  if (!RetValRegs.Regs.empty())
1667    setValue(&I, RetValRegs.getCopyFromRegs(DAG, Chain, Flag));
1668
1669  std::vector<std::pair<SDOperand, Value*> > StoresToEmit;
1670
1671  // Process indirect outputs: first emit all of the flagged copies out of
1672  // physregs.
1673  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
1674    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
1675    Value *Ptr = IndirectStoresToEmit[i].second;
1676    SDOperand OutVal = OutRegs.getCopyFromRegs(DAG, Chain, Flag);
1677    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
1678  }
1679
1680  // Emit the non-flagged stores from the physregs.
1681  std::vector<SDOperand> OutChains;
1682  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i)
1683    OutChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1684                                    StoresToEmit[i].first,
1685                                    getValue(StoresToEmit[i].second),
1686                                    DAG.getSrcValue(StoresToEmit[i].second)));
1687  if (!OutChains.empty())
1688    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains);
1689  DAG.setRoot(Chain);
1690}
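    // Putting it together: a simple one-output, one-input asm yields an
    // INLINEASM node whose operand list looks roughly like
    //   chain, asmstr, 2|(1<<3), outreg, 1|(1<<3), inreg [, flag]
    // with the flagged CopyToReg/CopyFromReg nodes built around it as above.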
1691
1692
1693void SelectionDAGLowering::visitMalloc(MallocInst &I) {
1694  SDOperand Src = getValue(I.getOperand(0));
1695
1696  MVT::ValueType IntPtr = TLI.getPointerTy();
1697
1698  if (IntPtr < Src.getValueType())
1699    Src = DAG.getNode(ISD::TRUNCATE, IntPtr, Src);
1700  else if (IntPtr > Src.getValueType())
1701    Src = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, Src);
1702
1703  // Scale the source by the type size.
1704  uint64_t ElementSize = TD.getTypeSize(I.getType()->getElementType());
1705  Src = DAG.getNode(ISD::MUL, Src.getValueType(),
1706                    Src, getIntPtrConstant(ElementSize));
1707
1708  std::vector<std::pair<SDOperand, const Type*> > Args;
1709  Args.push_back(std::make_pair(Src, TLI.getTargetData().getIntPtrType()));
1710
1711  std::pair<SDOperand,SDOperand> Result =
1712    TLI.LowerCallTo(getRoot(), I.getType(), false, CallingConv::C, true,
1713                    DAG.getExternalSymbol("malloc", IntPtr),
1714                    Args, DAG);
1715  setValue(&I, Result.first);  // Pointers always fit in registers
1716  DAG.setRoot(Result.second);
1717}
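    // For instance, with 32-bit pointers, allocating N elements of a 4-byte
    // type emits Src = N * 4 (after N is extended or truncated to i32) and a
    // call to "malloc" whose first result is the pointer value.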
1718
1719void SelectionDAGLowering::visitFree(FreeInst &I) {
1720  std::vector<std::pair<SDOperand, const Type*> > Args;
1721  Args.push_back(std::make_pair(getValue(I.getOperand(0)),
1722                                TLI.getTargetData().getIntPtrType()));
1723  MVT::ValueType IntPtr = TLI.getPointerTy();
1724  std::pair<SDOperand,SDOperand> Result =
1725    TLI.LowerCallTo(getRoot(), Type::VoidTy, false, CallingConv::C, true,
1726                    DAG.getExternalSymbol("free", IntPtr), Args, DAG);
1727  DAG.setRoot(Result.second);
1728}
1729
1730// InsertAtEndOfBasicBlock - This method should be implemented by targets that
1731// mark instructions with the 'usesCustomDAGSchedInserter' flag.  These
1732// instructions are special in various ways, which require special support to
1733// insert.  The specified MachineInstr is created but not inserted into any
1734// basic blocks, and the scheduler passes ownership of it to this method.
1735MachineBasicBlock *TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
1736                                                       MachineBasicBlock *MBB) {
1737  std::cerr << "If a target marks an instruction with "
1738               "'usesCustomDAGSchedInserter', it must implement "
1739               "TargetLowering::InsertAtEndOfBasicBlock!\n";
1740  abort();
1741  return 0;
1742}
1743
1744void SelectionDAGLowering::visitVAStart(CallInst &I) {
1745  DAG.setRoot(DAG.getNode(ISD::VASTART, MVT::Other, getRoot(),
1746                          getValue(I.getOperand(1)),
1747                          DAG.getSrcValue(I.getOperand(1))));
1748}
1749
1750void SelectionDAGLowering::visitVAArg(VAArgInst &I) {
1751  SDOperand V = DAG.getVAArg(TLI.getValueType(I.getType()), getRoot(),
1752                             getValue(I.getOperand(0)),
1753                             DAG.getSrcValue(I.getOperand(0)));
1754  setValue(&I, V);
1755  DAG.setRoot(V.getValue(1));
1756}
1757
1758void SelectionDAGLowering::visitVAEnd(CallInst &I) {
1759  DAG.setRoot(DAG.getNode(ISD::VAEND, MVT::Other, getRoot(),
1760                          getValue(I.getOperand(1)),
1761                          DAG.getSrcValue(I.getOperand(1))));
1762}
1763
1764void SelectionDAGLowering::visitVACopy(CallInst &I) {
1765  DAG.setRoot(DAG.getNode(ISD::VACOPY, MVT::Other, getRoot(),
1766                          getValue(I.getOperand(1)),
1767                          getValue(I.getOperand(2)),
1768                          DAG.getSrcValue(I.getOperand(1)),
1769                          DAG.getSrcValue(I.getOperand(2))));
1770}
1771
1772// It is always conservatively correct for llvm.returnaddress and
1773// llvm.frameaddress to return 0.
1774std::pair<SDOperand, SDOperand>
1775TargetLowering::LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain,
1776                                        unsigned Depth, SelectionDAG &DAG) {
1777  return std::make_pair(DAG.getConstant(0, getPointerTy()), Chain);
1778}
1779
1780SDOperand TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
1781  assert(0 && "LowerOperation not implemented for this target!");
1782  abort();
1783  return SDOperand();
1784}
1785
1786SDOperand TargetLowering::CustomPromoteOperation(SDOperand Op,
1787                                                 SelectionDAG &DAG) {
1788  assert(0 && "CustomPromoteOperation not implemented for this target!");
1789  abort();
1790  return SDOperand();
1791}
1792
1793void SelectionDAGLowering::visitFrameReturnAddress(CallInst &I, bool isFrame) {
1794  unsigned Depth = (unsigned)cast<ConstantUInt>(I.getOperand(1))->getValue();
1795  std::pair<SDOperand,SDOperand> Result =
1796    TLI.LowerFrameReturnAddress(isFrame, getRoot(), Depth, DAG);
1797  setValue(&I, Result.first);
1798  DAG.setRoot(Result.second);
1799}
1800
1801/// getMemsetValue - Replicate the memset value operand across the given
1802/// value type (e.g. a byte pattern splatted to fill an i32).
1803static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
1804                                SelectionDAG &DAG) {
1805  MVT::ValueType CurVT = VT;
1806  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
1807    uint64_t Val   = C->getValue() & 255;
1808    unsigned Shift = 8;
1809    while (CurVT != MVT::i8) {
1810      Val = (Val << Shift) | Val;
1811      Shift <<= 1;
1812      CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
1813    }
1814    return DAG.getConstant(Val, VT);
1815  } else {
1816    Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
1817    unsigned Shift = 8;
1818    while (CurVT != MVT::i8) {
1819      Value =
1820        DAG.getNode(ISD::OR, VT,
1821                    DAG.getNode(ISD::SHL, VT, Value,
1822                                DAG.getConstant(Shift, MVT::i8)), Value);
1823      Shift <<= 1;
1824      CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
1825    }
1826
1827    return Value;
1828  }
1829}
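    // Worked example: with Value = 0xAB and VT = MVT::i32, the constant path
    // above doubles the pattern each iteration (0xAB -> 0xABAB -> 0xABABABAB)
    // as CurVT steps down from i32 to i8, yielding the constant 0xABABABAB.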
1830
1831/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
1832/// when a memcpy is lowered to stores of constant data because the source is
1833/// a constant string ptr.
1834static SDOperand getMemsetStringVal(MVT::ValueType VT,
1835                                    SelectionDAG &DAG, TargetLowering &TLI,
1836                                    std::string &Str, unsigned Offset) {
1837  MVT::ValueType CurVT = VT;
1838  uint64_t Val = 0;
1839  unsigned MSB = getSizeInBits(VT) / 8;
1840  if (TLI.isLittleEndian())
1841    Offset = Offset + MSB - 1;
1842  for (unsigned i = 0; i != MSB; ++i) {
1843    Val = (Val << 8) | Str[Offset];
1844    Offset += TLI.isLittleEndian() ? -1 : 1;
1845  }
1846  return DAG.getConstant(Val, VT);
1847}
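    // Worked example (little-endian target): with Str = "abcd", VT = MVT::i32
    // and Offset = 0, the loop reads 'd','c','b','a' and builds the constant
    // 0x64636261, so 'a' ends up in the lowest-addressed byte when stored.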
1848
1849/// getMemBasePlusOffset - Returns a node computing Base + Offset.
1850static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
1851                                      SelectionDAG &DAG, TargetLowering &TLI) {
1852  MVT::ValueType VT = Base.getValueType();
1853  return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
1854}
1855
1856/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
1857/// to replace the memset / memcpy is below the threshold. It also returns the
1858/// types of the sequence of memory ops to perform memset / memcpy.
1859static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
1860                                     unsigned Limit, uint64_t Size,
1861                                     unsigned Align, TargetLowering &TLI) {
1862  MVT::ValueType VT;
1863
1864  if (TLI.allowsUnalignedMemoryAccesses()) {
1865    VT = MVT::i64;
1866  } else {
1867    switch (Align & 7) {
1868    case 0:
1869      VT = MVT::i64;
1870      break;
1871    case 4:
1872      VT = MVT::i32;
1873      break;
1874    case 2:
1875      VT = MVT::i16;
1876      break;
1877    default:
1878      VT = MVT::i8;
1879      break;
1880    }
1881  }
1882
1883  MVT::ValueType LVT = MVT::i64;
1884  while (!TLI.isTypeLegal(LVT))
1885    LVT = (MVT::ValueType)((unsigned)LVT - 1);
1886  assert(MVT::isInteger(LVT));
1887
1888  if (VT > LVT)
1889    VT = LVT;
1890
1891  unsigned NumMemOps = 0;
1892  while (Size != 0) {
1893    unsigned VTSize = getSizeInBits(VT) / 8;
1894    while (VTSize > Size) {
1895      VT = (MVT::ValueType)((unsigned)VT - 1);
1896      VTSize >>= 1;
1897    }
1898    assert(MVT::isInteger(VT));
1899
1900    if (++NumMemOps > Limit)
1901      return false;
1902    MemOps.push_back(VT);
1903    Size -= VTSize;
1904  }
1905
1906  return true;
1907}
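    // Worked example: a 7-byte operation with Align == 4, on a target without
    // unaligned accesses where i64 is illegal but i32 is legal and Limit >= 3,
    // produces MemOps == { i32, i16, i8 }: Size shrinks 7 -> 3 -> 1 -> 0 as VT
    // is stepped down to fit the remaining bytes.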
1908
1909void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) {
1910  SDOperand Op1 = getValue(I.getOperand(1));
1911  SDOperand Op2 = getValue(I.getOperand(2));
1912  SDOperand Op3 = getValue(I.getOperand(3));
1913  SDOperand Op4 = getValue(I.getOperand(4));
1914  unsigned Align = (unsigned)cast<ConstantSDNode>(Op4)->getValue();
1915  if (Align == 0) Align = 1;
1916
1917  if (ConstantSDNode *Size = dyn_cast<ConstantSDNode>(Op3)) {
1918    std::vector<MVT::ValueType> MemOps;
1919
1920    // Expand memset / memcpy to a series of load / store ops
1921    // if the size operand falls below a certain threshold.
1922    std::vector<SDOperand> OutChains;
1923    switch (Op) {
1924    default: break;  // Do nothing for now.
1925    case ISD::MEMSET: {
1926      if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(),
1927                                   Size->getValue(), Align, TLI)) {
1928        unsigned NumMemOps = MemOps.size();
1929        unsigned Offset = 0;
1930        for (unsigned i = 0; i < NumMemOps; i++) {
1931          MVT::ValueType VT = MemOps[i];
1932          unsigned VTSize = getSizeInBits(VT) / 8;
1933          SDOperand Value = getMemsetValue(Op2, VT, DAG);
1934          SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, getRoot(),
1935                                        Value,
1936                                    getMemBasePlusOffset(Op1, Offset, DAG, TLI),
1937                                      DAG.getSrcValue(I.getOperand(1), Offset));
1938          OutChains.push_back(Store);
1939          Offset += VTSize;
1940        }
1941      }
1942      break;
1943    }
1944    case ISD::MEMCPY: {
1945      if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemcpy(),
1946                                   Size->getValue(), Align, TLI)) {
1947        unsigned NumMemOps = MemOps.size();
1948        unsigned SrcOff = 0, DstOff = 0, SrcDelta = 0;
1949        GlobalAddressSDNode *G = NULL;
1950        std::string Str;
1951        bool CopyFromStr = false;
1952
1953        if (Op2.getOpcode() == ISD::GlobalAddress)
1954          G = cast<GlobalAddressSDNode>(Op2);
1955        else if (Op2.getOpcode() == ISD::ADD &&
1956                 Op2.getOperand(0).getOpcode() == ISD::GlobalAddress &&
1957                 Op2.getOperand(1).getOpcode() == ISD::Constant) {
1958          G = cast<GlobalAddressSDNode>(Op2.getOperand(0));
1959          SrcDelta = cast<ConstantSDNode>(Op2.getOperand(1))->getValue();
1960        }
1961        if (G) {
1962          GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
1963          if (GV) {
1964            Str = getStringValue(GV);
1965            if (!Str.empty()) {
1966              CopyFromStr = true;
1967              SrcOff += SrcDelta;
1968            }
1969          }
1970        }
1971
1972        for (unsigned i = 0; i < NumMemOps; i++) {
1973          MVT::ValueType VT = MemOps[i];
1974          unsigned VTSize = getSizeInBits(VT) / 8;
1975          SDOperand Value, Chain, Store;
1976
1977          if (CopyFromStr) {
1978            Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff);
1979            Chain = getRoot();
1980            Store =
1981              DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
1982                          getMemBasePlusOffset(Op1, DstOff, DAG, TLI),
1983                          DAG.getSrcValue(I.getOperand(1), DstOff));
1984          } else {
1985            Value = DAG.getLoad(VT, getRoot(),
1986                        getMemBasePlusOffset(Op2, SrcOff, DAG, TLI),
1987                        DAG.getSrcValue(I.getOperand(2), SrcOff));
1988            Chain = Value.getValue(1);
1989            Store =
1990              DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
1991                          getMemBasePlusOffset(Op1, DstOff, DAG, TLI),
1992                          DAG.getSrcValue(I.getOperand(1), DstOff));
1993          }
1994          OutChains.push_back(Store);
1995          SrcOff += VTSize;
1996          DstOff += VTSize;
1997        }
1998      }
1999      break;
2000    }
2001    }
2002
2003    if (!OutChains.empty()) {
2004      DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains));
2005      return;
2006    }
2007  }
2008
2009  std::vector<SDOperand> Ops;
2010  Ops.push_back(getRoot());
2011  Ops.push_back(Op1);
2012  Ops.push_back(Op2);
2013  Ops.push_back(Op3);
2014  Ops.push_back(Op4);
2015  DAG.setRoot(DAG.getNode(Op, MVT::Other, Ops));
2016}
2017
2018//===----------------------------------------------------------------------===//
2019// SelectionDAGISel code
2020//===----------------------------------------------------------------------===//
2021
2022unsigned SelectionDAGISel::MakeReg(MVT::ValueType VT) {
2023  return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
2024}
2025
2026void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
2027  // FIXME: we only modify the CFG to split critical edges.  This
2028  // updates dom and loop info.
2029}
2030
2031
2032/// InsertGEPComputeCode - Insert code into BB to compute Ptr+PtrOffset,
2033/// casting to the type of GEPI.
2034static Value *InsertGEPComputeCode(Value *&V, BasicBlock *BB, Instruction *GEPI,
2035                                   Value *Ptr, Value *PtrOffset) {
2036  if (V) return V;   // Already computed.
2037
2038  BasicBlock::iterator InsertPt;
2039  if (BB == GEPI->getParent()) {
2040    // If inserting into the GEP's block, insert right after the GEP.
2041    InsertPt = GEPI;
2042    ++InsertPt;
2043  } else {
2044    // Otherwise, insert at the top of BB, after any PHI nodes
2045    InsertPt = BB->begin();
2046    while (isa<PHINode>(InsertPt)) ++InsertPt;
2047  }
2048
2049  // If Ptr is itself a cast, but in some other BB, emit a copy of the cast into
2050  // BB so that there is only one value live across basic blocks (the cast
2051  // operand).
2052  if (CastInst *CI = dyn_cast<CastInst>(Ptr))
2053    if (CI->getParent() != BB && isa<PointerType>(CI->getOperand(0)->getType()))
2054      Ptr = new CastInst(CI->getOperand(0), CI->getType(), "", InsertPt);
2055
2056  // Add the offset, cast it to the right type.
2057  Ptr = BinaryOperator::createAdd(Ptr, PtrOffset, "", InsertPt);
2058  Ptr = new CastInst(Ptr, GEPI->getType(), "", InsertPt);
2059  return V = Ptr;
2060}
2061
2062
2063/// OptimizeGEPExpression - Since we are doing basic-block-at-a-time instruction
2064/// selection, we want to be a bit careful about some things.  In particular, if
2065/// we have a GEP instruction that is used in a different block than it is
2066/// defined, the addressing expression of the GEP cannot be folded into loads or
2067/// stores that use it.  In this case, decompose the GEP and move constant
2068/// indices into blocks that use it.
2069static void OptimizeGEPExpression(GetElementPtrInst *GEPI,
2070                                  const TargetData &TD) {
2071  // If this GEP is only used inside the block it is defined in, there is no
2072  // need to rewrite it.
2073  bool isUsedOutsideDefBB = false;
2074  BasicBlock *DefBB = GEPI->getParent();
2075  for (Value::use_iterator UI = GEPI->use_begin(), E = GEPI->use_end();
2076       UI != E; ++UI) {
2077    if (cast<Instruction>(*UI)->getParent() != DefBB) {
2078      isUsedOutsideDefBB = true;
2079      break;
2080    }
2081  }
2082  if (!isUsedOutsideDefBB) return;
2083
2084  // If this GEP has no non-zero constant indices, there is nothing we can do;
2085  // ignore it.
2086  bool hasConstantIndex = false;
2087  for (GetElementPtrInst::op_iterator OI = GEPI->op_begin()+1,
2088       E = GEPI->op_end(); OI != E; ++OI) {
2089    if (ConstantInt *CI = dyn_cast<ConstantInt>(*OI))
2090      if (CI->getRawValue()) {
2091        hasConstantIndex = true;
2092        break;
2093      }
2094  }
2095  // If this is "GEP &Alloca, 0, 0", forward subst the frame index into uses.
2096  if (!hasConstantIndex && !isa<AllocaInst>(GEPI->getOperand(0))) return;
2097
2098  // Otherwise, decompose the GEP instruction into multiplies and adds.  Sum the
2099  // constant offset (which we now know is non-zero) and deal with it later.
2100  uint64_t ConstantOffset = 0;
2101  const Type *UIntPtrTy = TD.getIntPtrType();
2102  Value *Ptr = new CastInst(GEPI->getOperand(0), UIntPtrTy, "", GEPI);
2103  const Type *Ty = GEPI->getOperand(0)->getType();
2104
2105  for (GetElementPtrInst::op_iterator OI = GEPI->op_begin()+1,
2106       E = GEPI->op_end(); OI != E; ++OI) {
2107    Value *Idx = *OI;
2108    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
2109      unsigned Field = cast<ConstantUInt>(Idx)->getValue();
2110      if (Field)
2111        ConstantOffset += TD.getStructLayout(StTy)->MemberOffsets[Field];
2112      Ty = StTy->getElementType(Field);
2113    } else {
2114      Ty = cast<SequentialType>(Ty)->getElementType();
2115
2116      // Handle constant subscripts.
2117      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
2118        if (CI->getRawValue() == 0) continue;
2119
2120        if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(CI))
2121          ConstantOffset += (int64_t)TD.getTypeSize(Ty)*CSI->getValue();
2122        else
2123          ConstantOffset+=TD.getTypeSize(Ty)*cast<ConstantUInt>(CI)->getValue();
2124        continue;
2125      }
2126
2127      // Ptr = Ptr + Idx * ElementSize;
2128
2129      // Cast Idx to UIntPtrTy if needed.
2130      Idx = new CastInst(Idx, UIntPtrTy, "", GEPI);
2131
2132      uint64_t ElementSize = TD.getTypeSize(Ty);
2133      // Mask off bits that should not be set.
2134      ElementSize &= ~0ULL >> (64-UIntPtrTy->getPrimitiveSizeInBits());
2135      Constant *SizeCst = ConstantUInt::get(UIntPtrTy, ElementSize);
2136
2137      // Multiply by the element size and add to the base.
2138      Idx = BinaryOperator::createMul(Idx, SizeCst, "", GEPI);
2139      Ptr = BinaryOperator::createAdd(Ptr, Idx, "", GEPI);
2140    }
2141  }
2142
2143  // Make sure that the offset fits in uintptr_t.
2144  ConstantOffset &= ~0ULL >> (64-UIntPtrTy->getPrimitiveSizeInBits());
2145  Constant *PtrOffset = ConstantUInt::get(UIntPtrTy, ConstantOffset);
2146
2147  // Okay, we have now emitted all of the variable index parts to the BB that
2148  // the GEP is defined in.  Loop over all of the using instructions, inserting
2149  // an "add Ptr, ConstantOffset" into each block that uses it and update the
2150  // instruction to use the newly computed value, making GEPI dead.  When the
2151  // user is a load or store instruction address, we emit the add into the user
2152  // block, otherwise we use a canonical version right next to the gep (these
2153  // won't be foldable as addresses, so we might as well share the computation).
2154
2155  std::map<BasicBlock*,Value*> InsertedExprs;
2156  while (!GEPI->use_empty()) {
2157    Instruction *User = cast<Instruction>(GEPI->use_back());
2158
2159    // If this use is not foldable into the addressing mode, use a version
2160    // emitted in the GEP block.
2161    Value *NewVal;
2162    if (!isa<LoadInst>(User) &&
2163        (!isa<StoreInst>(User) || User->getOperand(0) == GEPI)) {
2164      NewVal = InsertGEPComputeCode(InsertedExprs[DefBB], DefBB, GEPI,
2165                                    Ptr, PtrOffset);
2166    } else {
2167      // Otherwise, insert the code in the User's block so it can be folded into
2168      // any users in that block.
2169      NewVal = InsertGEPComputeCode(InsertedExprs[User->getParent()],
2170                                    User->getParent(), GEPI,
2171                                    Ptr, PtrOffset);
2172    }
2173    User->replaceUsesOfWith(GEPI, NewVal);
2174  }
2175
2176  // Finally, the GEP is dead, remove it.
2177  GEPI->eraseFromParent();
2178}
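    // For example: a GEP with a constant field offset used by loads in two
    // other blocks keeps its variable-index arithmetic in DefBB, while each
    // using block gets its own "add Ptr, ConstantOffset" plus a cast back to
    // the GEP's type, so the address computation can fold into those loads.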
2179
2180bool SelectionDAGISel::runOnFunction(Function &Fn) {
2181  MachineFunction &MF = MachineFunction::construct(&Fn, TLI.getTargetMachine());
2182  RegMap = MF.getSSARegMap();
2183  DEBUG(std::cerr << "\n\n\n=== " << Fn.getName() << "\n");
2184
2185  // First, split all critical edges for PHI nodes with incoming values that are
2186  // constants, so that the load of the constant into a vreg will not be placed
2187  // into MBBs that are used some other way.
2188  //
2189  // In this pass we also look for GEP instructions that are used across basic
2190  // blocks and rewrite them to improve basic-block-at-a-time selection.
2191  //
2192  for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
2193    PHINode *PN;
2194    BasicBlock::iterator BBI;
2195    for (BBI = BB->begin(); (PN = dyn_cast<PHINode>(BBI)); ++BBI)
2196      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
2197        if (isa<Constant>(PN->getIncomingValue(i)))
2198          SplitCriticalEdge(PN->getIncomingBlock(i), BB);
2199
2200    for (BasicBlock::iterator E = BB->end(); BBI != E; )
2201      if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(BBI++))
2202        OptimizeGEPExpression(GEPI, TLI.getTargetData());
2203  }
2204
2205  FunctionLoweringInfo FuncInfo(TLI, Fn, MF);
2206
2207  for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
2208    SelectBasicBlock(I, MF, FuncInfo);
2209
2210  return true;
2211}
2212
2213
2214SDOperand SelectionDAGISel::
2215CopyValueToVirtualRegister(SelectionDAGLowering &SDL, Value *V, unsigned Reg) {
2216  SDOperand Op = SDL.getValue(V);
2217  assert((Op.getOpcode() != ISD::CopyFromReg ||
2218          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
2219         "Copy from a reg to the same reg!");
2220
2221  // If this type is not legal, we must make sure to not create an invalid
2222  // register use.
2223  MVT::ValueType SrcVT = Op.getValueType();
2224  MVT::ValueType DestVT = TLI.getTypeToTransformTo(SrcVT);
2225  SelectionDAG &DAG = SDL.DAG;
2226  if (SrcVT == DestVT) {
2227    return DAG.getCopyToReg(SDL.getRoot(), Reg, Op);
2228  } else if (SrcVT < DestVT) {
2229    // The src value is promoted to the register.
2230    if (MVT::isFloatingPoint(SrcVT))
2231      Op = DAG.getNode(ISD::FP_EXTEND, DestVT, Op);
2232    else
2233      Op = DAG.getNode(ISD::ANY_EXTEND, DestVT, Op);
2234    return DAG.getCopyToReg(SDL.getRoot(), Reg, Op);
2235  } else  {
2236    // The src value is expanded into multiple registers.
2237    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DestVT,
2238                               Op, DAG.getConstant(0, MVT::i32));
2239    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DestVT,
2240                               Op, DAG.getConstant(1, MVT::i32));
2241    Op = DAG.getCopyToReg(SDL.getRoot(), Reg, Lo);
2242    return DAG.getCopyToReg(Op, Reg+1, Hi);
2243  }
2244}
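    // For instance, copying an i64 value on a target where i64 expands to two
    // i32s takes the last case above: EXTRACT_ELEMENT indices 0 and 1 split
    // the value into Lo/Hi halves, which land in the consecutive virtual
    // registers Reg and Reg+1.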
2245
2246void SelectionDAGISel::
2247LowerArguments(BasicBlock *BB, SelectionDAGLowering &SDL,
2248               std::vector<SDOperand> &UnorderedChains) {
2249  // If this is the entry block, emit arguments.
2250  Function &F = *BB->getParent();
2251  FunctionLoweringInfo &FuncInfo = SDL.FuncInfo;
2252  SDOperand OldRoot = SDL.DAG.getRoot();
2253  std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
2254
2255  unsigned a = 0;
2256  for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
2257       AI != E; ++AI, ++a)
2258    if (!AI->use_empty()) {
2259      SDL.setValue(AI, Args[a]);
2260
2261      // If this argument is live outside of the entry block, insert a copy from
2262      // wherever we got it to the vreg that other BBs will reference it as.
2263      if (FuncInfo.ValueMap.count(AI)) {
2264        SDOperand Copy =
2265          CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]);
2266        UnorderedChains.push_back(Copy);
2267      }
2268    }
2269
2270  // Next, if the function has live ins that need to be copied into vregs,
2271  // emit the copies now, into the top of the block.
2272  MachineFunction &MF = SDL.DAG.getMachineFunction();
2273  if (MF.livein_begin() != MF.livein_end()) {
2274    SSARegMap *RegMap = MF.getSSARegMap();
2275    const MRegisterInfo &MRI = *MF.getTarget().getRegisterInfo();
2276    for (MachineFunction::livein_iterator LI = MF.livein_begin(),
2277         E = MF.livein_end(); LI != E; ++LI)
2278      if (LI->second)
2279        MRI.copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
2280                         LI->first, RegMap->getRegClass(LI->second));
2281  }
2282
2283  // Finally, if the target has anything special to do, allow it to do so.
2284  EmitFunctionEntryCode(F, SDL.DAG.getMachineFunction());
2285}
2286
2287
2288void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB,
2289       std::vector<std::pair<MachineInstr*, unsigned> > &PHINodesToUpdate,
2290                                    FunctionLoweringInfo &FuncInfo) {
2291  SelectionDAGLowering SDL(DAG, TLI, FuncInfo);
2292
2293  std::vector<SDOperand> UnorderedChains;
2294
2295  // Lower any arguments needed in this block if this is the entry block.
2296  if (LLVMBB == &LLVMBB->getParent()->front())
2297    LowerArguments(LLVMBB, SDL, UnorderedChains);
2298
2299  BB = FuncInfo.MBBMap[LLVMBB];
2300  SDL.setCurrentBasicBlock(BB);
2301
2302  // Lower all of the non-terminator instructions.
2303  for (BasicBlock::iterator I = LLVMBB->begin(), E = --LLVMBB->end();
2304       I != E; ++I)
2305    SDL.visit(*I);
2306
2307  // Ensure that all instructions which are used outside of their defining
2308  // blocks are available as virtual registers.
2309  for (BasicBlock::iterator I = LLVMBB->begin(), E = LLVMBB->end(); I != E;++I)
2310    if (!I->use_empty() && !isa<PHINode>(I)) {
2311      std::map<const Value*, unsigned>::iterator VMI =FuncInfo.ValueMap.find(I);
2312      if (VMI != FuncInfo.ValueMap.end())
2313        UnorderedChains.push_back(
2314                           CopyValueToVirtualRegister(SDL, I, VMI->second));
2315    }
2316
2317  // Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
2318  // ensure constants are generated when needed.  Remember the virtual registers
2319  // that need to be added to the Machine PHI nodes as input.  We cannot just
2320  // directly add them, because expansion might result in multiple MBB's for one
2321  // BB.  As such, the start of the BB might correspond to a different MBB than
2322  // the end.
2323  //
2324
2325  // Emit constants only once even if used by multiple PHI nodes.
2326  std::map<Constant*, unsigned> ConstantsOut;
2327
2328  // Check successor nodes' PHI nodes that expect a constant to be available
2329  // from this block.
2330  TerminatorInst *TI = LLVMBB->getTerminator();
2331  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
2332    BasicBlock *SuccBB = TI->getSuccessor(succ);
2333    MachineBasicBlock::iterator MBBI = FuncInfo.MBBMap[SuccBB]->begin();
2334    PHINode *PN;
2335
2336    // At this point we know that there is a 1-1 correspondence between LLVM PHI
2337    // nodes and Machine PHI nodes, but the incoming operands have not been
2338    // emitted yet.
2339    for (BasicBlock::iterator I = SuccBB->begin();
2340         (PN = dyn_cast<PHINode>(I)); ++I)
2341      if (!PN->use_empty()) {
2342        unsigned Reg;
2343        Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
2344        if (Constant *C = dyn_cast<Constant>(PHIOp)) {
2345          unsigned &RegOut = ConstantsOut[C];
2346          if (RegOut == 0) {
2347            RegOut = FuncInfo.CreateRegForValue(C);
2348            UnorderedChains.push_back(
2349                             CopyValueToVirtualRegister(SDL, C, RegOut));
2350          }
2351          Reg = RegOut;
2352        } else {
2353          Reg = FuncInfo.ValueMap[PHIOp];
2354          if (Reg == 0) {
2355            assert(isa<AllocaInst>(PHIOp) &&
2356                   FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
2357                   "Didn't codegen value into a register!??");
2358            Reg = FuncInfo.CreateRegForValue(PHIOp);
2359            UnorderedChains.push_back(
2360                             CopyValueToVirtualRegister(SDL, PHIOp, Reg));
2361          }
2362        }
2363
2364        // Remember that this register needs to be added to the machine PHI node as
2365        // the input for this MBB.
2366        unsigned NumElements =
2367          TLI.getNumElements(TLI.getValueType(PN->getType()));
2368        for (unsigned i = 0, e = NumElements; i != e; ++i)
2369          PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
2370      }
2371  }
2372  ConstantsOut.clear();
2373
2374  // Turn all of the unordered chains into one factored node.
2375  if (!UnorderedChains.empty()) {
2376    SDOperand Root = SDL.getRoot();
2377    if (Root.getOpcode() != ISD::EntryToken) {
2378      unsigned i = 0, e = UnorderedChains.size();
2379      for (; i != e; ++i) {
2380        assert(UnorderedChains[i].Val->getNumOperands() > 1);
2381        if (UnorderedChains[i].Val->getOperand(0) == Root)
2382          break;  // Don't add the root if we already indirectly depend on it.
2383      }
2384
2385      if (i == e)
2386        UnorderedChains.push_back(Root);
2387    }
2388    DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, UnorderedChains));
2389  }
2390
2391  // Lower the terminator after the copies are emitted.
2392  SDL.visit(*LLVMBB->getTerminator());
2393
2394  // Make sure the root of the DAG is up-to-date.
2395  DAG.setRoot(SDL.getRoot());
2396}
2397
2398void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, MachineFunction &MF,
2399                                        FunctionLoweringInfo &FuncInfo) {
2400  SelectionDAG DAG(TLI, MF, getAnalysisToUpdate<MachineDebugInfo>());
2401  CurDAG = &DAG;
2402  std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
2403
2404  // First step, lower LLVM code to some DAG.  This DAG may use operations and
2405  // types that are not supported by the target.
2406  BuildSelectionDAG(DAG, LLVMBB, PHINodesToUpdate, FuncInfo);
2407
2408  // Run the DAG combiner in pre-legalize mode.
2409  DAG.Combine(false);
2410
2411  DEBUG(std::cerr << "Lowered selection DAG:\n");
2412  DEBUG(DAG.dump());
2413
2414  // Second step, hack on the DAG until it only uses operations and types that
2415  // the target supports.
2416  DAG.Legalize();
2417
2418  DEBUG(std::cerr << "Legalized selection DAG:\n");
2419  DEBUG(DAG.dump());
2420
2421  // Run the DAG combiner in post-legalize mode.
2422  DAG.Combine(true);
2423
2424  if (ViewISelDAGs) DAG.viewGraph();
2425
2426  // Third, instruction select all of the operations to machine code, adding the
2427  // code to the MachineBasicBlock.
2428  InstructionSelectBasicBlock(DAG);
2429
2430  DEBUG(std::cerr << "Selected machine code:\n");
2431  DEBUG(BB->dump());
2432
2433  // Next, now that we know which MBB the LLVM BB expanded to last, update the
2434  // PHI nodes in the successors.
2435  for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) {
2436    MachineInstr *PHI = PHINodesToUpdate[i].first;
2437    assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
2438           "This is not a machine PHI node that we are updating!");
2439    PHI->addRegOperand(PHINodesToUpdate[i].second);
2440    PHI->addMachineBasicBlockOperand(BB);
2441  }
2442
2443  // Finally, add the CFG edges from the last selected MBB to the successor
2444  // MBBs.
2445  TerminatorInst *TI = LLVMBB->getTerminator();
2446  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
2447    MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[TI->getSuccessor(i)];
2448    BB->addSuccessor(Succ0MBB);
2449  }
2450}
2451
2452//===----------------------------------------------------------------------===//
2453/// ScheduleAndEmitDAG - Pick a safe ordering and emit instructions for each
2454/// target node in the graph.
2455void SelectionDAGISel::ScheduleAndEmitDAG(SelectionDAG &DAG) {
2456  if (ViewSchedDAGs) DAG.viewGraph();
2457  ScheduleDAG *SL = NULL;
2458
2459  switch (ISHeuristic) {
2460  default: assert(0 && "Unrecognized scheduling heuristic");
2461  case defaultScheduling:
2462    if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency)
2463      SL = createSimpleDAGScheduler(noScheduling, DAG, BB);
2464    else /* TargetLowering::SchedulingForRegPressure */
2465      SL = createBURRListDAGScheduler(DAG, BB);
2466    break;
2467  case noScheduling:
2468  case simpleScheduling:
2469  case simpleNoItinScheduling:
2470    SL = createSimpleDAGScheduler(ISHeuristic, DAG, BB);
2471    break;
2472  case listSchedulingBURR:
2473    SL = createBURRListDAGScheduler(DAG, BB);
2474  }
2475  BB = SL->Run();
2476  delete SL;
2477}
2478
2479/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
2480/// by tblgen.  Others should not call it.
2481void SelectionDAGISel::
2482SelectInlineAsmMemoryOperands(std::vector<SDOperand> &Ops, SelectionDAG &DAG) {
2483  std::vector<SDOperand> InOps;
2484  std::swap(InOps, Ops);
2485
2486  Ops.push_back(InOps[0]);  // input chain.
2487  Ops.push_back(InOps[1]);  // input asm string.
2488
2489  const char *AsmStr = cast<ExternalSymbolSDNode>(InOps[1])->getSymbol();
2490  unsigned i = 2, e = InOps.size();
2491  if (InOps[e-1].getValueType() == MVT::Flag)
2492    --e;  // Don't process a flag operand if it is here.
2493
2494  while (i != e) {
2495    unsigned Flags = cast<ConstantSDNode>(InOps[i])->getValue();
2496    if ((Flags & 7) != 4 /*MEM*/) {
2497      // Just skip over this operand, copying the operands verbatim.
2498      Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+(Flags >> 3) + 1);
2499      i += (Flags >> 3) + 1;
2500    } else {
2501      assert((Flags >> 3) == 1 && "Memory operand with multiple values?");
2502      // Otherwise, this is a memory operand.  Ask the target to select it.
2503      std::vector<SDOperand> SelOps;
2504      if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps, DAG)) {
2505        std::cerr << "Could not match memory address.  Inline asm failure!\n";
2506        exit(1);
2507      }
2508
2509      // Add this to the output node.
2510      Ops.push_back(DAG.getConstant(4/*MEM*/ | (SelOps.size() << 3), MVT::i32));
2511      Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
2512      i += 2;
2513    }
2514  }
2515
2516  // Add the flag input back if present.
2517  if (e != InOps.size())
2518    Ops.push_back(InOps.back());
2519}
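    // For example, an incoming "4|(1<<3), addr" group is re-emitted above as
    // "4|(SelOps.size()<<3), <selected address operands>" once the target's
    // SelectInlineAsmMemoryOperand matches the address for the 'm' constraint.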
2520