X86ISelDAGToDAG.cpp revision 79964fdbaf11877c463d081ae3ffdc7a540f54c2
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines a DAG pattern matching instruction selector for X86,
11// converting from a legalized dag to a X86 dag.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "x86-isel"
16#include "X86.h"
17#include "X86InstrBuilder.h"
18#include "X86ISelLowering.h"
19#include "X86MachineFunctionInfo.h"
20#include "X86RegisterInfo.h"
21#include "X86Subtarget.h"
22#include "X86TargetMachine.h"
23#include "llvm/GlobalValue.h"
24#include "llvm/Instructions.h"
25#include "llvm/Intrinsics.h"
26#include "llvm/Support/CFG.h"
27#include "llvm/Type.h"
28#include "llvm/CodeGen/MachineConstantPool.h"
29#include "llvm/CodeGen/MachineFunction.h"
30#include "llvm/CodeGen/MachineFrameInfo.h"
31#include "llvm/CodeGen/MachineInstrBuilder.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/CodeGen/SelectionDAGISel.h"
34#include "llvm/Target/TargetMachine.h"
35#include "llvm/Support/CommandLine.h"
36#include "llvm/Support/Compiler.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/MathExtras.h"
39#include "llvm/ADT/Statistic.h"
40#include <queue>
41#include <set>
42using namespace llvm;
43
// Selection statistics (reported under -stats).  NumFPKill is incremented in
// InstructionSelectBasicBlock when an FP_REG_KILL is inserted; NumLoadMoved is
// incremented by PreprocessForRMW when a load is rewritten below a TokenFactor.
44STATISTIC(NumFPKill   , "Number of FP_REG_KILL instructions added");
45STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
46
47namespace {
  // Hidden, off-by-default command-line switch consulted by the generated
  // matcher tables to decide whether an AND may be folded into a TEST.
48  static cl::opt<bool>
49  FoldAndInTest("x86-fold-and-in-test", cl::desc("Fold and operation in test"),
50                cl::init(false), cl::Hidden);
51}
52
53//===----------------------------------------------------------------------===//
54//                      Pattern Matcher Implementation
55//===----------------------------------------------------------------------===//
56
57namespace {
58  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
59  /// SDOperand's instead of register numbers for the leaves of the matched
60  /// tree.
61  struct X86ISelAddressMode {
62    enum {
63      RegBase,
64      FrameIndexBase
65    } BaseType;
66
67    struct {            // This is really a union, discriminated by BaseType!
68      SDOperand Reg;
69      int FrameIndex;
70    } Base;
71
72    bool isRIPRel;     // RIP as base?
    // Scale applied to IndexReg (1, 2, 4 or 8).
73    unsigned Scale;
74    SDOperand IndexReg;
    // Integer displacement, added to whichever symbolic displacement is set.
75    unsigned Disp;
    // At most one of GV / CP / ES / JT is expected to be set; they are
    // alternative symbolic displacements (global value, constant-pool entry,
    // external symbol, jump-table index).  JT == -1 means "no jump table".
76    GlobalValue *GV;
77    Constant *CP;
78    const char *ES;
79    int JT;
80    unsigned Align;    // CP alignment.
81
    // Default state is "nothing matched yet": register base with a null
    // register, scale 1, no displacement of any kind.
82    X86ISelAddressMode()
83      : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
84        GV(0), CP(0), ES(0), JT(-1), Align(0) {
85    }
86  };
87}
88
89namespace {
90  //===--------------------------------------------------------------------===//
91  /// ISel - X86 specific code to select X86 machine instructions for
92  /// SelectionDAG operations.
93  ///
94  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
95    /// ContainsFPCode - Every instruction we select that uses or defines a FP
96    /// register should set this to true.
97    bool ContainsFPCode;
98
99    /// FastISel - Enable fast(er) instruction selection.
100    ///
101    bool FastISel;
102
103    /// TM - Keep a reference to X86TargetMachine.
104    ///
105    X86TargetMachine &TM;
106
107    /// X86Lowering - This object fully describes how to lower LLVM code to an
108    /// X86-specific SelectionDAG.
109    X86TargetLowering X86Lowering;
110
111    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
112    /// make the right decision when generating code for different targets.
113    const X86Subtarget *Subtarget;
114
115    /// GlobalBaseReg - keeps track of the virtual register mapped onto global
116    /// base register.
117    unsigned GlobalBaseReg;
118
119  public:
    // NOTE(review): the base class is handed X86Lowering before that member
    // has been constructed (members initialize in declaration order, after
    // the base).  This is only safe if SelectionDAGISel merely stores the
    // reference during construction -- confirm.
120    X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
121      : SelectionDAGISel(X86Lowering),
122        ContainsFPCode(false), FastISel(fast), TM(tm),
123        X86Lowering(*TM.getTargetLowering()),
124        Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
125
126    virtual bool runOnFunction(Function &Fn) {
127      // Make sure we re-emit a set of the global base reg if necessary
128      GlobalBaseReg = 0;
129      return SelectionDAGISel::runOnFunction(Fn);
130    }
131
132    virtual const char *getPassName() const {
133      return "X86 DAG->DAG Instruction Selection";
134    }
135
136    /// InstructionSelectBasicBlock - This callback is invoked by
137    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
138    virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
139
    /// EmitFunctionEntryCode - Emit target-specific function-entry code; on
    /// X86 this only handles the special code needed by main().
140    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
141
    /// CanBeFoldedBy - Return true if the load N can be folded into the use U
    /// (with pattern root Root) without creating a cycle in the DAG.
142    virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;
143
144// Include the pieces autogenerated from the target description.
145#include "X86GenDAGISel.inc"
146
147  private:
    /// Select - Select machine instructions for node N; the entry point used
    /// by the autogenerated matcher above.
148    SDNode *Select(SDOperand N);
149
    // Addressing-mode matchers.  MatchAddress/MatchAddressBase return true on
    // FAILURE (the node could not be folded into AM); the Select* helpers
    // fill in the concrete address operand SDOperands on success.
150    bool MatchAddress(SDOperand N, X86ISelAddressMode &AM,
151                      bool isRoot = true, unsigned Depth = 0);
152    bool MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
153                          bool isRoot, unsigned Depth);
154    bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
155                    SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
156    bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base,
157                       SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
158    bool SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
159                             SDOperand N, SDOperand &Base, SDOperand &Scale,
160                             SDOperand &Index, SDOperand &Disp,
161                             SDOperand &InChain, SDOperand &OutChain);
162    bool TryFoldLoad(SDOperand P, SDOperand N,
163                     SDOperand &Base, SDOperand &Scale,
164                     SDOperand &Index, SDOperand &Disp);
    // DAG pre-passes run from InstructionSelectBasicBlock before selection;
    // see their definitions below for details.
165    void PreprocessForRMW(SelectionDAG &DAG);
166    void PreprocessForFPConvert(SelectionDAG &DAG);
167
168    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
169    /// inline asm expressions.
170    virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
171                                              char ConstraintCode,
172                                              std::vector<SDOperand> &OutOps,
173                                              SelectionDAG &DAG);
174
175    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
176
    /// getAddressOperands - Lower a matched X86ISelAddressMode into the four
    /// concrete address operands (base, scale, index, displacement).
177    inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
178                                   SDOperand &Scale, SDOperand &Index,
179                                   SDOperand &Disp) {
180      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
181        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
182        AM.Base.Reg;
183      Scale = getI8Imm(AM.Scale);
184      Index = AM.IndexReg;
185      // These are 32-bit even in 64-bit mode since RIP relative offset
186      // is 32-bit.
187      if (AM.GV)
188        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
189      else if (AM.CP)
190        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
      // Note: the external-symbol and jump-table forms cannot carry AM.Disp,
      // so matchers must not fold a displacement into those address modes.
191      else if (AM.ES)
192        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
193      else if (AM.JT != -1)
194        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
195      else
196        Disp = getI32Imm(AM.Disp);
197    }
198
199    /// getI8Imm - Return a target constant with the specified value, of type
200    /// i8.
201    inline SDOperand getI8Imm(unsigned Imm) {
202      return CurDAG->getTargetConstant(Imm, MVT::i8);
203    }
204
205    /// getI16Imm - Return a target constant with the specified value, of type
206    /// i16.
207    inline SDOperand getI16Imm(unsigned Imm) {
208      return CurDAG->getTargetConstant(Imm, MVT::i16);
209    }
210
211    /// getI32Imm - Return a target constant with the specified value, of type
212    /// i32.
213    inline SDOperand getI32Imm(unsigned Imm) {
214      return CurDAG->getTargetConstant(Imm, MVT::i32);
215    }
216
217    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
218    /// base register.  Return the virtual register that holds this value.
219    SDNode *getGlobalBaseReg();
220
221    /// getTruncate - return an SDNode that implements a subreg based truncate
222    /// of the specified operand to the specified value type.
223    SDNode *getTruncate(SDOperand N0, MVT::ValueType VT);
224
225#ifndef NDEBUG
    // Current indentation level of the ISel debug trace printed (via DOUT) in
    // InstructionSelectBasicBlock.
226    unsigned Indent;
227#endif
228  };
229}
230
231static SDNode *findFlagUse(SDNode *N) {
232  unsigned FlagResNo = N->getNumValues()-1;
233  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
234    SDNode *User = *I;
235    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
236      SDOperand Op = User->getOperand(i);
237      if (Op.Val == N && Op.ResNo == FlagResNo)
238        return User;
239    }
240  }
241  return NULL;
242}
243
244static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
245                          SDNode *Root, SDNode *Skip, bool &found,
246                          std::set<SDNode *> &Visited) {
247  if (found ||
248      Use->getNodeId() > Def->getNodeId() ||
249      !Visited.insert(Use).second)
250    return;
251
252  for (unsigned i = 0, e = Use->getNumOperands(); !found && i != e; ++i) {
253    SDNode *N = Use->getOperand(i).Val;
254    if (N == Skip)
255      continue;
256    if (N == Def) {
257      if (Use == ImmedUse)
258        continue; // Immediate use is ok.
259      if (Use == Root) {
260        assert(Use->getOpcode() == ISD::STORE ||
261               Use->getOpcode() == X86ISD::CMP);
262        continue;
263      }
264      found = true;
265      break;
266    }
267    findNonImmUse(N, Def, ImmedUse, Root, Skip, found, Visited);
268  }
269}
270
271/// isNonImmUse - Start searching from Root up the DAG to check is Def can
272/// be reached. Return true if that's the case. However, ignore direct uses
273/// by ImmedUse (which would be U in the example illustrated in
274/// CanBeFoldedBy) and by Root (which can happen in the store case).
275/// FIXME: to be really generic, we should allow direct use by any node
276/// that is being folded. But realisticly since we only fold loads which
277/// have one non-chain use, we only need to watch out for load/op/store
278/// and load/op/cmp case where the root (store / cmp) may reach the load via
279/// its chain operand.
280static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
281                               SDNode *Skip = NULL) {
282  std::set<SDNode *> Visited;
283  bool found = false;
284  findNonImmUse(Root, Def, ImmedUse, Root, Skip, found, Visited);
285  return found;
286}
287
288
/// CanBeFoldedBy - Return true if it is safe to fold the load N into the use
/// U, where Root is the root of the pattern being matched (e.g. the store of
/// a load/op/store sequence, or a cmp).  Folding is rejected whenever it
/// would create a cycle in the DAG.
289bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
  // In -fast mode, never fold loads: keep selection simple and quick.
290  if (FastISel) return false;
291
292  // If U use can somehow reach N through another path then U can't fold N or
293  // it will create a cycle. e.g. In the following diagram, U can reach N
294  // through X. If N is folded into U, then X is both a predecessor and
295  // a successor of U.
296  //
297  //         [ N ]
298  //         ^  ^
299  //         |  |
300  //        /   \---
301  //      /        [X]
302  //      |         ^
303  //     [U]--------|
304
305  if (isNonImmUse(Root, N, U))
306    return false;
307
308  // If U produces a flag, then it gets (even more) interesting. Since it
309  // would have been "glued" together with its flag use, we need to check if
310  // it might reach N:
311  //
312  //       [ N ]
313  //        ^ ^
314  //        | |
315  //       [U] \--
316  //        ^   [TF]
317  //        |    ^
318  //        |    |
319  //         \  /
320  //          [FU]
321  //
322  // If FU (flag use) indirectly reach N (the load), and U fold N (call it
323  // NU), then TF is a predecessor of FU and a successor of NU. But since
324  // NU and FU are flagged together, this effectively creates a cycle.
325  bool HasFlagUse = false;
326  MVT::ValueType VT = Root->getValueType(Root->getNumValues()-1);
  // Walk up the chain of flag users: the effective root for the cycle check
  // is the outermost node glued onto Root.
327  while ((VT == MVT::Flag && !Root->use_empty())) {
328    SDNode *FU = findFlagUse(Root);
329    if (FU == NULL)
330      break;
331    else {
332      Root = FU;
333      HasFlagUse = true;
334    }
335    VT = Root->getValueType(Root->getNumValues()-1);
336  }
337
  // Redo the reachability check from the flagged root, treating it as the
  // immediate user and skipping the subgraph under U (already checked above).
338  if (HasFlagUse)
339    return !isNonImmUse(Root, N, Root, U);
340  return true;
341}
342
343/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
344/// and move load below the TokenFactor. Replace store's chain operand with
345/// load's chain result.
346static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
347                                 SDOperand Store, SDOperand TF) {
348  std::vector<SDOperand> Ops;
349  for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
350    if (Load.Val == TF.Val->getOperand(i).Val)
351      Ops.push_back(Load.Val->getOperand(0));
352    else
353      Ops.push_back(TF.Val->getOperand(i));
354  DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
355  DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
356  DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
357                         Store.getOperand(2), Store.getOperand(3));
358}
359
360/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
361/// This is only run if not in -fast mode (aka -O0).
362/// This allows the instruction selector to pick more read-modify-write
363/// instructions. This is a common case:
364///
365///     [Load chain]
366///         ^
367///         |
368///       [Load]
369///       ^    ^
370///       |    |
371///      /      \-
372///     /         |
373/// [TokenFactor] [Op]
374///     ^          ^
375///     |          |
376///      \        /
377///       \      /
378///       [Store]
379///
380/// The fact the store's chain operand != load's chain will prevent the
381/// (store (op (load))) instruction from being selected. We can transform it to:
382///
383///     [Load chain]
384///         ^
385///         |
386///    [TokenFactor]
387///         ^
388///         |
389///       [Load]
390///       ^    ^
391///       |    |
392///       |     \-
393///       |       |
394///       |     [Op]
395///       |       ^
396///       |       |
397///       \      /
398///        \    /
399///       [Store]
400void X86DAGToDAGISel::PreprocessForRMW(SelectionDAG &DAG) {
401  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
402         E = DAG.allnodes_end(); I != E; ++I) {
    // Only plain (non-truncating) stores whose chain is a TokenFactor are
    // candidates for the transformation pictured above.
403    if (!ISD::isNON_TRUNCStore(I))
404      continue;
405    SDOperand Chain = I->getOperand(0);
406    if (Chain.Val->getOpcode() != ISD::TokenFactor)
407      continue;
408
    // N1 = stored value, N2 = store address.  RMW folding only applies to
    // single-use scalar integer values.
409    SDOperand N1 = I->getOperand(1);
410    SDOperand N2 = I->getOperand(2);
411    if (MVT::isFloatingPoint(N1.getValueType()) ||
412        MVT::isVector(N1.getValueType()) ||
413        !N1.hasOneUse())
414      continue;
415
416    bool RModW = false;
417    SDOperand Load;
418    unsigned Opcode = N1.Val->getOpcode();
419    switch (Opcode) {
      // Commutative (or carry-chained add) ops: the load may be either
      // operand; swap it into N10 if it is on the right.
420      case ISD::ADD:
421      case ISD::MUL:
422      case ISD::AND:
423      case ISD::OR:
424      case ISD::XOR:
425      case ISD::ADDC:
426      case ISD::ADDE: {
427        SDOperand N10 = N1.getOperand(0);
428        SDOperand N11 = N1.getOperand(1);
429        if (ISD::isNON_EXTLoad(N10.Val))
430          RModW = true;
431        else if (ISD::isNON_EXTLoad(N11.Val)) {
432          RModW = true;
433          std::swap(N10, N11);
434        }
        // The load must be chained through the TokenFactor, have a single
        // use, load from the same address the store writes to, and match
        // the operation's value type.
435        RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
436          (N10.getOperand(1) == N2) &&
437          (N10.Val->getValueType(0) == N1.getValueType());
438        if (RModW)
439          Load = N10;
440        break;
441      }
      // Non-commutative ops: only the left operand may be the folded load.
442      case ISD::SUB:
443      case ISD::SHL:
444      case ISD::SRA:
445      case ISD::SRL:
446      case ISD::ROTL:
447      case ISD::ROTR:
448      case ISD::SUBC:
449      case ISD::SUBE:
450      case X86ISD::SHLD:
451      case X86ISD::SHRD: {
452        SDOperand N10 = N1.getOperand(0);
453        if (ISD::isNON_EXTLoad(N10.Val))
454          RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
455            (N10.getOperand(1) == N2) &&
456            (N10.Val->getValueType(0) == N1.getValueType());
457        if (RModW)
458          Load = N10;
459        break;
460      }
461    }
462
463    if (RModW) {
464      MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
465      ++NumLoadMoved;
466    }
467  }
468}
469
470
471/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
472/// nodes that target the FP stack to be store and load to the stack.  This is a
473/// gross hack.  We would like to simply mark these as being illegal, but when
474/// we do that, legalize produces these when it expands calls, then expands
475/// these in the same legalize pass.  We would like dag combine to be able to
476/// hack on these between the call expansion and the node legalization.  As such
477/// this pass basically does "really late" legalization of these inline with the
478/// X86 isel pass.
479void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) {
480  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
481       E = DAG.allnodes_end(); I != E; ) {
482    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
483    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
484      continue;
485
486    // If the source and destination are SSE registers, then this is a legal
487    // conversion that should not be lowered.
488    MVT::ValueType SrcVT = N->getOperand(0).getValueType();
489    MVT::ValueType DstVT = N->getValueType(0);
490    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
491    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
492    if (SrcIsSSE && DstIsSSE)
493      continue;
494
495    // If this is an FPStack extension (but not a truncation), it is a noop.
496    if (!SrcIsSSE && !DstIsSSE && N->getOpcode() == ISD::FP_EXTEND)
497      continue;
498
499    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
500    // FPStack has extload and truncstore.  SSE can fold direct loads into other
501    // operations.  Based on this, decide what we want to do.
502    MVT::ValueType MemVT;
503    if (N->getOpcode() == ISD::FP_ROUND)
504      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
505    else
506      MemVT = SrcIsSSE ? SrcVT : DstVT;
507
508    SDOperand MemTmp = DAG.CreateStackTemporary(MemVT);
509
510    // FIXME: optimize the case where the src/dest is a load or store?
    // Materialize the conversion as a round trip through the stack slot:
    // a truncating store at MemVT followed by an extending load to DstVT.
511    SDOperand Store = DAG.getTruncStore(DAG.getEntryNode(), N->getOperand(0),
512                                        MemTmp, NULL, 0, MemVT);
513    SDOperand Result = DAG.getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp,
514                                      NULL, 0, MemVT);
515
516    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
517    // extload we created.  This will cause general havok on the dag because
518    // anything below the conversion could be folded into other existing nodes.
519    // To avoid invalidating 'I', back it up to the convert node.
520    --I;
521    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result);
522
523    // Now that we did that, the node is dead.  Increment the iterator to the
524    // next node to process, then delete N.
525    ++I;
526    DAG.DeleteNode(N);
527  }
528}
529
530/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
531/// when it has created a SelectionDAG for us to codegen.
532void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
533  DEBUG(BB->dump());
534  MachineFunction::iterator FirstMBB = BB;
535
536  if (!FastISel)
537    PreprocessForRMW(DAG);
538
539  // FIXME: This should only happen when not -fast.
540  PreprocessForFPConvert(DAG);
541
542  // Codegen the basic block.
543#ifndef NDEBUG
544  DOUT << "===== Instruction selection begins:\n";
545  Indent = 0;
546#endif
547  DAG.setRoot(SelectRoot(DAG.getRoot()));
548#ifndef NDEBUG
549  DOUT << "===== Instruction selection ends:\n";
550#endif
551
552  DAG.RemoveDeadNodes();
553
554  // Emit machine code to BB.
555  ScheduleAndEmitDAG(DAG);
556
557  // If we are emitting FP stack code, scan the basic block to determine if this
558  // block defines any FP values.  If so, put an FP_REG_KILL instruction before
559  // the terminator of the block.
560
561  // Note that FP stack instructions are used in all modes for long double,
562  // so we always need to do this check.
563  // Also note that it's possible for an FP stack register to be live across
564  // an instruction that produces multiple basic blocks (SSE CMOV) so we
565  // must check all the generated basic blocks.
566
567  // Scan all of the machine instructions in these MBBs, checking for FP
568  // stores.  (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
569  MachineFunction::iterator MBBI = FirstMBB;
570  do {
571    bool ContainsFPCode = false;
572    for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
573         !ContainsFPCode && I != E; ++I) {
574      if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
575        const TargetRegisterClass *clas;
576        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
577          if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
578              TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
579              ((clas = RegInfo->getRegClass(I->getOperand(0).getReg())) ==
580                 X86::RFP32RegisterClass ||
581               clas == X86::RFP64RegisterClass ||
582               clas == X86::RFP80RegisterClass)) {
583            ContainsFPCode = true;
584            break;
585          }
586        }
587      }
588    }
589    // Check PHI nodes in successor blocks.  These PHI's will be lowered to have
590    // a copy of the input value in this block.  In SSE mode, we only care about
591    // 80-bit values.
592    if (!ContainsFPCode) {
593      // Final check, check LLVM BB's that are successors to the LLVM BB
594      // corresponding to BB for FP PHI nodes.
595      const BasicBlock *LLVMBB = BB->getBasicBlock();
596      const PHINode *PN;
597      for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
598           !ContainsFPCode && SI != E; ++SI) {
599        for (BasicBlock::const_iterator II = SI->begin();
600             (PN = dyn_cast<PHINode>(II)); ++II) {
601          if (PN->getType()==Type::X86_FP80Ty ||
602              (!Subtarget->hasSSE1() && PN->getType()->isFloatingPoint()) ||
603              (!Subtarget->hasSSE2() && PN->getType()==Type::DoubleTy)) {
604            ContainsFPCode = true;
605            break;
606          }
607        }
608      }
609    }
610    // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
611    if (ContainsFPCode) {
612      BuildMI(*MBBI, MBBI->getFirstTerminator(),
613              TM.getInstrInfo()->get(X86::FP_REG_KILL));
614      ++NumFPKill;
615    }
616  } while (&*(MBBI++) != BB);
617}
618
619/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
620/// the main function.
621void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
622                                             MachineFrameInfo *MFI) {
623  const TargetInstrInfo *TII = TM.getInstrInfo();
624  if (Subtarget->isTargetCygMing())
625    BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
626}
627
628void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
629  // If this is main, emit special code for main.
630  MachineBasicBlock *BB = MF.begin();
631  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
632    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
633}
634
635/// MatchAddress - Add the specified node to the specified addressing mode,
636/// returning true if it cannot be done.  This just pattern matches for the
637/// addressing mode.
638bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
639                                   bool isRoot, unsigned Depth) {
640  // Limit recursion.
641  if (Depth > 5)
642    return MatchAddressBase(N, AM, isRoot, Depth);
643
644  // RIP relative addressing: %rip + 32-bit displacement!
645  if (AM.isRIPRel) {
646    if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
647      int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
648      if (isInt32(AM.Disp + Val)) {
649        AM.Disp += Val;
650        return false;
651      }
652    }
653    return true;
654  }
655
656  int id = N.Val->getNodeId();
657  bool AlreadySelected = isSelected(id); // Already selected, not yet replaced.
658
659  switch (N.getOpcode()) {
660  default: break;
661  case ISD::Constant: {
662    int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
663    if (isInt32(AM.Disp + Val)) {
664      AM.Disp += Val;
665      return false;
666    }
667    break;
668  }
669
670  case X86ISD::Wrapper: {
671    bool is64Bit = Subtarget->is64Bit();
672    // Under X86-64 non-small code model, GV (and friends) are 64-bits.
673    // Also, base and index reg must be 0 in order to use rip as base.
674    if (is64Bit && (TM.getCodeModel() != CodeModel::Small ||
675                    AM.Base.Reg.Val || AM.IndexReg.Val))
676      break;
677    if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
678      break;
679    // If value is available in a register both base and index components have
680    // been picked, we can't fit the result available in the register in the
681    // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
682    if (!AlreadySelected || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
683      SDOperand N0 = N.getOperand(0);
684      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
685        GlobalValue *GV = G->getGlobal();
686        AM.GV = GV;
687        AM.Disp += G->getOffset();
688        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
689          Subtarget->isPICStyleRIPRel();
690        return false;
691      } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
692        AM.CP = CP->getConstVal();
693        AM.Align = CP->getAlignment();
694        AM.Disp += CP->getOffset();
695        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
696          Subtarget->isPICStyleRIPRel();
697        return false;
698      } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
699        AM.ES = S->getSymbol();
700        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
701          Subtarget->isPICStyleRIPRel();
702        return false;
703      } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
704        AM.JT = J->getIndex();
705        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
706          Subtarget->isPICStyleRIPRel();
707        return false;
708      }
709    }
710    break;
711  }
712
713  case ISD::FrameIndex:
714    if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
715      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
716      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
717      return false;
718    }
719    break;
720
721  case ISD::SHL:
722    if (AlreadySelected || AM.IndexReg.Val != 0 || AM.Scale != 1 || AM.isRIPRel)
723      break;
724
725    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
726      unsigned Val = CN->getValue();
727      if (Val == 1 || Val == 2 || Val == 3) {
728        AM.Scale = 1 << Val;
729        SDOperand ShVal = N.Val->getOperand(0);
730
731        // Okay, we know that we have a scale by now.  However, if the scaled
732        // value is an add of something and a constant, we can fold the
733        // constant into the disp field here.
734        if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
735            isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
736          AM.IndexReg = ShVal.Val->getOperand(0);
737          ConstantSDNode *AddVal =
738            cast<ConstantSDNode>(ShVal.Val->getOperand(1));
739          uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
740          if (isInt32(Disp))
741            AM.Disp = Disp;
742          else
743            AM.IndexReg = ShVal;
744        } else {
745          AM.IndexReg = ShVal;
746        }
747        return false;
748      }
749    break;
750    }
751
752  case ISD::SMUL_LOHI:
753  case ISD::UMUL_LOHI:
754    // A mul_lohi where we need the low part can be folded as a plain multiply.
755    if (N.ResNo != 0) break;
756    // FALL THROUGH
757  case ISD::MUL:
758    // X*[3,5,9] -> X+X*[2,4,8]
759    if (!AlreadySelected &&
760        AM.BaseType == X86ISelAddressMode::RegBase &&
761        AM.Base.Reg.Val == 0 &&
762        AM.IndexReg.Val == 0 &&
763        !AM.isRIPRel) {
764      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
765        if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
766          AM.Scale = unsigned(CN->getValue())-1;
767
768          SDOperand MulVal = N.Val->getOperand(0);
769          SDOperand Reg;
770
771          // Okay, we know that we have a scale by now.  However, if the scaled
772          // value is an add of something and a constant, we can fold the
773          // constant into the disp field here.
774          if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
775              isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
776            Reg = MulVal.Val->getOperand(0);
777            ConstantSDNode *AddVal =
778              cast<ConstantSDNode>(MulVal.Val->getOperand(1));
779            uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
780            if (isInt32(Disp))
781              AM.Disp = Disp;
782            else
783              Reg = N.Val->getOperand(0);
784          } else {
785            Reg = N.Val->getOperand(0);
786          }
787
788          AM.IndexReg = AM.Base.Reg = Reg;
789          return false;
790        }
791    }
792    break;
793
794  case ISD::ADD:
795    if (!AlreadySelected) {
796      X86ISelAddressMode Backup = AM;
797      if (!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1) &&
798          !MatchAddress(N.Val->getOperand(1), AM, false, Depth+1))
799        return false;
800      AM = Backup;
801      if (!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1) &&
802          !MatchAddress(N.Val->getOperand(0), AM, false, Depth+1))
803        return false;
804      AM = Backup;
805    }
806    break;
807
808  case ISD::OR:
809    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
810    if (AlreadySelected) break;
811
812    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
813      X86ISelAddressMode Backup = AM;
814      // Start with the LHS as an addr mode.
815      if (!MatchAddress(N.getOperand(0), AM, false) &&
816          // Address could not have picked a GV address for the displacement.
817          AM.GV == NULL &&
818          // On x86-64, the resultant disp must fit in 32-bits.
819          isInt32(AM.Disp + CN->getSignExtended()) &&
820          // Check to see if the LHS & C is zero.
821          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getValue())) {
822        AM.Disp += CN->getValue();
823        return false;
824      }
825      AM = Backup;
826    }
827    break;
828
829  case ISD::AND: {
830    // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
831    // allows us to fold the shift into this addressing mode.
832    if (AlreadySelected) break;
833    SDOperand Shift = N.getOperand(0);
834    if (Shift.getOpcode() != ISD::SHL) break;
835
836    // Scale must not be used already.
837    if (AM.IndexReg.Val != 0 || AM.Scale != 1) break;
838
839    // Not when RIP is used as the base.
840    if (AM.isRIPRel) break;
841
842    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
843    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
844    if (!C1 || !C2) break;
845
846    // Not likely to be profitable if either the AND or SHIFT node has more
847    // than one use (unless all uses are for address computation). Besides,
848    // isel mechanism requires their node ids to be reused.
849    if (!N.hasOneUse() || !Shift.hasOneUse())
850      break;
851
852    // Verify that the shift amount is something we can fold.
853    unsigned ShiftCst = C1->getValue();
854    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
855      break;
856
857    // Get the new AND mask, this folds to a constant.
858    SDOperand NewANDMask = CurDAG->getNode(ISD::SRL, N.getValueType(),
859                                           SDOperand(C2, 0), SDOperand(C1, 0));
860    SDOperand NewAND = CurDAG->getNode(ISD::AND, N.getValueType(),
861                                       Shift.getOperand(0), NewANDMask);
862    NewANDMask.Val->setNodeId(Shift.Val->getNodeId());
863    NewAND.Val->setNodeId(N.Val->getNodeId());
864
865    AM.Scale = 1 << ShiftCst;
866    AM.IndexReg = NewAND;
867    return false;
868  }
869  }
870
871  return MatchAddressBase(N, AM, isRoot, Depth);
872}
873
874/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
875/// specified addressing mode without any further recursion.
876bool X86DAGToDAGISel::MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
877                                       bool isRoot, unsigned Depth) {
878  // Is the base register already occupied?
879  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
880    // If so, check to see if the scale index register is set.
881    if (AM.IndexReg.Val == 0 && !AM.isRIPRel) {
882      AM.IndexReg = N;
883      AM.Scale = 1;
884      return false;
885    }
886
887    // Otherwise, we cannot select it.
888    return true;
889  }
890
891  // Default, generate it as a register.
892  AM.BaseType = X86ISelAddressMode::RegBase;
893  AM.Base.Reg = N;
894  return false;
895}
896
897/// SelectAddr - returns true if it is able pattern match an addressing mode.
898/// It returns the operands which make up the maximal addressing mode it can
899/// match by reference.
900bool X86DAGToDAGISel::SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
901                                 SDOperand &Scale, SDOperand &Index,
902                                 SDOperand &Disp) {
903  X86ISelAddressMode AM;
904  if (MatchAddress(N, AM))
905    return false;
906
907  MVT::ValueType VT = N.getValueType();
908  if (AM.BaseType == X86ISelAddressMode::RegBase) {
909    if (!AM.Base.Reg.Val)
910      AM.Base.Reg = CurDAG->getRegister(0, VT);
911  }
912
913  if (!AM.IndexReg.Val)
914    AM.IndexReg = CurDAG->getRegister(0, VT);
915
916  getAddressOperands(AM, Base, Scale, Index, Disp);
917  return true;
918}
919
920/// isZeroNode - Returns true if Elt is a constant zero or a floating point
921/// constant +0.0.
922static inline bool isZeroNode(SDOperand Elt) {
923  return ((isa<ConstantSDNode>(Elt) &&
924  cast<ConstantSDNode>(Elt)->getValue() == 0) ||
925  (isa<ConstantFPSDNode>(Elt) &&
926  cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
927}
928
929
930/// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
931/// match a load whose top elements are either undef or zeros.  The load flavor
932/// is derived from the type of N, which is either v4f32 or v2f64.
933bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
934                                          SDOperand N, SDOperand &Base,
935                                          SDOperand &Scale, SDOperand &Index,
936                                          SDOperand &Disp, SDOperand &InChain,
937                                          SDOperand &OutChain) {
938  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
939    InChain = N.getOperand(0).getValue(1);
940    if (ISD::isNON_EXTLoad(InChain.Val) &&
941        InChain.getValue(0).hasOneUse() &&
942        N.hasOneUse() &&
943        CanBeFoldedBy(N.Val, Pred.Val, Op.Val)) {
944      LoadSDNode *LD = cast<LoadSDNode>(InChain);
945      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
946        return false;
947      OutChain = LD->getChain();
948      return true;
949    }
950  }
951
952  // Also handle the case where we explicitly require zeros in the top
953  // elements.  This is a vector shuffle from the zero vector.
954  if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
955      // Check to see if the top elements are all zeros (or bitcast of zeros).
956      ISD::isBuildVectorAllZeros(N.getOperand(0).Val) &&
957      N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
958      N.getOperand(1).Val->hasOneUse() &&
959      ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
960      N.getOperand(1).getOperand(0).hasOneUse()) {
961    // Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
962    // from the LHS.
963    unsigned VecWidth=MVT::getVectorNumElements(N.getOperand(0).getValueType());
964    SDOperand ShufMask = N.getOperand(2);
965    assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
966    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
967      if (C->getValue() == VecWidth) {
968        for (unsigned i = 1; i != VecWidth; ++i) {
969          if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF) {
970            // ok.
971          } else {
972            ConstantSDNode *C = cast<ConstantSDNode>(ShufMask.getOperand(i));
973            if (C->getValue() >= VecWidth) return false;
974          }
975        }
976      }
977
978      // Okay, this is a zero extending load.  Fold it.
979      LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(1).getOperand(0));
980      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
981        return false;
982      OutChain = LD->getChain();
983      InChain = SDOperand(LD, 1);
984      return true;
985    }
986  }
987  return false;
988}
989
990
991/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
992/// mode it matches can be cost effectively emitted as an LEA instruction.
993bool X86DAGToDAGISel::SelectLEAAddr(SDOperand Op, SDOperand N,
994                                    SDOperand &Base, SDOperand &Scale,
995                                    SDOperand &Index, SDOperand &Disp) {
996  X86ISelAddressMode AM;
997  if (MatchAddress(N, AM))
998    return false;
999
1000  MVT::ValueType VT = N.getValueType();
1001  unsigned Complexity = 0;
1002  if (AM.BaseType == X86ISelAddressMode::RegBase)
1003    if (AM.Base.Reg.Val)
1004      Complexity = 1;
1005    else
1006      AM.Base.Reg = CurDAG->getRegister(0, VT);
1007  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1008    Complexity = 4;
1009
1010  if (AM.IndexReg.Val)
1011    Complexity++;
1012  else
1013    AM.IndexReg = CurDAG->getRegister(0, VT);
1014
1015  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1016  // a simple shift.
1017  if (AM.Scale > 1)
1018    Complexity++;
1019
1020  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1021  // to a LEA. This is determined with some expermentation but is by no means
1022  // optimal (especially for code size consideration). LEA is nice because of
1023  // its three-address nature. Tweak the cost function again when we can run
1024  // convertToThreeAddress() at register allocation time.
1025  if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
1026    // For X86-64, we should always use lea to materialize RIP relative
1027    // addresses.
1028    if (Subtarget->is64Bit())
1029      Complexity = 4;
1030    else
1031      Complexity += 2;
1032  }
1033
1034  if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
1035    Complexity++;
1036
1037  if (Complexity > 2) {
1038    getAddressOperands(AM, Base, Scale, Index, Disp);
1039    return true;
1040  }
1041  return false;
1042}
1043
1044bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
1045                                  SDOperand &Base, SDOperand &Scale,
1046                                  SDOperand &Index, SDOperand &Disp) {
1047  if (ISD::isNON_EXTLoad(N.Val) &&
1048      N.hasOneUse() &&
1049      CanBeFoldedBy(N.Val, P.Val, P.Val))
1050    return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
1051  return false;
1052}
1053
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
/// The virtual register is created lazily on first use and cached in
/// GlobalBaseReg; subsequent calls just return the cached register.
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
  if (!GlobalBaseReg) {
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineFunction *MF = BB->getParent();
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    MachineRegisterInfo &RegInfo = MF->getRegInfo();
    unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);

    const TargetInstrInfo *TII = TM.getInstrInfo();
    // Operand of MovePCtoStack is completely ignored by asm printer. It's
    // only used in JIT code emission as displacement to pc.
    BuildMI(FirstMBB, MBBI, TII->get(X86::MOVPC32r), PC).addImm(0);

    // If we're using vanilla 'GOT' PIC style, we should use relative addressing
    // not to pc, but to _GLOBAL_ADDRESS_TABLE_ external
    if (TM.getRelocationModel() == Reloc::PIC_ &&
        Subtarget->isPICStyleGOT()) {
      // GOT style: base = PC + _GLOBAL_OFFSET_TABLE_, computed into a second
      // virtual register.
      GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
      BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg)
        .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
    } else {
      // Otherwise the PC value itself serves as the global base.
      GlobalBaseReg = PC;
    }

  }
  // Wrap the cached virtual register in a DAG register node.
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
}
1086
1087static SDNode *FindCallStartFromCall(SDNode *Node) {
1088  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
1089    assert(Node->getOperand(0).getValueType() == MVT::Other &&
1090         "Node doesn't have a token chain argument!");
1091  return FindCallStartFromCall(Node->getOperand(0).Val);
1092}
1093
/// getTruncate - Implement an integer truncate of N0 down to VT by emitting
/// an EXTRACT_SUBREG with the subregister index that corresponds to VT.
SDNode *X86DAGToDAGISel::getTruncate(SDOperand N0, MVT::ValueType VT) {
    SDOperand SRIdx;
    switch (VT) {
    case MVT::i8:
      SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
      // Ensure that the source register has an 8-bit subreg on 32-bit targets
      if (!Subtarget->is64Bit()) {
        unsigned Opc;
        // NOTE(review): this inner VT shadows the parameter VT for the rest
        // of this scope, so the EXTRACT_SUBREG below is created with the
        // source width (i16/i32), not i8 — confirm this is intended.
        MVT::ValueType VT;
        switch (N0.getValueType()) {
        default: assert(0 && "Unknown truncate!");
        case MVT::i16:
          Opc = X86::MOV16to16_;
          VT = MVT::i16;
          break;
        case MVT::i32:
          Opc = X86::MOV32to32_;
          VT = MVT::i32;
          break;
        }
        // Copy through a target move whose flag result glues it to the
        // EXTRACT_SUBREG that consumes it.
        N0 = SDOperand(CurDAG->getTargetNode(Opc, VT, MVT::Flag, N0), 0);
        return CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                     VT, N0, SRIdx, N0.getValue(1));
      }
      break;
    case MVT::i16:
      SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
      break;
    case MVT::i32:
      SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
      break;
    default: assert(0 && "Unknown truncate!"); break;
    }
    // Common case: a single EXTRACT_SUBREG with the chosen subreg index.
    return CurDAG->getTargetNode(X86::EXTRACT_SUBREG, VT, N0, SRIdx);
}
1129
1130
/// Select - Main instruction-selection entry point.  Hand-selects the nodes
/// that cannot be expressed as tblgen patterns (fixed-register multiplies
/// and divides, sub-register extends/truncates, FP result copies, global
/// base register materialization), then defers everything else to the
/// generated matcher (SelectCode).
SDNode *X86DAGToDAGISel::Select(SDOperand N) {
  SDNode *Node = N.Val;
  MVT::ValueType NVT = Node->getValueType(0);
  unsigned Opc, MOpc;
  unsigned Opcode = Node->getOpcode();

#ifndef NDEBUG
  DOUT << std::string(Indent, ' ') << "Selecting: ";
  DEBUG(Node->dump(CurDAG));
  DOUT << "\n";
  Indent += 2;
#endif

  // Opcodes in [BUILTIN_OP_END, X86ISD::FIRST_NUMBER) are target nodes that
  // have already been through selection; leave them alone.
  if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "== ";
    DEBUG(Node->dump(CurDAG));
    DOUT << "\n";
    Indent -= 2;
#endif
    return NULL;   // Already selected.
  }

  switch (Opcode) {
    default: break;
    case X86ISD::GlobalBaseReg:
      return getGlobalBaseReg();

    case X86ISD::FP_GET_RESULT2: {
      // Emit FpGETRESULT80x2 (two f80 results + chain + flag) and rewire the
      // chain (result 2) and flag (result 3) uses of the original node.
      SDOperand Chain = N.getOperand(0);
      SDOperand InFlag = N.getOperand(1);
      AddToISelQueue(Chain);
      AddToISelQueue(InFlag);
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f80);
      Tys.push_back(MVT::f80);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      SDOperand Ops[] = { Chain, InFlag };
      SDNode *ResNode = CurDAG->getTargetNode(X86::FpGETRESULT80x2, Tys,
                                              Ops, 2);
      Chain = SDOperand(ResNode, 2);
      InFlag = SDOperand(ResNode, 3);
      ReplaceUses(SDOperand(N.Val, 2), Chain);
      ReplaceUses(SDOperand(N.Val, 3), InFlag);
      return ResNode;
    }

    case ISD::ADD: {
      // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
      // code and is matched first so to prevent it from being turned into
      // LEA32r X+c.
      // In 64-bit small code size mode, use LEA to take advantage of
      // RIP-relative addressing.
      if (TM.getCodeModel() != CodeModel::Small)
        break;
      MVT::ValueType PtrVT = TLI.getPointerTy();
      SDOperand N0 = N.getOperand(0);
      SDOperand N1 = N.getOperand(1);
      if (N.Val->getValueType(0) == PtrVT &&
          N0.getOpcode() == X86ISD::Wrapper &&
          N1.getOpcode() == ISD::Constant) {
        unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
        SDOperand C(0, 0);
        // TODO: handle ExternalSymbolSDNode.
        if (GlobalAddressSDNode *G =
            dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
          // Fold the constant into the global address' offset.
          C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
                                             G->getOffset() + Offset);
        } else if (ConstantPoolSDNode *CP =
                   dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
          // Likewise for constant-pool addresses.
          C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
                                            CP->getAlignment(),
                                            CP->getOffset()+Offset);
        }

        if (C.Val) {
          if (Subtarget->is64Bit()) {
            // LEA64r with zero base/index registers and scale 1.
            SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
                                CurDAG->getRegister(0, PtrVT), C };
            return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
          } else
            return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
        }
      }

      // Other cases are handled by auto-generated code.
      break;
    }

    case ISD::SMUL_LOHI:
    case ISD::UMUL_LOHI: {
      SDOperand N0 = Node->getOperand(0);
      SDOperand N1 = Node->getOperand(1);

      // There are several forms of IMUL that just return the low part and
      // don't have fixed-register operands. If we don't need the high part,
      // use these instead. They can be selected with the generated ISel code.
      if (NVT != MVT::i8 &&
          N.getValue(1).use_empty()) {
        N = CurDAG->getNode(ISD::MUL, NVT, N0, N1);
        break;
      }

      bool isSigned = Opcode == ISD::SMUL_LOHI;
      if (!isSigned)
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
        case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
        case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
        case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
        }
      else
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
        case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
        case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
        case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
        }

      // The one-operand MUL/IMUL forms use fixed registers: one input comes
      // from LoReg, and the double-width product is returned in HiReg:LoReg.
      unsigned LoReg, HiReg;
      switch (NVT) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
      case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
      }

      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
      // Multiply is commutative; if folding N1's load failed, try N0's and
      // swap the operands on success.
      if (!foldedLoad) {
        foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
        if (foldedLoad)
          std::swap(N0, N1);
      }

      AddToISelQueue(N0);
      // Move the register operand into the fixed input register (LoReg).
      SDOperand InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg,
                                              N0, SDOperand()).getValue(1);

      if (foldedLoad) {
        AddToISelQueue(N1.getOperand(0));
        AddToISelQueue(Tmp0);
        AddToISelQueue(Tmp1);
        AddToISelQueue(Tmp2);
        AddToISelQueue(Tmp3);
        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
        SDNode *CNode =
          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
        InFlag = SDOperand(CNode, 1);
        // Update the chain.
        ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
      } else {
        AddToISelQueue(N1);
        InFlag =
          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
      }

      // Copy the low half of the result, if it is needed.
      if (!N.getValue(0).use_empty()) {
        SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                                  LoReg, NVT, InFlag);
        InFlag = Result.getValue(2);
        ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.Val->dump(CurDAG));
        DOUT << "\n";
#endif
      }
      // Copy the high half of the result, if it is needed.
      if (!N.getValue(1).use_empty()) {
        SDOperand Result;
        if (HiReg == X86::AH && Subtarget->is64Bit()) {
          // Prevent use of AH in a REX instruction by referencing AX instead.
          // Shift it down 8 bits.
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                          X86::AX, MVT::i16, InFlag);
          InFlag = Result.getValue(2);
          Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                       CurDAG->getTargetConstant(8, MVT::i8)), 0);
          // Then truncate it down to i8.
          SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
          Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                                   MVT::i8, Result, SRIdx), 0);
        } else {
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                          HiReg, NVT, InFlag);
          InFlag = Result.getValue(2);
        }
        ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.Val->dump(CurDAG));
        DOUT << "\n";
#endif
      }

#ifndef NDEBUG
      Indent -= 2;
#endif

      // Both results were replaced directly; nothing for the caller to use.
      return NULL;
    }

    case ISD::SDIVREM:
    case ISD::UDIVREM: {
      SDOperand N0 = Node->getOperand(0);
      SDOperand N1 = Node->getOperand(1);

      bool isSigned = Opcode == ISD::SDIVREM;
      if (!isSigned)
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
        case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
        case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
        case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
        }
      else
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
        case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
        case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
        case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
        }

      // DIV/IDIV use fixed registers: dividend in HiReg:LoReg, quotient back
      // in LoReg, remainder in HiReg.  ClrOpcode zeroes the high half for
      // unsigned division; SExtOpcode sign-extends LoReg into it for signed.
      unsigned LoReg, HiReg;
      unsigned ClrOpcode, SExtOpcode;
      switch (NVT) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:
        LoReg = X86::AL;  HiReg = X86::AH;
        ClrOpcode  = 0;
        SExtOpcode = X86::CBW;
        break;
      case MVT::i16:
        LoReg = X86::AX;  HiReg = X86::DX;
        ClrOpcode  = X86::MOV16r0;
        SExtOpcode = X86::CWD;
        break;
      case MVT::i32:
        LoReg = X86::EAX; HiReg = X86::EDX;
        ClrOpcode  = X86::MOV32r0;
        SExtOpcode = X86::CDQ;
        break;
      case MVT::i64:
        LoReg = X86::RAX; HiReg = X86::RDX;
        ClrOpcode  = X86::MOV64r0;
        SExtOpcode = X86::CQO;
        break;
      }

      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);

      SDOperand InFlag;
      if (NVT == MVT::i8 && !isSigned) {
        // Special case for div8, just use a move with zero extension to AX to
        // clear the upper 8 bits (AH).
        SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Move, Chain;
        if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3)) {
          SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N0.getOperand(0) };
          AddToISelQueue(N0.getOperand(0));
          AddToISelQueue(Tmp0);
          AddToISelQueue(Tmp1);
          AddToISelQueue(Tmp2);
          AddToISelQueue(Tmp3);
          Move =
            SDOperand(CurDAG->getTargetNode(X86::MOVZX16rm8, MVT::i16, MVT::Other,
                                            Ops, 5), 0);
          Chain = Move.getValue(1);
          ReplaceUses(N0.getValue(1), Chain);
        } else {
          AddToISelQueue(N0);
          Move =
            SDOperand(CurDAG->getTargetNode(X86::MOVZX16rr8, MVT::i16, N0), 0);
          Chain = CurDAG->getEntryNode();
        }
        Chain  = CurDAG->getCopyToReg(Chain, X86::AX, Move, SDOperand());
        InFlag = Chain.getValue(1);
      } else {
        AddToISelQueue(N0);
        // Move the dividend's low part into LoReg.
        InFlag =
          CurDAG->getCopyToReg(CurDAG->getEntryNode(),
                               LoReg, N0, SDOperand()).getValue(1);
        if (isSigned) {
          // Sign extend the low part into the high part.
          InFlag =
            SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
        } else {
          // Zero out the high part, effectively zero extending the input.
          SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
          InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg,
                                        ClrNode, InFlag).getValue(1);
        }
      }

      if (foldedLoad) {
        AddToISelQueue(N1.getOperand(0));
        AddToISelQueue(Tmp0);
        AddToISelQueue(Tmp1);
        AddToISelQueue(Tmp2);
        AddToISelQueue(Tmp3);
        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
        SDNode *CNode =
          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
        InFlag = SDOperand(CNode, 1);
        // Update the chain.
        ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
      } else {
        AddToISelQueue(N1);
        InFlag =
          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
      }

      // Copy the division (low) result, if it is needed.
      if (!N.getValue(0).use_empty()) {
        SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                                  LoReg, NVT, InFlag);
        InFlag = Result.getValue(2);
        ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.Val->dump(CurDAG));
        DOUT << "\n";
#endif
      }
      // Copy the remainder (high) result, if it is needed.
      if (!N.getValue(1).use_empty()) {
        SDOperand Result;
        if (HiReg == X86::AH && Subtarget->is64Bit()) {
          // Prevent use of AH in a REX instruction by referencing AX instead.
          // Shift it down 8 bits.
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                          X86::AX, MVT::i16, InFlag);
          InFlag = Result.getValue(2);
          Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                       CurDAG->getTargetConstant(8, MVT::i8)), 0);
          // Then truncate it down to i8.
          SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
          Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                                   MVT::i8, Result, SRIdx), 0);
        } else {
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                          HiReg, NVT, InFlag);
          InFlag = Result.getValue(2);
        }
        ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.Val->dump(CurDAG));
        DOUT << "\n";
#endif
      }

#ifndef NDEBUG
      Indent -= 2;
#endif

      // Both results were replaced directly; nothing for the caller to use.
      return NULL;
    }

    case ISD::ANY_EXTEND: {
      SDOperand N0 = Node->getOperand(0);
      AddToISelQueue(N0);
      if (NVT == MVT::i64 || NVT == MVT::i32 || NVT == MVT::i16) {
        SDOperand SRIdx;
        switch(N0.getValueType()) {
        case MVT::i32:
          SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
          break;
        case MVT::i16:
          SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
          break;
        case MVT::i8:
          // The 8-bit subreg path is only taken on 64-bit targets; on 32-bit
          // targets not every register has an 8-bit subreg (see getTruncate),
          // so SRIdx stays null and the generated ISel handles it.
          if (Subtarget->is64Bit())
            SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
          break;
        default: assert(0 && "Unknown any_extend!");
        }
        if (SRIdx.Val) {
          // Implement the extend as an INSERT_SUBREG of the narrow value.
          SDNode *ResNode = CurDAG->getTargetNode(X86::INSERT_SUBREG,
                                                  NVT, N0, SRIdx);

#ifndef NDEBUG
          DOUT << std::string(Indent-2, ' ') << "=> ";
          DEBUG(ResNode->dump(CurDAG));
          DOUT << "\n";
          Indent -= 2;
#endif
          return ResNode;
        } // Otherwise let generated ISel handle it.
      }
      break;
    }

    case ISD::SIGN_EXTEND_INREG: {
      SDOperand N0 = Node->getOperand(0);
      AddToISelQueue(N0);

      // Truncate down to the small type, then sign-extend back up with the
      // matching MOVSX.
      MVT::ValueType SVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
      SDOperand TruncOp = SDOperand(getTruncate(N0, SVT), 0);
      unsigned Opc = 0;
      switch (NVT) {
      case MVT::i16:
        if (SVT == MVT::i8) Opc = X86::MOVSX16rr8;
        else assert(0 && "Unknown sign_extend_inreg!");
        break;
      case MVT::i32:
        switch (SVT) {
        case MVT::i8:  Opc = X86::MOVSX32rr8;  break;
        case MVT::i16: Opc = X86::MOVSX32rr16; break;
        default: assert(0 && "Unknown sign_extend_inreg!");
        }
        break;
      case MVT::i64:
        switch (SVT) {
        case MVT::i8:  Opc = X86::MOVSX64rr8;  break;
        case MVT::i16: Opc = X86::MOVSX64rr16; break;
        case MVT::i32: Opc = X86::MOVSX64rr32; break;
        default: assert(0 && "Unknown sign_extend_inreg!");
        }
        break;
      default: assert(0 && "Unknown sign_extend_inreg!");
      }

      SDNode *ResNode = CurDAG->getTargetNode(Opc, NVT, TruncOp);

#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(TruncOp.Val->dump(CurDAG));
      DOUT << "\n";
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(ResNode->dump(CurDAG));
      DOUT << "\n";
      Indent -= 2;
#endif
      return ResNode;
      break;
    }

    case ISD::TRUNCATE: {
      SDOperand Input = Node->getOperand(0);
      AddToISelQueue(Node->getOperand(0));
      SDNode *ResNode = getTruncate(Input, NVT);

#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(ResNode->dump(CurDAG));
        DOUT << "\n";
        Indent -= 2;
#endif
      return ResNode;
      break;
    }
  }

  // Everything else goes through the tblgen-generated matcher.
  SDNode *ResNode = SelectCode(N);

#ifndef NDEBUG
  DOUT << std::string(Indent-2, ' ') << "=> ";
  if (ResNode == NULL || ResNode == N.Val)
    DEBUG(N.Val->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DOUT << "\n";
  Indent -= 2;
#endif

  return ResNode;
}
1608
1609bool X86DAGToDAGISel::
1610SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
1611                             std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
1612  SDOperand Op0, Op1, Op2, Op3;
1613  switch (ConstraintCode) {
1614  case 'o':   // offsetable        ??
1615  case 'v':   // not offsetable    ??
1616  default: return true;
1617  case 'm':   // memory
1618    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3))
1619      return true;
1620    break;
1621  }
1622
1623  OutOps.push_back(Op0);
1624  OutOps.push_back(Op1);
1625  OutOps.push_back(Op2);
1626  OutOps.push_back(Op3);
1627  AddToISelQueue(Op0);
1628  AddToISelQueue(Op1);
1629  AddToISelQueue(Op2);
1630  AddToISelQueue(Op3);
1631  return false;
1632}
1633
1634/// createX86ISelDag - This pass converts a legalized DAG into a
1635/// X86-specific DAG, ready for instruction scheduling.
1636///
1637FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
1638  return new X86DAGToDAGISel(TM, Fast);
1639}
1640