X86ISelDAGToDAG.cpp revision 7571eb5015ad07d4b849cd97a5f820be19523a66
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines a DAG pattern matching instruction selector for X86,
11// converting from a legalized dag to a X86 dag.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "x86-isel"
16#include "X86.h"
17#include "X86InstrBuilder.h"
18#include "X86ISelLowering.h"
19#include "X86MachineFunctionInfo.h"
20#include "X86RegisterInfo.h"
21#include "X86Subtarget.h"
22#include "X86TargetMachine.h"
23#include "llvm/GlobalValue.h"
24#include "llvm/Instructions.h"
25#include "llvm/Intrinsics.h"
26#include "llvm/Support/CFG.h"
27#include "llvm/Type.h"
28#include "llvm/CodeGen/MachineConstantPool.h"
29#include "llvm/CodeGen/MachineFunction.h"
30#include "llvm/CodeGen/MachineFrameInfo.h"
31#include "llvm/CodeGen/MachineInstrBuilder.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/CodeGen/SelectionDAGISel.h"
34#include "llvm/Target/TargetMachine.h"
35#include "llvm/Target/TargetOptions.h"
36#include "llvm/Support/Compiler.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/ErrorHandling.h"
39#include "llvm/Support/MathExtras.h"
40#include "llvm/Support/Streams.h"
41#include "llvm/Support/raw_ostream.h"
42#include "llvm/ADT/SmallPtrSet.h"
43#include "llvm/ADT/Statistic.h"
44using namespace llvm;
45
46#include "llvm/Support/CommandLine.h"
47static cl::opt<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden);
48
49STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
50
51//===----------------------------------------------------------------------===//
52//                      Pattern Matcher Implementation
53//===----------------------------------------------------------------------===//
54
namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDValue's instead of register numbers for the leaves of the matched
  /// tree.
  struct X86ISelAddressMode {
    // Discriminator for the Base field below: base is either a register
    // value or a stack frame index.
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    struct {            // This is really a union, discriminated by BaseType!
      SDValue Reg;
      int FrameIndex;
    } Base;

    unsigned Scale;             // Index scale factor (initialized to 1).
    SDValue IndexReg;           // Index register, or null if none matched.
    int32_t Disp;               // Immediate displacement.
    SDValue Segment;            // Segment override register, or null.
    GlobalValue *GV;            // Symbolic displacement: global value.
    Constant *CP;               // Symbolic displacement: constant pool entry.
    const char *ES;             // Symbolic displacement: external symbol.
    int JT;                     // Symbolic displacement: jump table (-1 = none).
    unsigned Align;    // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    // Construct an empty addressing mode: register base with no registers,
    // scale 1, zero displacement, and no symbolic displacement.
    X86ISelAddressMode()
      : BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
        Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), SymbolFlags(0) {
    }

    /// hasSymbolicDisplacement - Return true if a global, constant pool
    /// entry, external symbol, or jump table has already been folded into
    /// the displacement.
    bool hasSymbolicDisplacement() const {
      return GV != 0 || CP != 0 || ES != 0 || JT != -1;
    }

    /// hasBaseOrIndexReg - Return true if a base or index register has
    /// already been matched.
    bool hasBaseOrIndexReg() const {
      return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0;
    }

    /// isRIPRelative - Return true if this addressing mode is already RIP
    /// relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    /// setBaseReg - Make Reg the base register, switching BaseType back to
    /// RegBase if it was a frame index.
    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base.Reg = Reg;
    }

    /// dump - Print the addressing mode to cerr for debugging.
    void dump() {
      cerr << "X86ISelAddressMode " << this << "\n";
      cerr << "Base.Reg ";
              if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump();
              else cerr << "nul";
      cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
      cerr << " Scale" << Scale << "\n";
      cerr << "IndexReg ";
              if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
              else cerr << "nul";
      cerr << " Disp " << Disp << "\n";
      cerr << "GV "; if (GV) GV->dump();
                     else cerr << "nul";
      cerr << " CP "; if (CP) CP->dump();
                     else cerr << "nul";
      cerr << "\n";
      cerr << "ES "; if (ES) cerr << ES; else cerr << "nul";
      cerr  << " JT" << JT << " Align" << Align << "\n";
    }
  };
}
130
namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
    /// X86Lowering - This object fully describes how to lower LLVM code to an
    /// X86-specific SelectionDAG.
    X86TargetLowering &X86Lowering;

    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// OptForSize - If true, selector should try to optimize for code size
    /// instead of performance.
    bool OptForSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel),
        X86Lowering(*tm.getTargetLowering()),
        Subtarget(&tm.getSubtarget<X86Subtarget>()),
        OptForSize(false) {}

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    /// InstructionSelect - This callback is invoked by
    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
    virtual void InstructionSelect();

    /// EmitFunctionEntryCode - Emit any code needed at the very start of the
    /// function (special handling for "main"; see EmitSpecialCodeForMain).
    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);

    /// IsLegalAndProfitableToFold - Decide whether node N may be folded into
    /// user U when U is selected from Root (see definition below).
    virtual
      bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    // Top-level node selection, plus dedicated selectors for 64-bit atomic
    // pseudo-ops.
    SDNode *Select(SDValue N);
    SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
    SDNode *SelectAtomicLoadAdd(SDNode *Node, MVT NVT);

    // Addressing-mode matchers.  By convention these return false on a
    // successful match (folding N into AM) and true on failure.
    bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
    // Pattern-predicate entry points used by the generated matcher to
    // produce the standard x86 memory operand quintuple.
    bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp);
    bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp);
    bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
                             SDValue N, SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &InChain, SDValue &OutChain);
    bool TryFoldLoad(SDValue P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);
    // DAG pre-passes run from InstructionSelect() before selection proper.
    void PreprocessForRMW();
    void PreprocessForFPConvert();

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                              char ConstraintCode,
                                              std::vector<SDValue> &OutOps);

    /// EmitSpecialCodeForMain - Emit code that runs only at the top of the
    /// "main" function (a __main call on Cygwin/MinGW targets).
    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    /// getAddressOperands - Lower a fully-matched X86ISelAddressMode into the
    /// five standard x86 memory operands: base, scale, index, disp, segment.
    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                   SDValue &Scale, SDValue &Index,
                                   SDValue &Disp, SDValue &Segment) {
      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
        AM.Base.Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES)
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);

      // No explicit segment override means register 0 (no segment).
      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDValue getI16Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - Return an SDNode that returns the value of
    /// the global base register. Output instructions required to
    /// initialize the global base register, if necessary.
    ///
    SDNode *getGlobalBaseReg();

    /// getTargetMachine - Return a reference to the TargetMachine, casted
    /// to the target-specific type.
    const X86TargetMachine &getTargetMachine() {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
    /// to the target-specific type.
    const X86InstrInfo *getInstrInfo() {
      return getTargetMachine().getInstrInfo();
    }

#ifndef NDEBUG
    // Debug-only: indentation level for selection tracing output
    // (reset to 0 in InstructionSelect).
    unsigned Indent;
#endif
  };
}
281
282
283bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
284                                                 SDNode *Root) const {
285  if (OptLevel == CodeGenOpt::None) return false;
286
287  if (U == Root)
288    switch (U->getOpcode()) {
289    default: break;
290    case ISD::ADD:
291    case ISD::ADDC:
292    case ISD::ADDE:
293    case ISD::AND:
294    case ISD::OR:
295    case ISD::XOR: {
296      SDValue Op1 = U->getOperand(1);
297
298      // If the other operand is a 8-bit immediate we should fold the immediate
299      // instead. This reduces code size.
300      // e.g.
301      // movl 4(%esp), %eax
302      // addl $4, %eax
303      // vs.
304      // movl $4, %eax
305      // addl 4(%esp), %eax
306      // The former is 2 bytes shorter. In case where the increment is 1, then
307      // the saving can be 4 bytes (by using incl %eax).
308      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
309        if (Imm->getAPIntValue().isSignedIntN(8))
310          return false;
311
312      // If the other operand is a TLS address, we should fold it instead.
313      // This produces
314      // movl    %gs:0, %eax
315      // leal    i@NTPOFF(%eax), %eax
316      // instead of
317      // movl    $i@NTPOFF, %eax
318      // addl    %gs:0, %eax
319      // if the block also has an access to a second TLS address this will save
320      // a load.
321      // FIXME: This is probably also true for non TLS addresses.
322      if (Op1.getOpcode() == X86ISD::Wrapper) {
323        SDValue Val = Op1.getOperand(0);
324        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
325          return false;
326      }
327    }
328    }
329
330  // Proceed to 'generic' cycle finder code
331  return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root);
332}
333
334/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
335/// and move load below the TokenFactor. Replace store's chain operand with
336/// load's chain result.
337static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
338                                 SDValue Store, SDValue TF) {
339  SmallVector<SDValue, 4> Ops;
340  for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
341    if (Load.getNode() == TF.getOperand(i).getNode())
342      Ops.push_back(Load.getOperand(0));
343    else
344      Ops.push_back(TF.getOperand(i));
345  CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
346  CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
347  CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
348                             Store.getOperand(2), Store.getOperand(3));
349}
350
351/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.
352///
353static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
354                      SDValue &Load) {
355  if (N.getOpcode() == ISD::BIT_CONVERT)
356    N = N.getOperand(0);
357
358  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
359  if (!LD || LD->isVolatile())
360    return false;
361  if (LD->getAddressingMode() != ISD::UNINDEXED)
362    return false;
363
364  ISD::LoadExtType ExtType = LD->getExtensionType();
365  if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
366    return false;
367
368  if (N.hasOneUse() &&
369      N.getOperand(1) == Address &&
370      N.getNode()->isOperandOf(Chain.getNode())) {
371    Load = N;
372    return true;
373  }
374  return false;
375}
376
/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
/// operand and move load below the call's chain operand.
static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
                                  SDValue Call, SDValue CallSeqStart) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = CallSeqStart.getOperand(0);
  // Detach the load from the chain feeding CALLSEQ_START: it is either the
  // chain operand itself, or one operand of a TokenFactor chain.  Either
  // way, Ops ends up holding the replacement chain operand(s).
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected CallSeqStart chain operand");
    // Copy the TokenFactor's operands, substituting the load's own chain
    // input for the load itself.
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    // Build a fresh TokenFactor without the load and use it as the single
    // replacement chain operand.
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
                      MVT::Other, &Ops[0], Ops.size());
    Ops.clear();
    Ops.push_back(NewChain);
  }
  // Reattach CALLSEQ_START to the load-free chain, keeping its remaining
  // operands unchanged.
  for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
    Ops.push_back(CallSeqStart.getOperand(i));
  CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
  // Move the load directly below the call: its chain input becomes the
  // call's current chain operand.
  CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));
  // Finally, make the call chain off the load's chain result.
  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
    Ops.push_back(Call.getOperand(i));
  CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
}
410
411/// isCalleeLoad - Return true if call address is a load and it can be
412/// moved below CALLSEQ_START and the chains leading up to the call.
413/// Return the CALLSEQ_START by reference as a second output.
414static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
415  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
416    return false;
417  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
418  if (!LD ||
419      LD->isVolatile() ||
420      LD->getAddressingMode() != ISD::UNINDEXED ||
421      LD->getExtensionType() != ISD::NON_EXTLOAD)
422    return false;
423
424  // Now let's find the callseq_start.
425  while (Chain.getOpcode() != ISD::CALLSEQ_START) {
426    if (!Chain.hasOneUse())
427      return false;
428    Chain = Chain.getOperand(0);
429  }
430
431  if (Chain.getOperand(0).getNode() == Callee.getNode())
432    return true;
433  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
434      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()))
435    return true;
436  return false;
437}
438
439
/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
/// This is only run if not in -O0 mode.
/// This allows the instruction selector to pick more read-modify-write
/// instructions. This is a common case:
///
///     [Load chain]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///      /      \-
///     /         |
/// [TokenFactor] [Op]
///     ^          ^
///     |          |
///      \        /
///       \      /
///       [Store]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
///
///     [Load chain]
///         ^
///         |
///    [TokenFactor]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///       |     \-
///       |       |
///       |     [Op]
///       |       ^
///       |       |
///       \      /
///        \    /
///       [Store]
void X86DAGToDAGISel::PreprocessForRMW() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {
    if (I->getOpcode() == X86ISD::CALL) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      SDValue Chain = I->getOperand(0);
      SDValue Load  = I->getOperand(1);
      if (!isCalleeLoad(Load, Chain))
        continue;
      MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Only plain (non-truncating) stores whose chain is a TokenFactor are
    // candidates for the RMW transformation pictured above.
    if (!ISD::isNON_TRUNCStore(I))
      continue;
    SDValue Chain = I->getOperand(0);

    if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
      continue;

    // N1 is the stored value, N2 the store address.  Scalar floating-point
    // values are excluded, and the stored value must have no other users.
    SDValue N1 = I->getOperand(1);
    SDValue N2 = I->getOperand(2);
    if ((N1.getValueType().isFloatingPoint() &&
         !N1.getValueType().isVector()) ||
        !N1.hasOneUse())
      continue;

    bool RModW = false;
    SDValue Load;
    unsigned Opcode = N1.getNode()->getOpcode();
    switch (Opcode) {
    // For these ops the load may be folded from either operand.
    case ISD::ADD:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::VECTOR_SHUFFLE: {
      SDValue N10 = N1.getOperand(0);
      SDValue N11 = N1.getOperand(1);
      RModW = isRMWLoad(N10, Chain, N2, Load);
      if (!RModW)
        RModW = isRMWLoad(N11, Chain, N2, Load);
      break;
    }
    // For these ops only the first operand may be the folded load.
    case ISD::SUB:
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
    case ISD::ROTL:
    case ISD::ROTR:
    case ISD::SUBC:
    case ISD::SUBE:
    case X86ISD::SHLD:
    case X86ISD::SHRD: {
      SDValue N10 = N1.getOperand(0);
      RModW = isRMWLoad(N10, Chain, N2, Load);
      break;
    }
    }

    // Found a (store (op (load))) pattern; hoist the load above the
    // TokenFactor so the whole thing can be selected as one instruction.
    if (RModW) {
      MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
      ++NumLoadMoved;
    }
  }
}
567
568
/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
/// nodes that target the FP stack to be store and load to the stack.  This is a
/// gross hack.  We would like to simply mark these as being illegal, but when
/// we do that, legalize produces these when it expands calls, then expands
/// these in the same legalize pass.  We would like dag combine to be able to
/// hack on these between the call expansion and the node legalization.  As such
/// this pass basically does "really late" legalization of these inline with the
/// X86 isel pass.
void X86DAGToDAGISel::PreprocessForFPConvert() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    MVT SrcVT = N->getOperand(0).getValueType();
    MVT DstVT = N->getValueType(0);
    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    DebugLoc dl = N->getDebugLoc();

    // FIXME: optimize the case where the src/dest is a load or store?
    // Lower the conversion as a truncating store to a stack slot followed by
    // an extending load from the same slot.
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, NULL, 0, MemVT);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        NULL, 0, MemVT);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created.  This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}
634
/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelect() {
  const Function *F = MF->getFunction();
  // Honor the function-level OptimizeForSize attribute for this run.
  OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);

  DEBUG(BB->dump());
  // RMW preprocessing is only done when optimizing.
  if (OptLevel != CodeGenOpt::None)
    PreprocessForRMW();

  // FIXME: This should only happen when not compiled with -O0.
  PreprocessForFPConvert();

  // Codegen the basic block.
#ifndef NDEBUG
  DOUT << "===== Instruction selection begins:\n";
  Indent = 0;
#endif
  SelectRoot(*CurDAG);
#ifndef NDEBUG
  DOUT << "===== Instruction selection ends:\n";
#endif

  // Selection leaves replaced nodes behind; prune them now.
  CurDAG->RemoveDeadNodes();
}
660
661/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
662/// the main function.
663void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
664                                             MachineFrameInfo *MFI) {
665  const TargetInstrInfo *TII = TM.getInstrInfo();
666  if (Subtarget->isTargetCygMing())
667    BuildMI(BB, DebugLoc::getUnknownLoc(),
668            TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
669}
670
671void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
672  // If this is main, emit special code for main.
673  MachineBasicBlock *BB = MF.begin();
674  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
675    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
676}
677
678
679bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
680                                              X86ISelAddressMode &AM) {
681  assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
682  SDValue Segment = N.getOperand(0);
683
684  if (AM.Segment.getNode() == 0) {
685    AM.Segment = Segment;
686    return false;
687  }
688
689  return true;
690}
691
692bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
693  // This optimization is valid because the GNU TLS model defines that
694  // gs:0 (or fs:0 on X86-64) contains its own address.
695  // For more information see http://people.redhat.com/drepper/tls.pdf
696
697  SDValue Address = N.getOperand(1);
698  if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
699      !MatchSegmentBaseAddress (Address, AM))
700    return false;
701
702  return true;
703}
704
/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
/// into an addressing mode.  These wrap things that will resolve down into a
/// symbol reference.  If no match is possible, this returns true, otherwise it
/// returns false.
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  // N0 is the wrapped symbol: a global, constant pool entry, external
  // symbol, or jump table.
  SDValue N0 = N.getOperand(0);

  // Handle X86-64 rip-relative addresses.  We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      TM.getCodeModel() == CodeModel::Small &&

      // Base and index reg must be 0 in order to use %rip as base and lowering
      // must allow RIP.
      !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {

    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      // The combined displacement must still fit the 32-bit RIP-relative
      // offset field.
      int64_t Offset = AM.Disp + G->getOffset();
      if (!isInt32(Offset)) return true;
      AM.GV = G->getGlobal();
      AM.Disp = Offset;
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      int64_t Offset = AM.Disp + CP->getOffset();
      if (!isInt32(Offset)) return true;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp = Offset;
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else {
      JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    }

    // Record %rip as the base register so later matching knows this mode is
    // RIP-relative (see X86ISelAddressMode::isRIPRelative).
    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -static -mcmodel=small mode.  In 64-bit
  // mode, this results in a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      (TM.getCodeModel() == CodeModel::Small &&
       TM.getRelocationModel() == Reloc::Static)) {
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else {
      JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    }
    return false;
  }

  return true;
}
784
785/// MatchAddress - Add the specified node to the specified addressing mode,
786/// returning true if it cannot be done.  This just pattern matches for the
787/// addressing mode.
788bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
789  if (MatchAddressRecursively(N, AM, 0))
790    return true;
791
792  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
793  // a smaller encoding and avoids a scaled-index.
794  if (AM.Scale == 2 &&
795      AM.BaseType == X86ISelAddressMode::RegBase &&
796      AM.Base.Reg.getNode() == 0) {
797    AM.Base.Reg = AM.IndexReg;
798    AM.Scale = 1;
799  }
800
801  return false;
802}
803
804bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
805                                              unsigned Depth) {
806  bool is64Bit = Subtarget->is64Bit();
807  DebugLoc dl = N.getDebugLoc();
808  DOUT << "MatchAddress: "; DEBUG(AM.dump());
809  // Limit recursion.
810  if (Depth > 5)
811    return MatchAddressBase(N, AM);
812
813  // If this is already a %rip relative address, we can only merge immediates
814  // into it.  Instead of handling this in every case, we handle it here.
815  // RIP relative addressing: %rip + 32-bit displacement!
816  if (AM.isRIPRelative()) {
817    // FIXME: JumpTable and ExternalSymbol address currently don't like
818    // displacements.  It isn't very important, but this should be fixed for
819    // consistency.
820    if (!AM.ES && AM.JT != -1) return true;
821
822    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
823      int64_t Val = AM.Disp + Cst->getSExtValue();
824      if (isInt32(Val)) {
825        AM.Disp = Val;
826        return false;
827      }
828    }
829    return true;
830  }
831
832  switch (N.getOpcode()) {
833  default: break;
834  case ISD::Constant: {
835    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
836    if (!is64Bit || isInt32(AM.Disp + Val)) {
837      AM.Disp += Val;
838      return false;
839    }
840    break;
841  }
842
843  case X86ISD::SegmentBaseAddress:
844    if (!MatchSegmentBaseAddress(N, AM))
845      return false;
846    break;
847
848  case X86ISD::Wrapper:
849  case X86ISD::WrapperRIP:
850    if (!MatchWrapper(N, AM))
851      return false;
852    break;
853
854  case ISD::LOAD:
855    if (!MatchLoad(N, AM))
856      return false;
857    break;
858
859  case ISD::FrameIndex:
860    if (AM.BaseType == X86ISelAddressMode::RegBase
861        && AM.Base.Reg.getNode() == 0) {
862      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
863      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
864      return false;
865    }
866    break;
867
868  case ISD::SHL:
869    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
870      break;
871
872    if (ConstantSDNode
873          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
874      unsigned Val = CN->getZExtValue();
875      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
876      // that the base operand remains free for further matching. If
877      // the base doesn't end up getting used, a post-processing step
878      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
879      if (Val == 1 || Val == 2 || Val == 3) {
880        AM.Scale = 1 << Val;
881        SDValue ShVal = N.getNode()->getOperand(0);
882
883        // Okay, we know that we have a scale by now.  However, if the scaled
884        // value is an add of something and a constant, we can fold the
885        // constant into the disp field here.
886        if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
887            isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
888          AM.IndexReg = ShVal.getNode()->getOperand(0);
889          ConstantSDNode *AddVal =
890            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
891          uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
892          if (!is64Bit || isInt32(Disp))
893            AM.Disp = Disp;
894          else
895            AM.IndexReg = ShVal;
896        } else {
897          AM.IndexReg = ShVal;
898        }
899        return false;
900      }
901    break;
902    }
903
904  case ISD::SMUL_LOHI:
905  case ISD::UMUL_LOHI:
906    // A mul_lohi where we need the low part can be folded as a plain multiply.
907    if (N.getResNo() != 0) break;
908    // FALL THROUGH
909  case ISD::MUL:
910  case X86ISD::MUL_IMM:
911    // X*[3,5,9] -> X+X*[2,4,8]
912    if (AM.BaseType == X86ISelAddressMode::RegBase &&
913        AM.Base.Reg.getNode() == 0 &&
914        AM.IndexReg.getNode() == 0) {
915      if (ConstantSDNode
916            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
917        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
918            CN->getZExtValue() == 9) {
919          AM.Scale = unsigned(CN->getZExtValue())-1;
920
921          SDValue MulVal = N.getNode()->getOperand(0);
922          SDValue Reg;
923
924          // Okay, we know that we have a scale by now.  However, if the scaled
925          // value is an add of something and a constant, we can fold the
926          // constant into the disp field here.
927          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
928              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
929            Reg = MulVal.getNode()->getOperand(0);
930            ConstantSDNode *AddVal =
931              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
932            uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
933                                      CN->getZExtValue();
934            if (!is64Bit || isInt32(Disp))
935              AM.Disp = Disp;
936            else
937              Reg = N.getNode()->getOperand(0);
938          } else {
939            Reg = N.getNode()->getOperand(0);
940          }
941
942          AM.IndexReg = AM.Base.Reg = Reg;
943          return false;
944        }
945    }
946    break;
947
948  case ISD::SUB: {
949    // Given A-B, if A can be completely folded into the address and
950    // the index field with the index field unused, use -B as the index.
951    // This is a win if a has multiple parts that can be folded into
952    // the address. Also, this saves a mov if the base register has
953    // other uses, since it avoids a two-address sub instruction, however
954    // it costs an additional mov if the index register has other uses.
955
956    // Test if the LHS of the sub can be folded.
957    X86ISelAddressMode Backup = AM;
958    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
959      AM = Backup;
960      break;
961    }
962    // Test if the index field is free for use.
963    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
964      AM = Backup;
965      break;
966    }
967    int Cost = 0;
968    SDValue RHS = N.getNode()->getOperand(1);
969    // If the RHS involves a register with multiple uses, this
970    // transformation incurs an extra mov, due to the neg instruction
971    // clobbering its operand.
972    if (!RHS.getNode()->hasOneUse() ||
973        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
974        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
975        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
976        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
977         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
978      ++Cost;
979    // If the base is a register with multiple uses, this
980    // transformation may save a mov.
981    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
982         AM.Base.Reg.getNode() &&
983         !AM.Base.Reg.getNode()->hasOneUse()) ||
984        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
985      --Cost;
986    // If the folded LHS was interesting, this transformation saves
987    // address arithmetic.
988    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
989        ((AM.Disp != 0) && (Backup.Disp == 0)) +
990        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
991      --Cost;
992    // If it doesn't look like it may be an overall win, don't do it.
993    if (Cost >= 0) {
994      AM = Backup;
995      break;
996    }
997
998    // Ok, the transformation is legal and appears profitable. Go for it.
999    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
1000    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
1001    AM.IndexReg = Neg;
1002    AM.Scale = 1;
1003
1004    // Insert the new nodes into the topological ordering.
1005    if (Zero.getNode()->getNodeId() == -1 ||
1006        Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1007      CurDAG->RepositionNode(N.getNode(), Zero.getNode());
1008      Zero.getNode()->setNodeId(N.getNode()->getNodeId());
1009    }
1010    if (Neg.getNode()->getNodeId() == -1 ||
1011        Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1012      CurDAG->RepositionNode(N.getNode(), Neg.getNode());
1013      Neg.getNode()->setNodeId(N.getNode()->getNodeId());
1014    }
1015    return false;
1016  }
1017
1018  case ISD::ADD: {
1019    X86ISelAddressMode Backup = AM;
1020    if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) &&
1021        !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1))
1022      return false;
1023    AM = Backup;
1024    if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) &&
1025        !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1))
1026      return false;
1027    AM = Backup;
1028
1029    // If we couldn't fold both operands into the address at the same time,
1030    // see if we can just put each operand into a register and fold at least
1031    // the add.
1032    if (AM.BaseType == X86ISelAddressMode::RegBase &&
1033        !AM.Base.Reg.getNode() &&
1034        !AM.IndexReg.getNode()) {
1035      AM.Base.Reg = N.getNode()->getOperand(0);
1036      AM.IndexReg = N.getNode()->getOperand(1);
1037      AM.Scale = 1;
1038      return false;
1039    }
1040    break;
1041  }
1042
1043  case ISD::OR:
1044    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
1045    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1046      X86ISelAddressMode Backup = AM;
1047      uint64_t Offset = CN->getSExtValue();
1048      // Start with the LHS as an addr mode.
1049      if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1050          // Address could not have picked a GV address for the displacement.
1051          AM.GV == NULL &&
1052          // On x86-64, the resultant disp must fit in 32-bits.
1053          (!is64Bit || isInt32(AM.Disp + Offset)) &&
1054          // Check to see if the LHS & C is zero.
1055          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
1056        AM.Disp += Offset;
1057        return false;
1058      }
1059      AM = Backup;
1060    }
1061    break;
1062
1063  case ISD::AND: {
1064    // Perform some heroic transforms on an and of a constant-count shift
1065    // with a constant to enable use of the scaled offset field.
1066
1067    SDValue Shift = N.getOperand(0);
1068    if (Shift.getNumOperands() != 2) break;
1069
1070    // Scale must not be used already.
1071    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
1072
1073    SDValue X = Shift.getOperand(0);
1074    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
1075    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
1076    if (!C1 || !C2) break;
1077
1078    // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
1079    // allows us to convert the shift and and into an h-register extract and
1080    // a scaled index.
1081    if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
1082      unsigned ScaleLog = 8 - C1->getZExtValue();
1083      if (ScaleLog > 0 && ScaleLog < 4 &&
1084          C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
1085        SDValue Eight = CurDAG->getConstant(8, MVT::i8);
1086        SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
1087        SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
1088                                      X, Eight);
1089        SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
1090                                      Srl, Mask);
1091        SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
1092        SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
1093                                      And, ShlCount);
1094
1095        // Insert the new nodes into the topological ordering.
1096        if (Eight.getNode()->getNodeId() == -1 ||
1097            Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1098          CurDAG->RepositionNode(X.getNode(), Eight.getNode());
1099          Eight.getNode()->setNodeId(X.getNode()->getNodeId());
1100        }
1101        if (Mask.getNode()->getNodeId() == -1 ||
1102            Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1103          CurDAG->RepositionNode(X.getNode(), Mask.getNode());
1104          Mask.getNode()->setNodeId(X.getNode()->getNodeId());
1105        }
1106        if (Srl.getNode()->getNodeId() == -1 ||
1107            Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
1108          CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
1109          Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
1110        }
1111        if (And.getNode()->getNodeId() == -1 ||
1112            And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1113          CurDAG->RepositionNode(N.getNode(), And.getNode());
1114          And.getNode()->setNodeId(N.getNode()->getNodeId());
1115        }
1116        if (ShlCount.getNode()->getNodeId() == -1 ||
1117            ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1118          CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
1119          ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
1120        }
1121        if (Shl.getNode()->getNodeId() == -1 ||
1122            Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1123          CurDAG->RepositionNode(N.getNode(), Shl.getNode());
1124          Shl.getNode()->setNodeId(N.getNode()->getNodeId());
1125        }
1126        CurDAG->ReplaceAllUsesWith(N, Shl);
1127        AM.IndexReg = And;
1128        AM.Scale = (1 << ScaleLog);
1129        return false;
1130      }
1131    }
1132
1133    // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
1134    // allows us to fold the shift into this addressing mode.
1135    if (Shift.getOpcode() != ISD::SHL) break;
1136
1137    // Not likely to be profitable if either the AND or SHIFT node has more
1138    // than one use (unless all uses are for address computation). Besides,
1139    // isel mechanism requires their node ids to be reused.
1140    if (!N.hasOneUse() || !Shift.hasOneUse())
1141      break;
1142
1143    // Verify that the shift amount is something we can fold.
1144    unsigned ShiftCst = C1->getZExtValue();
1145    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
1146      break;
1147
1148    // Get the new AND mask, this folds to a constant.
1149    SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
1150                                         SDValue(C2, 0), SDValue(C1, 0));
1151    SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
1152                                     NewANDMask);
1153    SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
1154                                       NewAND, SDValue(C1, 0));
1155
1156    // Insert the new nodes into the topological ordering.
1157    if (C1->getNodeId() > X.getNode()->getNodeId()) {
1158      CurDAG->RepositionNode(X.getNode(), C1);
1159      C1->setNodeId(X.getNode()->getNodeId());
1160    }
1161    if (NewANDMask.getNode()->getNodeId() == -1 ||
1162        NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1163      CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
1164      NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
1165    }
1166    if (NewAND.getNode()->getNodeId() == -1 ||
1167        NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
1168      CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
1169      NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
1170    }
1171    if (NewSHIFT.getNode()->getNodeId() == -1 ||
1172        NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1173      CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
1174      NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
1175    }
1176
1177    CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
1178
1179    AM.Scale = 1 << ShiftCst;
1180    AM.IndexReg = NewAND;
1181    return false;
1182  }
1183  }
1184
1185  return MatchAddressBase(N, AM);
1186}
1187
1188/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
1189/// specified addressing mode without any further recursion.
1190bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1191  // Is the base register already occupied?
1192  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
1193    // If so, check to see if the scale index register is set.
1194    if (AM.IndexReg.getNode() == 0) {
1195      AM.IndexReg = N;
1196      AM.Scale = 1;
1197      return false;
1198    }
1199
1200    // Otherwise, we cannot select it.
1201    return true;
1202  }
1203
1204  // Default, generate it as a register.
1205  AM.BaseType = X86ISelAddressMode::RegBase;
1206  AM.Base.Reg = N;
1207  return false;
1208}
1209
1210/// SelectAddr - returns true if it is able pattern match an addressing mode.
1211/// It returns the operands which make up the maximal addressing mode it can
1212/// match by reference.
1213bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
1214                                 SDValue &Scale, SDValue &Index,
1215                                 SDValue &Disp, SDValue &Segment) {
1216  X86ISelAddressMode AM;
1217  bool Done = false;
1218  if (AvoidDupAddrCompute && !N.hasOneUse()) {
1219    unsigned Opcode = N.getOpcode();
1220    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex &&
1221        Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) {
1222      // If we are able to fold N into addressing mode, then we'll allow it even
1223      // if N has multiple uses. In general, addressing computation is used as
1224      // addresses by all of its uses. But watch out for CopyToReg uses, that
1225      // means the address computation is liveout. It will be computed by a LEA
1226      // so we want to avoid computing the address twice.
1227      for (SDNode::use_iterator UI = N.getNode()->use_begin(),
1228             UE = N.getNode()->use_end(); UI != UE; ++UI) {
1229        if (UI->getOpcode() == ISD::CopyToReg) {
1230          MatchAddressBase(N, AM);
1231          Done = true;
1232          break;
1233        }
1234      }
1235    }
1236  }
1237
1238  if (!Done && MatchAddress(N, AM))
1239    return false;
1240
1241  MVT VT = N.getValueType();
1242  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1243    if (!AM.Base.Reg.getNode())
1244      AM.Base.Reg = CurDAG->getRegister(0, VT);
1245  }
1246
1247  if (!AM.IndexReg.getNode())
1248    AM.IndexReg = CurDAG->getRegister(0, VT);
1249
1250  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1251  return true;
1252}
1253
1254/// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
1255/// match a load whose top elements are either undef or zeros.  The load flavor
1256/// is derived from the type of N, which is either v4f32 or v2f64.
1257bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
1258                                          SDValue N, SDValue &Base,
1259                                          SDValue &Scale, SDValue &Index,
1260                                          SDValue &Disp, SDValue &Segment,
1261                                          SDValue &InChain,
1262                                          SDValue &OutChain) {
1263  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
1264    InChain = N.getOperand(0).getValue(1);
1265    if (ISD::isNON_EXTLoad(InChain.getNode()) &&
1266        InChain.getValue(0).hasOneUse() &&
1267        N.hasOneUse() &&
1268        IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
1269      LoadSDNode *LD = cast<LoadSDNode>(InChain);
1270      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
1271        return false;
1272      OutChain = LD->getChain();
1273      return true;
1274    }
1275  }
1276
1277  // Also handle the case where we explicitly require zeros in the top
1278  // elements.  This is a vector shuffle from the zero vector.
1279  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1280      // Check to see if the top elements are all zeros (or bitcast of zeros).
1281      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1282      N.getOperand(0).getNode()->hasOneUse() &&
1283      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
1284      N.getOperand(0).getOperand(0).hasOneUse()) {
1285    // Okay, this is a zero extending load.  Fold it.
1286    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
1287    if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
1288      return false;
1289    OutChain = LD->getChain();
1290    InChain = SDValue(LD, 1);
1291    return true;
1292  }
1293  return false;
1294}
1295
1296
1297/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
1298/// mode it matches can be cost effectively emitted as an LEA instruction.
1299bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
1300                                    SDValue &Base, SDValue &Scale,
1301                                    SDValue &Index, SDValue &Disp) {
1302  X86ISelAddressMode AM;
1303
1304  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1305  // segments.
1306  SDValue Copy = AM.Segment;
1307  SDValue T = CurDAG->getRegister(0, MVT::i32);
1308  AM.Segment = T;
1309  if (MatchAddress(N, AM))
1310    return false;
1311  assert (T == AM.Segment);
1312  AM.Segment = Copy;
1313
1314  MVT VT = N.getValueType();
1315  unsigned Complexity = 0;
1316  if (AM.BaseType == X86ISelAddressMode::RegBase)
1317    if (AM.Base.Reg.getNode())
1318      Complexity = 1;
1319    else
1320      AM.Base.Reg = CurDAG->getRegister(0, VT);
1321  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1322    Complexity = 4;
1323
1324  if (AM.IndexReg.getNode())
1325    Complexity++;
1326  else
1327    AM.IndexReg = CurDAG->getRegister(0, VT);
1328
1329  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1330  // a simple shift.
1331  if (AM.Scale > 1)
1332    Complexity++;
1333
1334  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1335  // to a LEA. This is determined with some expermentation but is by no means
1336  // optimal (especially for code size consideration). LEA is nice because of
1337  // its three-address nature. Tweak the cost function again when we can run
1338  // convertToThreeAddress() at register allocation time.
1339  if (AM.hasSymbolicDisplacement()) {
1340    // For X86-64, we should always use lea to materialize RIP relative
1341    // addresses.
1342    if (Subtarget->is64Bit())
1343      Complexity = 4;
1344    else
1345      Complexity += 2;
1346  }
1347
1348  if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
1349    Complexity++;
1350
1351  // If it isn't worth using an LEA, reject it.
1352  if (Complexity <= 2)
1353    return false;
1354
1355  SDValue Segment;
1356  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1357  return true;
1358}
1359
1360/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
1361bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
1362                                        SDValue &Scale, SDValue &Index,
1363                                        SDValue &Disp) {
1364  assert(Op.getOpcode() == X86ISD::TLSADDR);
1365  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1366  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1367
1368  X86ISelAddressMode AM;
1369  AM.GV = GA->getGlobal();
1370  AM.Disp += GA->getOffset();
1371  AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
1372  AM.SymbolFlags = GA->getTargetFlags();
1373
1374  if (N.getValueType() == MVT::i32) {
1375    AM.Scale = 1;
1376    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1377  } else {
1378    AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1379  }
1380
1381  SDValue Segment;
1382  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1383  return true;
1384}
1385
1386
1387bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
1388                                  SDValue &Base, SDValue &Scale,
1389                                  SDValue &Index, SDValue &Disp,
1390                                  SDValue &Segment) {
1391  if (ISD::isNON_EXTLoad(N.getNode()) &&
1392      N.hasOneUse() &&
1393      IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
1394    return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
1395  return false;
1396}
1397
1398/// getGlobalBaseReg - Return an SDNode that returns the value of
1399/// the global base register. Output instructions required to
1400/// initialize the global base register, if necessary.
1401///
1402SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1403  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1404  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
1405}
1406
1407static SDNode *FindCallStartFromCall(SDNode *Node) {
1408  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
1409    assert(Node->getOperand(0).getValueType() == MVT::Other &&
1410         "Node doesn't have a token chain argument!");
1411  return FindCallStartFromCall(Node->getOperand(0).getNode());
1412}
1413
1414SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
1415  SDValue Chain = Node->getOperand(0);
1416  SDValue In1 = Node->getOperand(1);
1417  SDValue In2L = Node->getOperand(2);
1418  SDValue In2H = Node->getOperand(3);
1419  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1420  if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
1421    return NULL;
1422  SDValue LSI = Node->getOperand(4);    // MemOperand
1423  const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain};
1424  return CurDAG->getTargetNode(Opc, Node->getDebugLoc(),
1425                               MVT::i32, MVT::i32, MVT::Other, Ops,
1426                               array_lengthof(Ops));
1427}
1428
/// SelectAtomicLoadAdd - Select an ATOMIC_LOAD_ADD whose result is unused
/// into a locked memory-destination add/sub/inc/dec, returning 0 when the
/// pattern doesn't apply (result used, address unfoldable, unsupported VT).
SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, MVT NVT) {
  // Bail out if the old value is needed; only the no-result form can use the
  // lock-prefixed RMW instructions below.
  if (Node->hasAnyUseOfValue(0))
    return 0;

  // Optimize common patterns for __sync_add_and_fetch and
  // __sync_sub_and_fetch where the result is not used. This allows us
  // to use "lock" version of add, sub, inc, dec instructions.
  // FIXME: Do not use special instructions but instead add the "lock"
  // prefix to the target node somehow. The extra information will then be
  // transferred to machine instruction and it denotes the prefix.
  SDValue Chain = Node->getOperand(0);
  SDValue Ptr = Node->getOperand(1);
  SDValue Val = Node->getOperand(2);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return 0;

  // Classify the addend: +/-1 become inc/dec; other constants become an
  // immediate add (or sub of the negated value); "x + (0 - y)" becomes a
  // register sub.
  bool isInc = false, isDec = false, isSub = false, isCN = false;
  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
  if (CN) {
    isCN = true;
    int64_t CNVal = CN->getSExtValue();
    if (CNVal == 1)
      isInc = true;
    else if (CNVal == -1)
      isDec = true;
    else if (CNVal >= 0)
      Val = CurDAG->getTargetConstant(CNVal, NVT);
    else {
      isSub = true;
      Val = CurDAG->getTargetConstant(-CNVal, NVT);
    }
  } else if (Val.hasOneUse() &&
             Val.getOpcode() == ISD::SUB &&
             X86::isZeroNode(Val.getOperand(0))) {
    isSub = true;
    Val = Val.getOperand(1);
  }

  // Pick the locked opcode by operand width and the classification above.
  // For i16/i32/i64, prefer the sign-extended 8-bit immediate forms when
  // the constant fits.
  unsigned Opc = 0;
  switch (NVT.getSimpleVT()) {
  default: return 0;
  case MVT::i8:
    if (isInc)
      Opc = X86::LOCK_INC8m;
    else if (isDec)
      Opc = X86::LOCK_DEC8m;
    else if (isSub) {
      if (isCN)
        Opc = X86::LOCK_SUB8mi;
      else
        Opc = X86::LOCK_SUB8mr;
    } else {
      if (isCN)
        Opc = X86::LOCK_ADD8mi;
      else
        Opc = X86::LOCK_ADD8mr;
    }
    break;
  case MVT::i16:
    if (isInc)
      Opc = X86::LOCK_INC16m;
    else if (isDec)
      Opc = X86::LOCK_DEC16m;
    else if (isSub) {
      if (isCN) {
        if (Predicate_i16immSExt8(Val.getNode()))
          Opc = X86::LOCK_SUB16mi8;
        else
          Opc = X86::LOCK_SUB16mi;
      } else
        Opc = X86::LOCK_SUB16mr;
    } else {
      if (isCN) {
        if (Predicate_i16immSExt8(Val.getNode()))
          Opc = X86::LOCK_ADD16mi8;
        else
          Opc = X86::LOCK_ADD16mi;
      } else
        Opc = X86::LOCK_ADD16mr;
    }
    break;
  case MVT::i32:
    if (isInc)
      Opc = X86::LOCK_INC32m;
    else if (isDec)
      Opc = X86::LOCK_DEC32m;
    else if (isSub) {
      if (isCN) {
        if (Predicate_i32immSExt8(Val.getNode()))
          Opc = X86::LOCK_SUB32mi8;
        else
          Opc = X86::LOCK_SUB32mi;
      } else
        Opc = X86::LOCK_SUB32mr;
    } else {
      if (isCN) {
        if (Predicate_i32immSExt8(Val.getNode()))
          Opc = X86::LOCK_ADD32mi8;
        else
          Opc = X86::LOCK_ADD32mi;
      } else
        Opc = X86::LOCK_ADD32mr;
    }
    break;
  case MVT::i64:
    // i64 defaults to the register form; a constant only overrides it when
    // it fits an 8- or 32-bit signed immediate (x86-64 has no 64-bit imm).
    if (isInc)
      Opc = X86::LOCK_INC64m;
    else if (isDec)
      Opc = X86::LOCK_DEC64m;
    else if (isSub) {
      Opc = X86::LOCK_SUB64mr;
      if (isCN) {
        if (Predicate_i64immSExt8(Val.getNode()))
          Opc = X86::LOCK_SUB64mi8;
        else if (Predicate_i64immSExt32(Val.getNode()))
          Opc = X86::LOCK_SUB64mi32;
      }
    } else {
      Opc = X86::LOCK_ADD64mr;
      if (isCN) {
        if (Predicate_i64immSExt8(Val.getNode()))
          Opc = X86::LOCK_ADD64mi8;
        else if (Predicate_i64immSExt32(Val.getNode()))
          Opc = X86::LOCK_ADD64mi32;
      }
    }
    break;
  }

  // Emit the locked instruction; value 0 of the original node is replaced by
  // an IMPLICIT_DEF since the result is known to be unused.
  DebugLoc dl = Node->getDebugLoc();
  SDValue Undef = SDValue(CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF,
                                                dl, NVT), 0);
  SDValue MemOp = CurDAG->getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
  if (isInc || isDec) {
    // inc/dec take no value operand.
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, MemOp, Chain };
    SDValue Ret = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 7), 0);
    SDValue RetVals[] = { Undef, Ret };
    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
  } else {
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, MemOp, Chain };
    SDValue Ret = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 8), 0);
    SDValue RetVals[] = { Undef, Ret };
    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
  }
}
1575
1576SDNode *X86DAGToDAGISel::Select(SDValue N) {
1577  SDNode *Node = N.getNode();
1578  MVT NVT = Node->getValueType(0);
1579  unsigned Opc, MOpc;
1580  unsigned Opcode = Node->getOpcode();
1581  DebugLoc dl = Node->getDebugLoc();
1582
1583#ifndef NDEBUG
1584  DOUT << std::string(Indent, ' ') << "Selecting: ";
1585  DEBUG(Node->dump(CurDAG));
1586  DOUT << "\n";
1587  Indent += 2;
1588#endif
1589
1590  if (Node->isMachineOpcode()) {
1591#ifndef NDEBUG
1592    DOUT << std::string(Indent-2, ' ') << "== ";
1593    DEBUG(Node->dump(CurDAG));
1594    DOUT << "\n";
1595    Indent -= 2;
1596#endif
1597    return NULL;   // Already selected.
1598  }
1599
1600  switch (Opcode) {
1601    default: break;
1602    case X86ISD::GlobalBaseReg:
1603      return getGlobalBaseReg();
1604
1605    case X86ISD::ATOMOR64_DAG:
1606      return SelectAtomic64(Node, X86::ATOMOR6432);
1607    case X86ISD::ATOMXOR64_DAG:
1608      return SelectAtomic64(Node, X86::ATOMXOR6432);
1609    case X86ISD::ATOMADD64_DAG:
1610      return SelectAtomic64(Node, X86::ATOMADD6432);
1611    case X86ISD::ATOMSUB64_DAG:
1612      return SelectAtomic64(Node, X86::ATOMSUB6432);
1613    case X86ISD::ATOMNAND64_DAG:
1614      return SelectAtomic64(Node, X86::ATOMNAND6432);
1615    case X86ISD::ATOMAND64_DAG:
1616      return SelectAtomic64(Node, X86::ATOMAND6432);
1617    case X86ISD::ATOMSWAP64_DAG:
1618      return SelectAtomic64(Node, X86::ATOMSWAP6432);
1619
1620    case ISD::ATOMIC_LOAD_ADD: {
1621      SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
1622      if (RetVal)
1623        return RetVal;
1624      break;
1625    }
1626
1627    case ISD::SMUL_LOHI:
1628    case ISD::UMUL_LOHI: {
1629      SDValue N0 = Node->getOperand(0);
1630      SDValue N1 = Node->getOperand(1);
1631
1632      bool isSigned = Opcode == ISD::SMUL_LOHI;
1633      if (!isSigned)
1634        switch (NVT.getSimpleVT()) {
1635        default: llvm_unreachable("Unsupported VT!");
1636        case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
1637        case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
1638        case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
1639        case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
1640        }
1641      else
1642        switch (NVT.getSimpleVT()) {
1643        default: llvm_unreachable("Unsupported VT!");
1644        case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
1645        case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
1646        case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
1647        case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
1648        }
1649
1650      unsigned LoReg, HiReg;
1651      switch (NVT.getSimpleVT()) {
1652      default: llvm_unreachable("Unsupported VT!");
1653      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
1654      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
1655      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
1656      case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
1657      }
1658
1659      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1660      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
      // Multiplication is commutative, so try folding the other operand too.
1662      if (!foldedLoad) {
1663        foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1664        if (foldedLoad)
1665          std::swap(N0, N1);
1666      }
1667
1668      SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
1669                                              N0, SDValue()).getValue(1);
1670
1671      if (foldedLoad) {
1672        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
1673                          InFlag };
1674        SDNode *CNode =
1675          CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
1676                                array_lengthof(Ops));
1677        InFlag = SDValue(CNode, 1);
1678        // Update the chain.
1679        ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1680      } else {
1681        InFlag =
1682          SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1683      }
1684
1685      // Copy the low half of the result, if it is needed.
1686      if (!N.getValue(0).use_empty()) {
1687        SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1688                                                  LoReg, NVT, InFlag);
1689        InFlag = Result.getValue(2);
1690        ReplaceUses(N.getValue(0), Result);
1691#ifndef NDEBUG
1692        DOUT << std::string(Indent-2, ' ') << "=> ";
1693        DEBUG(Result.getNode()->dump(CurDAG));
1694        DOUT << "\n";
1695#endif
1696      }
1697      // Copy the high half of the result, if it is needed.
1698      if (!N.getValue(1).use_empty()) {
1699        SDValue Result;
1700        if (HiReg == X86::AH && Subtarget->is64Bit()) {
1701          // Prevent use of AH in a REX instruction by referencing AX instead.
1702          // Shift it down 8 bits.
1703          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1704                                          X86::AX, MVT::i16, InFlag);
1705          InFlag = Result.getValue(2);
1706          Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
1707                                                 Result,
1708                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
1709          // Then truncate it down to i8.
1710          SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
1711          Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
1712                                                   MVT::i8, Result, SRIdx), 0);
1713        } else {
1714          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1715                                          HiReg, NVT, InFlag);
1716          InFlag = Result.getValue(2);
1717        }
1718        ReplaceUses(N.getValue(1), Result);
1719#ifndef NDEBUG
1720        DOUT << std::string(Indent-2, ' ') << "=> ";
1721        DEBUG(Result.getNode()->dump(CurDAG));
1722        DOUT << "\n";
1723#endif
1724      }
1725
1726#ifndef NDEBUG
1727      Indent -= 2;
1728#endif
1729
1730      return NULL;
1731    }
1732
1733    case ISD::SDIVREM:
1734    case ISD::UDIVREM: {
1735      SDValue N0 = Node->getOperand(0);
1736      SDValue N1 = Node->getOperand(1);
1737
1738      bool isSigned = Opcode == ISD::SDIVREM;
1739      if (!isSigned)
1740        switch (NVT.getSimpleVT()) {
1741        default: llvm_unreachable("Unsupported VT!");
1742        case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
1743        case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
1744        case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
1745        case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
1746        }
1747      else
1748        switch (NVT.getSimpleVT()) {
1749        default: llvm_unreachable("Unsupported VT!");
1750        case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
1751        case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
1752        case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
1753        case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
1754        }
1755
1756      unsigned LoReg, HiReg;
1757      unsigned ClrOpcode, SExtOpcode;
1758      switch (NVT.getSimpleVT()) {
1759      default: llvm_unreachable("Unsupported VT!");
1760      case MVT::i8:
1761        LoReg = X86::AL;  HiReg = X86::AH;
1762        ClrOpcode  = 0;
1763        SExtOpcode = X86::CBW;
1764        break;
1765      case MVT::i16:
1766        LoReg = X86::AX;  HiReg = X86::DX;
1767        ClrOpcode  = X86::MOV16r0;
1768        SExtOpcode = X86::CWD;
1769        break;
1770      case MVT::i32:
1771        LoReg = X86::EAX; HiReg = X86::EDX;
1772        ClrOpcode  = X86::MOV32r0;
1773        SExtOpcode = X86::CDQ;
1774        break;
1775      case MVT::i64:
1776        LoReg = X86::RAX; HiReg = X86::RDX;
1777        ClrOpcode  = ~0U; // NOT USED.
1778        SExtOpcode = X86::CQO;
1779        break;
1780      }
1781
1782      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1783      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1784      bool signBitIsZero = CurDAG->SignBitIsZero(N0);
1785
1786      SDValue InFlag;
1787      if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
1788        // Special case for div8, just use a move with zero extension to AX to
1789        // clear the upper 8 bits (AH).
1790        SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
1791        if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
1792          SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
1793          Move =
1794            SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16,
1795                                          MVT::Other, Ops,
1796                                          array_lengthof(Ops)), 0);
1797          Chain = Move.getValue(1);
1798          ReplaceUses(N0.getValue(1), Chain);
1799        } else {
1800          Move =
1801            SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
1802          Chain = CurDAG->getEntryNode();
1803        }
1804        Chain  = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
1805        InFlag = Chain.getValue(1);
1806      } else {
1807        InFlag =
1808          CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
1809                               LoReg, N0, SDValue()).getValue(1);
1810        if (isSigned && !signBitIsZero) {
1811          // Sign extend the low part into the high part.
1812          InFlag =
1813            SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
1814        } else {
1815          // Zero out the high part, effectively zero extending the input.
1816          SDValue ClrNode;
1817
1818          if (NVT.getSimpleVT() == MVT::i64) {
1819            ClrNode = SDValue(CurDAG->getTargetNode(X86::MOV32r0, dl, MVT::i32),
1820                              0);
1821            // We just did a 32-bit clear, insert it into a 64-bit register to
1822            // clear the whole 64-bit reg.
1823            SDValue Undef =
1824              SDValue(CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF,
1825                                            dl, MVT::i64), 0);
1826            SDValue SubRegNo =
1827              CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
1828            ClrNode =
1829              SDValue(CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl,
1830                                            MVT::i64, Undef, ClrNode, SubRegNo),
1831                      0);
1832          } else {
1833            ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT), 0);
1834          }
1835
1836          InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
1837                                        ClrNode, InFlag).getValue(1);
1838        }
1839      }
1840
1841      if (foldedLoad) {
1842        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
1843                          InFlag };
1844        SDNode *CNode =
1845          CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
1846                                array_lengthof(Ops));
1847        InFlag = SDValue(CNode, 1);
1848        // Update the chain.
1849        ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1850      } else {
1851        InFlag =
1852          SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1853      }
1854
1855      // Copy the division (low) result, if it is needed.
1856      if (!N.getValue(0).use_empty()) {
1857        SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1858                                                  LoReg, NVT, InFlag);
1859        InFlag = Result.getValue(2);
1860        ReplaceUses(N.getValue(0), Result);
1861#ifndef NDEBUG
1862        DOUT << std::string(Indent-2, ' ') << "=> ";
1863        DEBUG(Result.getNode()->dump(CurDAG));
1864        DOUT << "\n";
1865#endif
1866      }
1867      // Copy the remainder (high) result, if it is needed.
1868      if (!N.getValue(1).use_empty()) {
1869        SDValue Result;
1870        if (HiReg == X86::AH && Subtarget->is64Bit()) {
1871          // Prevent use of AH in a REX instruction by referencing AX instead.
1872          // Shift it down 8 bits.
1873          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1874                                          X86::AX, MVT::i16, InFlag);
1875          InFlag = Result.getValue(2);
1876          Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
1877                                        Result,
1878                                        CurDAG->getTargetConstant(8, MVT::i8)),
1879                           0);
1880          // Then truncate it down to i8.
1881          SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
1882          Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
1883                                                   MVT::i8, Result, SRIdx), 0);
1884        } else {
1885          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1886                                          HiReg, NVT, InFlag);
1887          InFlag = Result.getValue(2);
1888        }
1889        ReplaceUses(N.getValue(1), Result);
1890#ifndef NDEBUG
1891        DOUT << std::string(Indent-2, ' ') << "=> ";
1892        DEBUG(Result.getNode()->dump(CurDAG));
1893        DOUT << "\n";
1894#endif
1895      }
1896
1897#ifndef NDEBUG
1898      Indent -= 2;
1899#endif
1900
1901      return NULL;
1902    }
1903
1904    case ISD::DECLARE: {
1905      // Handle DECLARE nodes here because the second operand may have been
1906      // wrapped in X86ISD::Wrapper.
1907      SDValue Chain = Node->getOperand(0);
1908      SDValue N1 = Node->getOperand(1);
1909      SDValue N2 = Node->getOperand(2);
1910      FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
1911
1912      // FIXME: We need to handle this for VLAs.
1913      if (!FINode) {
1914        ReplaceUses(N.getValue(0), Chain);
1915        return NULL;
1916      }
1917
1918      if (N2.getOpcode() == ISD::ADD &&
1919          N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg)
1920        N2 = N2.getOperand(1);
1921
      // If N2 is not Wrapper(descriptor) then the llvm.declare is mangled
      // somehow, just ignore it.
1924      if (N2.getOpcode() != X86ISD::Wrapper &&
1925          N2.getOpcode() != X86ISD::WrapperRIP) {
1926        ReplaceUses(N.getValue(0), Chain);
1927        return NULL;
1928      }
1929      GlobalAddressSDNode *GVNode =
1930        dyn_cast<GlobalAddressSDNode>(N2.getOperand(0));
1931      if (GVNode == 0) {
1932        ReplaceUses(N.getValue(0), Chain);
1933        return NULL;
1934      }
1935      SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
1936                                                 TLI.getPointerTy());
1937      SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(),
1938                                                    TLI.getPointerTy());
1939      SDValue Ops[] = { Tmp1, Tmp2, Chain };
1940      return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
1941                                   MVT::Other, Ops,
1942                                   array_lengthof(Ops));
1943    }
1944  }
1945
1946  SDNode *ResNode = SelectCode(N);
1947
1948#ifndef NDEBUG
1949  DOUT << std::string(Indent-2, ' ') << "=> ";
1950  if (ResNode == NULL || ResNode == N.getNode())
1951    DEBUG(N.getNode()->dump(CurDAG));
1952  else
1953    DEBUG(ResNode->dump(CurDAG));
1954  DOUT << "\n";
1955  Indent -= 2;
1956#endif
1957
1958  return ResNode;
1959}
1960
1961bool X86DAGToDAGISel::
1962SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
1963                             std::vector<SDValue> &OutOps) {
1964  SDValue Op0, Op1, Op2, Op3, Op4;
1965  switch (ConstraintCode) {
1966  case 'o':   // offsetable        ??
1967  case 'v':   // not offsetable    ??
1968  default: return true;
1969  case 'm':   // memory
1970    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
1971      return true;
1972    break;
1973  }
1974
1975  OutOps.push_back(Op0);
1976  OutOps.push_back(Op1);
1977  OutOps.push_back(Op2);
1978  OutOps.push_back(Op3);
1979  OutOps.push_back(Op4);
1980  return false;
1981}
1982
1983/// createX86ISelDag - This pass converts a legalized DAG into a
1984/// X86-specific DAG, ready for instruction scheduling.
1985///
1986FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
1987                                     llvm::CodeGenOpt::Level OptLevel) {
1988  return new X86DAGToDAGISel(TM, OptLevel);
1989}
1990