X86ISelDAGToDAG.cpp revision c23197a26f34f559ea9797de51e187087c039c42
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines a DAG pattern matching instruction selector for X86,
11// converting from a legalized dag to a X86 dag.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "x86-isel"
16#include "X86.h"
17#include "X86InstrBuilder.h"
18#include "X86ISelLowering.h"
19#include "X86MachineFunctionInfo.h"
20#include "X86RegisterInfo.h"
21#include "X86Subtarget.h"
22#include "X86TargetMachine.h"
23#include "llvm/GlobalValue.h"
24#include "llvm/Instructions.h"
25#include "llvm/Intrinsics.h"
26#include "llvm/Support/CFG.h"
27#include "llvm/Type.h"
28#include "llvm/CodeGen/MachineConstantPool.h"
29#include "llvm/CodeGen/MachineFunction.h"
30#include "llvm/CodeGen/MachineFrameInfo.h"
31#include "llvm/CodeGen/MachineInstrBuilder.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/CodeGen/SelectionDAGISel.h"
34#include "llvm/Target/TargetMachine.h"
35#include "llvm/Target/TargetOptions.h"
36#include "llvm/Support/Compiler.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/ErrorHandling.h"
39#include "llvm/Support/MathExtras.h"
40#include "llvm/Support/Streams.h"
41#include "llvm/Support/raw_ostream.h"
42#include "llvm/ADT/SmallPtrSet.h"
43#include "llvm/ADT/Statistic.h"
44using namespace llvm;
45
46#include "llvm/Support/CommandLine.h"
47static cl::opt<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden);
48
49STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
50
51//===----------------------------------------------------------------------===//
52//                      Pattern Matcher Implementation
53//===----------------------------------------------------------------------===//
54
namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDValue's instead of register numbers for the leaves of the matched
  /// tree.  An instance accumulates the components of an x86 memory operand
  /// (base + scale*index + disp, plus optional segment and symbol) as the
  /// matchers walk the DAG.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;  // Discriminates which member of the Base "union" is live.

    struct {            // This is really a union, discriminated by BaseType!
      SDValue Reg;
      int FrameIndex;
    } Base;

    unsigned Scale;             // Multiplier applied to IndexReg.
    SDValue IndexReg;
    int32_t Disp;               // Immediate displacement; 32-bit even on x86-64.
    SDValue Segment;            // Segment-override base, if any.
    // At most one of GV/CP/ES/JT provides a symbolic displacement
    // (see hasSymbolicDisplacement below).
    GlobalValue *GV;
    Constant *CP;
    const char *ES;
    int JT;
    unsigned Align;    // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    // Default-construct to an empty mode: register base with no registers,
    // scale 1, zero displacement, and no symbolic component.
    X86ISelAddressMode()
      : BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
        Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), SymbolFlags(0) {
    }

    /// hasSymbolicDisplacement - True if any of the symbol slots
    /// (global, constant pool, external symbol, jump table) is set.
    bool hasSymbolicDisplacement() const {
      return GV != 0 || CP != 0 || ES != 0 || JT != -1;
    }

    /// hasBaseOrIndexReg - True if either a base register or an index
    /// register has already been claimed.
    bool hasBaseOrIndexReg() const {
      return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0;
    }

    /// isRIPRelative - Return true if this addressing mode is already RIP
    /// relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    /// setBaseReg - Install Reg as the base, switching to register-base mode.
    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base.Reg = Reg;
    }

    /// dump - Print the current state of the addressing mode for debugging.
    void dump() {
      cerr << "X86ISelAddressMode " << this << "\n";
      cerr << "Base.Reg ";
              if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump();
              else cerr << "nul";
      cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
      cerr << " Scale" << Scale << "\n";
      cerr << "IndexReg ";
              if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
              else cerr << "nul";
      cerr << " Disp " << Disp << "\n";
      cerr << "GV "; if (GV) GV->dump();
                     else cerr << "nul";
      cerr << " CP "; if (CP) CP->dump();
                     else cerr << "nul";
      cerr << "\n";
      cerr << "ES "; if (ES) cerr << ES; else cerr << "nul";
      cerr  << " JT" << JT << " Align" << Align << "\n";
    }
  };
}
130
namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
    /// X86Lowering - This object fully describes how to lower LLVM code to an
    /// X86-specific SelectionDAG.
    X86TargetLowering &X86Lowering;

    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// CurBB - Current BB being isel'd.
    ///
    MachineBasicBlock *CurBB;

    /// OptForSize - If true, selector should try to optimize for code size
    /// instead of performance.
    bool OptForSize;

  public:
    /// Construct the selector for the given target machine and opt level.
    /// OptForSize is refreshed per-function in InstructionSelect().
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel),
        X86Lowering(*tm.getTargetLowering()),
        Subtarget(&tm.getSubtarget<X86Subtarget>()),
        OptForSize(false) {}

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    /// InstructionSelect - This callback is invoked by
    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
    virtual void InstructionSelect();

    /// EmitFunctionEntryCode - Emit per-function prologue-time code; used
    /// here to emit the __main call for "main" (see EmitSpecialCodeForMain).
    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);

    /// IsLegalAndProfitableToFold - Decide whether folding node N into its
    /// user U (with pattern root Root) is worthwhile; overridden to avoid
    /// folds that would increase code size.
    virtual
      bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    /// Select - Main entry point for selecting a single node.
    SDNode *Select(SDValue N);
    /// SelectAtomic64 - Select a 64-bit atomic operation to machine opcode
    /// Opc.  (Defined outside this chunk.)
    SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);

    // MatchXXX helpers incrementally fill in an X86ISelAddressMode; by
    // convention they return false on success and true on failure.
    bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddress(SDValue N, X86ISelAddressMode &AM,
                      unsigned Depth = 0);
    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
    // SelectXXXAddr helpers produce the final operand SDValues for the
    // various addressing-mode flavors used by the generated matcher.
    bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp);
    bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp);
    bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
                             SDValue N, SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &InChain, SDValue &OutChain);
    /// TryFoldLoad - Attempt to fold load N (used by P) into a memory
    /// operand, returning its address components on success.
    bool TryFoldLoad(SDValue P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);
    /// PreprocessForRMW - Rewrite chains so (store (op (load))) patterns can
    /// be selected as read-modify-write instructions.
    void PreprocessForRMW();
    /// PreprocessForFPConvert - Late-legalize FP_ROUND/FP_EXTEND that target
    /// the x87 stack into store/load pairs through a stack temporary.
    void PreprocessForFPConvert();

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                              char ConstraintCode,
                                              std::vector<SDValue> &OutOps);

    /// EmitSpecialCodeForMain - Emit target-specific entry code for "main".
    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    /// getAddressOperands - Convert a completed X86ISelAddressMode into the
    /// five operand SDValues (Base, Scale, Index, Disp, Segment) that x86
    /// memory-form instructions expect.
    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                   SDValue &Scale, SDValue &Index,
                                   SDValue &Disp, SDValue &Segment) {
      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
        AM.Base.Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES)
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);

      // Register 0 means "no segment override".
      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDValue getI16Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - Return an SDNode that returns the value of
    /// the global base register. Output instructions required to
    /// initialize the global base register, if necessary.
    ///
    SDNode *getGlobalBaseReg();

    /// getTargetMachine - Return a reference to the TargetMachine, casted
    /// to the target-specific type.
    const X86TargetMachine &getTargetMachine() {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
    /// to the target-specific type.
    const X86InstrInfo *getInstrInfo() {
      return getTargetMachine().getInstrInfo();
    }

#ifndef NDEBUG
    // Indent - Debug-output indentation level; maintained by SelectRoot
    // tracing in debug builds only.
    unsigned Indent;
#endif
  };
}
283
284
285bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
286                                                 SDNode *Root) const {
287  if (OptLevel == CodeGenOpt::None) return false;
288
289  if (U == Root)
290    switch (U->getOpcode()) {
291    default: break;
292    case ISD::ADD:
293    case ISD::ADDC:
294    case ISD::ADDE:
295    case ISD::AND:
296    case ISD::OR:
297    case ISD::XOR: {
298      SDValue Op1 = U->getOperand(1);
299
300      // If the other operand is a 8-bit immediate we should fold the immediate
301      // instead. This reduces code size.
302      // e.g.
303      // movl 4(%esp), %eax
304      // addl $4, %eax
305      // vs.
306      // movl $4, %eax
307      // addl 4(%esp), %eax
308      // The former is 2 bytes shorter. In case where the increment is 1, then
309      // the saving can be 4 bytes (by using incl %eax).
310      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
311        if (Imm->getAPIntValue().isSignedIntN(8))
312          return false;
313
314      // If the other operand is a TLS address, we should fold it instead.
315      // This produces
316      // movl    %gs:0, %eax
317      // leal    i@NTPOFF(%eax), %eax
318      // instead of
319      // movl    $i@NTPOFF, %eax
320      // addl    %gs:0, %eax
321      // if the block also has an access to a second TLS address this will save
322      // a load.
323      // FIXME: This is probably also true for non TLS addresses.
324      if (Op1.getOpcode() == X86ISD::Wrapper) {
325        SDValue Val = Op1.getOperand(0);
326        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
327          return false;
328      }
329    }
330    }
331
332  // Proceed to 'generic' cycle finder code
333  return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root);
334}
335
336/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
337/// and move load below the TokenFactor. Replace store's chain operand with
338/// load's chain result.
339static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
340                                 SDValue Store, SDValue TF) {
341  SmallVector<SDValue, 4> Ops;
342  for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
343    if (Load.getNode() == TF.getOperand(i).getNode())
344      Ops.push_back(Load.getOperand(0));
345    else
346      Ops.push_back(TF.getOperand(i));
347  CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
348  CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
349  CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
350                             Store.getOperand(2), Store.getOperand(3));
351}
352
353/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.
354///
355static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
356                      SDValue &Load) {
357  if (N.getOpcode() == ISD::BIT_CONVERT)
358    N = N.getOperand(0);
359
360  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
361  if (!LD || LD->isVolatile())
362    return false;
363  if (LD->getAddressingMode() != ISD::UNINDEXED)
364    return false;
365
366  ISD::LoadExtType ExtType = LD->getExtensionType();
367  if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
368    return false;
369
370  if (N.hasOneUse() &&
371      N.getOperand(1) == Address &&
372      N.getNode()->isOperandOf(Chain.getNode())) {
373    Load = N;
374    return true;
375  }
376  return false;
377}
378
/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
/// operand and move load below the call's chain operand.  The load ends up
/// chained between CALLSEQ_START's old chain and the call itself, so it can
/// later be folded into the call instruction.
static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
                                  SDValue Call, SDValue CallSeqStart) {
  SmallVector<SDValue, 8> Ops;
  // Step 1: unhook the load from the chain feeding CALLSEQ_START.  The
  // chain is either the load itself or a TokenFactor containing it
  // (isCalleeLoad guarantees one of these two shapes).
  SDValue Chain = CallSeqStart.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected CallSeqStart chain operand");
    // Build a new TokenFactor with the load replaced by the load's own
    // incoming chain.
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
                      MVT::Other, &Ops[0], Ops.size());
    Ops.clear();
    Ops.push_back(NewChain);
  }
  // Step 2: re-point CALLSEQ_START at the new chain, keeping its remaining
  // operands unchanged.
  for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
    Ops.push_back(CallSeqStart.getOperand(i));
  CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
  // Step 3: re-chain the load onto the call's old chain operand, so the
  // load now executes just before the call.
  CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));
  // Step 4: make the call's chain operand the load's output chain.
  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
    Ops.push_back(Call.getOperand(i));
  CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
}
412
413/// isCalleeLoad - Return true if call address is a load and it can be
414/// moved below CALLSEQ_START and the chains leading up to the call.
415/// Return the CALLSEQ_START by reference as a second output.
416static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
417  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
418    return false;
419  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
420  if (!LD ||
421      LD->isVolatile() ||
422      LD->getAddressingMode() != ISD::UNINDEXED ||
423      LD->getExtensionType() != ISD::NON_EXTLOAD)
424    return false;
425
426  // Now let's find the callseq_start.
427  while (Chain.getOpcode() != ISD::CALLSEQ_START) {
428    if (!Chain.hasOneUse())
429      return false;
430    Chain = Chain.getOperand(0);
431  }
432
433  if (Chain.getOperand(0).getNode() == Callee.getNode())
434    return true;
435  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
436      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()))
437    return true;
438  return false;
439}
440
441
/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
/// This is only run if not in -O0 mode.
/// This allows the instruction selector to pick more read-modify-write
/// instructions. This is a common case:
///
///     [Load chain]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///      /      \-
///     /         |
/// [TokenFactor] [Op]
///     ^          ^
///     |          |
///      \        /
///       \      /
///       [Store]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
///
///     [Load chain]
///         ^
///         |
///    [TokenFactor]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///       |     \-
///       |       |
///       |     [Op]
///       |       ^
///       |       |
///       \      /
///        \    /
///       [Store]
void X86DAGToDAGISel::PreprocessForRMW() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {
    if (I->getOpcode() == X86ISD::CALL) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      SDValue Chain = I->getOperand(0);
      SDValue Load  = I->getOperand(1);
      // isCalleeLoad also advances Chain up to the CALLSEQ_START.
      if (!isCalleeLoad(Load, Chain))
        continue;
      MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Otherwise look for the RMW store pattern: a non-truncating store whose
    // chain is a TokenFactor (matching the first diagram above).
    if (!ISD::isNON_TRUNCStore(I))
      continue;
    SDValue Chain = I->getOperand(0);

    if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
      continue;

    // N1 is the stored value, N2 the store address.  Skip scalar FP values
    // and values with multiple uses.
    SDValue N1 = I->getOperand(1);
    SDValue N2 = I->getOperand(2);
    if ((N1.getValueType().isFloatingPoint() &&
         !N1.getValueType().isVector()) ||
        !N1.hasOneUse())
      continue;

    bool RModW = false;
    SDValue Load;
    unsigned Opcode = N1.getNode()->getOpcode();
    switch (Opcode) {
    // Commutative ops: the qualifying load may be either operand.
    case ISD::ADD:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::VECTOR_SHUFFLE: {
      SDValue N10 = N1.getOperand(0);
      SDValue N11 = N1.getOperand(1);
      RModW = isRMWLoad(N10, Chain, N2, Load);
      if (!RModW)
        RModW = isRMWLoad(N11, Chain, N2, Load);
      break;
    }
    // Non-commutative ops: only the first operand may be the load.
    case ISD::SUB:
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
    case ISD::ROTL:
    case ISD::ROTR:
    case ISD::SUBC:
    case ISD::SUBE:
    case X86ISD::SHLD:
    case X86ISD::SHRD: {
      SDValue N10 = N1.getOperand(0);
      RModW = isRMWLoad(N10, Chain, N2, Load);
      break;
    }
    }

    if (RModW) {
      MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
      ++NumLoadMoved;
    }
  }
}
569
570
/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
/// nodes that target the FP stack to be store and load to the stack.  This is a
/// gross hack.  We would like to simply mark these as being illegal, but when
/// we do that, legalize produces these when it expands calls, then expands
/// these in the same legalize pass.  We would like dag combine to be able to
/// hack on these between the call expansion and the node legalization.  As such
/// this pass basically does "really late" legalization of these inline with the
/// X86 isel pass.
void X86DAGToDAGISel::PreprocessForFPConvert() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    MVT SrcVT = N->getOperand(0).getValueType();
    MVT DstVT = N->getValueType(0);
    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      // (Operand 1 of FP_ROUND is the "trunc is value-preserving" flag.)
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    // Materialize the conversion as a truncstore + extload through a stack
    // temporary of the chosen memory type.
    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    DebugLoc dl = N->getDebugLoc();

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, NULL, 0, MemVT);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        NULL, 0, MemVT);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created.  This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}
636
/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.  Runs the x86
/// preprocessing passes, selects the DAG via the generated matcher, and
/// cleans up dead nodes.
void X86DAGToDAGISel::InstructionSelect() {
  CurBB = BB;  // BB can change as result of isel.
  // Refresh the per-function size-optimization flag from the IR attribute.
  const Function *F = CurDAG->getMachineFunction().getFunction();
  OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);

  DEBUG(BB->dump());
  // RMW preprocessing rewrites load/op/store chains so read-modify-write
  // instructions can be selected; it is skipped at -O0.
  if (OptLevel != CodeGenOpt::None)
    PreprocessForRMW();

  // FIXME: This should only happen when not compiled with -O0.
  PreprocessForFPConvert();

  // Codegen the basic block.
#ifndef NDEBUG
  DOUT << "===== Instruction selection begins:\n";
  Indent = 0;
#endif
  SelectRoot(*CurDAG);
#ifndef NDEBUG
  DOUT << "===== Instruction selection ends:\n";
#endif

  // Selection may have orphaned nodes; drop them.
  CurDAG->RemoveDeadNodes();
}
663
664/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
665/// the main function.
666void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
667                                             MachineFrameInfo *MFI) {
668  const TargetInstrInfo *TII = TM.getInstrInfo();
669  if (Subtarget->isTargetCygMing())
670    BuildMI(BB, DebugLoc::getUnknownLoc(),
671            TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
672}
673
674void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
675  // If this is main, emit special code for main.
676  MachineBasicBlock *BB = MF.begin();
677  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
678    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
679}
680
681
682bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
683                                              X86ISelAddressMode &AM) {
684  assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
685  SDValue Segment = N.getOperand(0);
686
687  if (AM.Segment.getNode() == 0) {
688    AM.Segment = Segment;
689    return false;
690  }
691
692  return true;
693}
694
695bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
696  // This optimization is valid because the GNU TLS model defines that
697  // gs:0 (or fs:0 on X86-64) contains its own address.
698  // For more information see http://people.redhat.com/drepper/tls.pdf
699
700  SDValue Address = N.getOperand(1);
701  if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
702      !MatchSegmentBaseAddress (Address, AM))
703    return false;
704
705  return true;
706}
707
/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
/// into an addressing mode.  These wrap things that will resolve down into a
/// symbol reference.  If no match is possible, this returns true, otherwise it
/// returns false.
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  // The wrapped symbol: a GlobalAddress, ConstantPool, ExternalSymbol, or
  // JumpTable node.
  SDValue N0 = N.getOperand(0);

  // Handle X86-64 rip-relative addresses.  We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      TM.getCodeModel() == CodeModel::Small &&

      // Base and index reg must be 0 in order to use %rip as base and lowering
      // must allow RIP.
      !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {

    // Extract the symbol and accumulated offset; reject any combined
    // displacement that doesn't fit the 32-bit RIP-relative field.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      int64_t Offset = AM.Disp + G->getOffset();
      if (!isInt32(Offset)) return true;
      AM.GV = G->getGlobal();
      AM.Disp = Offset;
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      int64_t Offset = AM.Disp + CP->getOffset();
      if (!isInt32(Offset)) return true;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp = Offset;
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else {
      JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    }

    // Claim %rip as the base register so later matching knows this mode is
    // RIP-relative (see X86ISelAddressMode::isRIPRelative).
    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -static -mcmodel=small mode.  In 64-bit
  // mode, this results in a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      (TM.getCodeModel() == CodeModel::Small &&
       TM.getRelocationModel() == Reloc::Static)) {
    // Same four symbol kinds as above, but offsets are simply accumulated
    // into Disp with no range check (the displacement is 32-bit by
    // construction here).
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else {
      JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    }
    return false;
  }

  return true;
}
787
788/// MatchAddress - Add the specified node to the specified addressing mode,
789/// returning true if it cannot be done.  This just pattern matches for the
790/// addressing mode.
791bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
792                                   unsigned Depth) {
793  bool is64Bit = Subtarget->is64Bit();
794  DebugLoc dl = N.getDebugLoc();
795  DOUT << "MatchAddress: "; DEBUG(AM.dump());
796  // Limit recursion.
797  if (Depth > 5)
798    return MatchAddressBase(N, AM);
799
800  // If this is already a %rip relative address, we can only merge immediates
801  // into it.  Instead of handling this in every case, we handle it here.
802  // RIP relative addressing: %rip + 32-bit displacement!
803  if (AM.isRIPRelative()) {
804    // FIXME: JumpTable and ExternalSymbol address currently don't like
805    // displacements.  It isn't very important, but this should be fixed for
806    // consistency.
807    if (!AM.ES && AM.JT != -1) return true;
808
809    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
810      int64_t Val = AM.Disp + Cst->getSExtValue();
811      if (isInt32(Val)) {
812        AM.Disp = Val;
813        return false;
814      }
815    }
816    return true;
817  }
818
819  switch (N.getOpcode()) {
820  default: break;
821  case ISD::Constant: {
822    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
823    if (!is64Bit || isInt32(AM.Disp + Val)) {
824      AM.Disp += Val;
825      return false;
826    }
827    break;
828  }
829
830  case X86ISD::SegmentBaseAddress:
831    if (!MatchSegmentBaseAddress(N, AM))
832      return false;
833    break;
834
835  case X86ISD::Wrapper:
836  case X86ISD::WrapperRIP:
837    if (!MatchWrapper(N, AM))
838      return false;
839    break;
840
841  case ISD::LOAD:
842    if (!MatchLoad(N, AM))
843      return false;
844    break;
845
846  case ISD::FrameIndex:
847    if (AM.BaseType == X86ISelAddressMode::RegBase
848        && AM.Base.Reg.getNode() == 0) {
849      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
850      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
851      return false;
852    }
853    break;
854
855  case ISD::SHL:
856    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
857      break;
858
859    if (ConstantSDNode
860          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
861      unsigned Val = CN->getZExtValue();
862      if (Val == 1 || Val == 2 || Val == 3) {
863        AM.Scale = 1 << Val;
864        SDValue ShVal = N.getNode()->getOperand(0);
865
866        // Okay, we know that we have a scale by now.  However, if the scaled
867        // value is an add of something and a constant, we can fold the
868        // constant into the disp field here.
869        if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
870            isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
871          AM.IndexReg = ShVal.getNode()->getOperand(0);
872          ConstantSDNode *AddVal =
873            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
874          uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
875          if (!is64Bit || isInt32(Disp))
876            AM.Disp = Disp;
877          else
878            AM.IndexReg = ShVal;
879        } else {
880          AM.IndexReg = ShVal;
881        }
882        return false;
883      }
884    break;
885    }
886
887  case ISD::SMUL_LOHI:
888  case ISD::UMUL_LOHI:
889    // A mul_lohi where we need the low part can be folded as a plain multiply.
890    if (N.getResNo() != 0) break;
891    // FALL THROUGH
892  case ISD::MUL:
893  case X86ISD::MUL_IMM:
894    // X*[3,5,9] -> X+X*[2,4,8]
895    if (AM.BaseType == X86ISelAddressMode::RegBase &&
896        AM.Base.Reg.getNode() == 0 &&
897        AM.IndexReg.getNode() == 0) {
898      if (ConstantSDNode
899            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
900        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
901            CN->getZExtValue() == 9) {
902          AM.Scale = unsigned(CN->getZExtValue())-1;
903
904          SDValue MulVal = N.getNode()->getOperand(0);
905          SDValue Reg;
906
907          // Okay, we know that we have a scale by now.  However, if the scaled
908          // value is an add of something and a constant, we can fold the
909          // constant into the disp field here.
910          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
911              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
912            Reg = MulVal.getNode()->getOperand(0);
913            ConstantSDNode *AddVal =
914              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
915            uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
916                                      CN->getZExtValue();
917            if (!is64Bit || isInt32(Disp))
918              AM.Disp = Disp;
919            else
920              Reg = N.getNode()->getOperand(0);
921          } else {
922            Reg = N.getNode()->getOperand(0);
923          }
924
925          AM.IndexReg = AM.Base.Reg = Reg;
926          return false;
927        }
928    }
929    break;
930
931  case ISD::SUB: {
932    // Given A-B, if A can be completely folded into the address and
933    // the index field with the index field unused, use -B as the index.
934    // This is a win if a has multiple parts that can be folded into
935    // the address. Also, this saves a mov if the base register has
936    // other uses, since it avoids a two-address sub instruction, however
937    // it costs an additional mov if the index register has other uses.
938
939    // Test if the LHS of the sub can be folded.
940    X86ISelAddressMode Backup = AM;
941    if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) {
942      AM = Backup;
943      break;
944    }
945    // Test if the index field is free for use.
946    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
947      AM = Backup;
948      break;
949    }
950    int Cost = 0;
951    SDValue RHS = N.getNode()->getOperand(1);
952    // If the RHS involves a register with multiple uses, this
953    // transformation incurs an extra mov, due to the neg instruction
954    // clobbering its operand.
955    if (!RHS.getNode()->hasOneUse() ||
956        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
957        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
958        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
959        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
960         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
961      ++Cost;
962    // If the base is a register with multiple uses, this
963    // transformation may save a mov.
964    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
965         AM.Base.Reg.getNode() &&
966         !AM.Base.Reg.getNode()->hasOneUse()) ||
967        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
968      --Cost;
969    // If the folded LHS was interesting, this transformation saves
970    // address arithmetic.
971    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
972        ((AM.Disp != 0) && (Backup.Disp == 0)) +
973        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
974      --Cost;
975    // If it doesn't look like it may be an overall win, don't do it.
976    if (Cost >= 0) {
977      AM = Backup;
978      break;
979    }
980
981    // Ok, the transformation is legal and appears profitable. Go for it.
982    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
983    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
984    AM.IndexReg = Neg;
985    AM.Scale = 1;
986
987    // Insert the new nodes into the topological ordering.
988    if (Zero.getNode()->getNodeId() == -1 ||
989        Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
990      CurDAG->RepositionNode(N.getNode(), Zero.getNode());
991      Zero.getNode()->setNodeId(N.getNode()->getNodeId());
992    }
993    if (Neg.getNode()->getNodeId() == -1 ||
994        Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
995      CurDAG->RepositionNode(N.getNode(), Neg.getNode());
996      Neg.getNode()->setNodeId(N.getNode()->getNodeId());
997    }
998    return false;
999  }
1000
1001  case ISD::ADD: {
1002    X86ISelAddressMode Backup = AM;
1003    if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) &&
1004        !MatchAddress(N.getNode()->getOperand(1), AM, Depth+1))
1005      return false;
1006    AM = Backup;
1007    if (!MatchAddress(N.getNode()->getOperand(1), AM, Depth+1) &&
1008        !MatchAddress(N.getNode()->getOperand(0), AM, Depth+1))
1009      return false;
1010    AM = Backup;
1011
1012    // If we couldn't fold both operands into the address at the same time,
1013    // see if we can just put each operand into a register and fold at least
1014    // the add.
1015    if (AM.BaseType == X86ISelAddressMode::RegBase &&
1016        !AM.Base.Reg.getNode() &&
1017        !AM.IndexReg.getNode()) {
1018      AM.Base.Reg = N.getNode()->getOperand(0);
1019      AM.IndexReg = N.getNode()->getOperand(1);
1020      AM.Scale = 1;
1021      return false;
1022    }
1023    break;
1024  }
1025
1026  case ISD::OR:
1027    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
1028    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1029      X86ISelAddressMode Backup = AM;
1030      uint64_t Offset = CN->getSExtValue();
1031      // Start with the LHS as an addr mode.
1032      if (!MatchAddress(N.getOperand(0), AM, Depth+1) &&
1033          // Address could not have picked a GV address for the displacement.
1034          AM.GV == NULL &&
1035          // On x86-64, the resultant disp must fit in 32-bits.
1036          (!is64Bit || isInt32(AM.Disp + Offset)) &&
1037          // Check to see if the LHS & C is zero.
1038          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
1039        AM.Disp += Offset;
1040        return false;
1041      }
1042      AM = Backup;
1043    }
1044    break;
1045
1046  case ISD::AND: {
1047    // Perform some heroic transforms on an and of a constant-count shift
1048    // with a constant to enable use of the scaled offset field.
1049
1050    SDValue Shift = N.getOperand(0);
1051    if (Shift.getNumOperands() != 2) break;
1052
1053    // Scale must not be used already.
1054    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
1055
1056    SDValue X = Shift.getOperand(0);
1057    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
1058    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
1059    if (!C1 || !C2) break;
1060
1061    // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
1062    // allows us to convert the shift and and into an h-register extract and
1063    // a scaled index.
1064    if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
1065      unsigned ScaleLog = 8 - C1->getZExtValue();
1066      if (ScaleLog > 0 && ScaleLog < 4 &&
1067          C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
1068        SDValue Eight = CurDAG->getConstant(8, MVT::i8);
1069        SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
1070        SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
1071                                      X, Eight);
1072        SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
1073                                      Srl, Mask);
1074        SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
1075        SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
1076                                      And, ShlCount);
1077
1078        // Insert the new nodes into the topological ordering.
1079        if (Eight.getNode()->getNodeId() == -1 ||
1080            Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1081          CurDAG->RepositionNode(X.getNode(), Eight.getNode());
1082          Eight.getNode()->setNodeId(X.getNode()->getNodeId());
1083        }
1084        if (Mask.getNode()->getNodeId() == -1 ||
1085            Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1086          CurDAG->RepositionNode(X.getNode(), Mask.getNode());
1087          Mask.getNode()->setNodeId(X.getNode()->getNodeId());
1088        }
1089        if (Srl.getNode()->getNodeId() == -1 ||
1090            Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
1091          CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
1092          Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
1093        }
1094        if (And.getNode()->getNodeId() == -1 ||
1095            And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1096          CurDAG->RepositionNode(N.getNode(), And.getNode());
1097          And.getNode()->setNodeId(N.getNode()->getNodeId());
1098        }
1099        if (ShlCount.getNode()->getNodeId() == -1 ||
1100            ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1101          CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
1102          ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
1103        }
1104        if (Shl.getNode()->getNodeId() == -1 ||
1105            Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1106          CurDAG->RepositionNode(N.getNode(), Shl.getNode());
1107          Shl.getNode()->setNodeId(N.getNode()->getNodeId());
1108        }
1109        CurDAG->ReplaceAllUsesWith(N, Shl);
1110        AM.IndexReg = And;
1111        AM.Scale = (1 << ScaleLog);
1112        return false;
1113      }
1114    }
1115
1116    // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
1117    // allows us to fold the shift into this addressing mode.
1118    if (Shift.getOpcode() != ISD::SHL) break;
1119
1120    // Not likely to be profitable if either the AND or SHIFT node has more
1121    // than one use (unless all uses are for address computation). Besides,
1122    // isel mechanism requires their node ids to be reused.
1123    if (!N.hasOneUse() || !Shift.hasOneUse())
1124      break;
1125
1126    // Verify that the shift amount is something we can fold.
1127    unsigned ShiftCst = C1->getZExtValue();
1128    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
1129      break;
1130
1131    // Get the new AND mask, this folds to a constant.
1132    SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
1133                                         SDValue(C2, 0), SDValue(C1, 0));
1134    SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
1135                                     NewANDMask);
1136    SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
1137                                       NewAND, SDValue(C1, 0));
1138
1139    // Insert the new nodes into the topological ordering.
1140    if (C1->getNodeId() > X.getNode()->getNodeId()) {
1141      CurDAG->RepositionNode(X.getNode(), C1);
1142      C1->setNodeId(X.getNode()->getNodeId());
1143    }
1144    if (NewANDMask.getNode()->getNodeId() == -1 ||
1145        NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1146      CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
1147      NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
1148    }
1149    if (NewAND.getNode()->getNodeId() == -1 ||
1150        NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
1151      CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
1152      NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
1153    }
1154    if (NewSHIFT.getNode()->getNodeId() == -1 ||
1155        NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1156      CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
1157      NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
1158    }
1159
1160    CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
1161
1162    AM.Scale = 1 << ShiftCst;
1163    AM.IndexReg = NewAND;
1164    return false;
1165  }
1166  }
1167
1168  return MatchAddressBase(N, AM);
1169}
1170
1171/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
1172/// specified addressing mode without any further recursion.
1173bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1174  // Is the base register already occupied?
1175  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
1176    // If so, check to see if the scale index register is set.
1177    if (AM.IndexReg.getNode() == 0) {
1178      AM.IndexReg = N;
1179      AM.Scale = 1;
1180      return false;
1181    }
1182
1183    // Otherwise, we cannot select it.
1184    return true;
1185  }
1186
1187  // Default, generate it as a register.
1188  AM.BaseType = X86ISelAddressMode::RegBase;
1189  AM.Base.Reg = N;
1190  return false;
1191}
1192
1193/// SelectAddr - returns true if it is able pattern match an addressing mode.
1194/// It returns the operands which make up the maximal addressing mode it can
1195/// match by reference.
1196bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
1197                                 SDValue &Scale, SDValue &Index,
1198                                 SDValue &Disp, SDValue &Segment) {
1199  X86ISelAddressMode AM;
1200  bool Done = false;
1201  if (AvoidDupAddrCompute && !N.hasOneUse()) {
1202    unsigned Opcode = N.getOpcode();
1203    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex &&
1204        Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) {
1205      // If we are able to fold N into addressing mode, then we'll allow it even
1206      // if N has multiple uses. In general, addressing computation is used as
1207      // addresses by all of its uses. But watch out for CopyToReg uses, that
1208      // means the address computation is liveout. It will be computed by a LEA
1209      // so we want to avoid computing the address twice.
1210      for (SDNode::use_iterator UI = N.getNode()->use_begin(),
1211             UE = N.getNode()->use_end(); UI != UE; ++UI) {
1212        if (UI->getOpcode() == ISD::CopyToReg) {
1213          MatchAddressBase(N, AM);
1214          Done = true;
1215          break;
1216        }
1217      }
1218    }
1219  }
1220
1221  if (!Done && MatchAddress(N, AM))
1222    return false;
1223
1224  MVT VT = N.getValueType();
1225  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1226    if (!AM.Base.Reg.getNode())
1227      AM.Base.Reg = CurDAG->getRegister(0, VT);
1228  }
1229
1230  if (!AM.IndexReg.getNode())
1231    AM.IndexReg = CurDAG->getRegister(0, VT);
1232
1233  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1234  return true;
1235}
1236
/// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
/// match a load whose top elements are either undef or zeros.  The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
///
/// Returns true on a successful match, filling in the address operands and
/// the in/out chains; returns false if N is not a foldable scalar load.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
                                          SDValue N, SDValue &Base,
                                          SDValue &Scale, SDValue &Index,
                                          SDValue &Disp, SDValue &Segment,
                                          SDValue &InChain,
                                          SDValue &OutChain) {
  // Pattern 1: (scalar_to_vector (load ...)) -- the top elements are undef.
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    InChain = N.getOperand(0).getValue(1);
    // Fold only a plain (non-extending) load whose value and whose
    // scalar_to_vector user each have a single use, and only when folding
    // is legal and profitable at this position.
    if (ISD::isNON_EXTLoad(InChain.getNode()) &&
        InChain.getValue(0).hasOneUse() &&
        N.hasOneUse() &&
        IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
      LoadSDNode *LD = cast<LoadSDNode>(InChain);
      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
        return false;
      OutChain = LD->getChain();
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements.  This is a vector shuffle from the zero vector.
  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(0).getNode()->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
      N.getOperand(0).getOperand(0).hasOneUse()) {
    // Okay, this is a zero extending load.  Fold it.
    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
    if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
      return false;
    OutChain = LD->getChain();
    // The load's chain result (value #1) feeds users of the old chain.
    InChain = SDValue(LD, 1);
    return true;
  }
  return false;
}
1278
1279
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
///
/// Returns true (and fills in the operands) only when the matched address is
/// complex enough that an LEA is worthwhile; otherwise returns false.
bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
                                    SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp) {
  X86ISelAddressMode AM;

  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
  // segments.
  SDValue Copy = AM.Segment;
  SDValue T = CurDAG->getRegister(0, MVT::i32);
  AM.Segment = T;
  if (MatchAddress(N, AM))
    return false;
  // MatchAddress must not have replaced the dummy segment register.
  // (This check compiles away in NDEBUG builds.)
  assert (T == AM.Segment);
  AM.Segment = Copy;

  // "Complexity" scores how much work an LEA would fold: each used component
  // (base, index, scale > 1, displacement) contributes, and only a score
  // above 2 is accepted below.
  MVT VT = N.getValueType();
  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    if (AM.Base.Reg.getNode())
      Complexity = 1;
    else
      // Unused base slot: fill in the zero register.
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.getNode())
    Complexity++;
  else
    AM.IndexReg = CurDAG->getRegister(0, VT);

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
  // a simple shift.
  if (AM.Scale > 1)
    Complexity++;

  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // to a LEA. This is determined with some expermentation but is by no means
  // optimal (especially for code size consideration). LEA is nice because of
  // its three-address nature. Tweak the cost function again when we can run
  // convertToThreeAddress() at register allocation time.
  if (AM.hasSymbolicDisplacement()) {
    // For X86-64, we should always use lea to materialize RIP relative
    // addresses.
    if (Subtarget->is64Bit())
      Complexity = 4;
    else
      Complexity += 2;
  }

  if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
    Complexity++;

  // If it isn't worth using an LEA, reject it.
  if (Complexity <= 2)
    return false;

  // LEA carries no segment operand; pass an empty SDValue.
  SDValue Segment;
  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}
1342
1343/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
1344bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
1345                                        SDValue &Scale, SDValue &Index,
1346                                        SDValue &Disp) {
1347  assert(Op.getOpcode() == X86ISD::TLSADDR);
1348  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1349  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1350
1351  X86ISelAddressMode AM;
1352  AM.GV = GA->getGlobal();
1353  AM.Disp += GA->getOffset();
1354  AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
1355  AM.SymbolFlags = GA->getTargetFlags();
1356
1357  if (N.getValueType() == MVT::i32) {
1358    AM.Scale = 1;
1359    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1360  } else {
1361    AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1362  }
1363
1364  SDValue Segment;
1365  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1366  return true;
1367}
1368
1369
1370bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
1371                                  SDValue &Base, SDValue &Scale,
1372                                  SDValue &Index, SDValue &Disp,
1373                                  SDValue &Segment) {
1374  if (ISD::isNON_EXTLoad(N.getNode()) &&
1375      N.hasOneUse() &&
1376      IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
1377    return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
1378  return false;
1379}
1380
1381/// getGlobalBaseReg - Return an SDNode that returns the value of
1382/// the global base register. Output instructions required to
1383/// initialize the global base register, if necessary.
1384///
1385SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1386  MachineFunction *MF = CurBB->getParent();
1387  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1388  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
1389}
1390
1391static SDNode *FindCallStartFromCall(SDNode *Node) {
1392  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
1393    assert(Node->getOperand(0).getValueType() == MVT::Other &&
1394         "Node doesn't have a token chain argument!");
1395  return FindCallStartFromCall(Node->getOperand(0).getNode());
1396}
1397
// SelectAtomic64 - Select a 64-bit atomic pseudo-instruction (Opc is one of
// the X86::ATOM*6432 opcodes, per the callers in Select).  Node's operands:
// 0 = chain, 1 = address, 2/3 = low/high halves of the 64-bit value,
// 4 = memory operand descriptor.  Returns NULL if the address cannot be
// matched as an x86 memory operand.
SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
  SDValue Chain = Node->getOperand(0);
  SDValue In1 = Node->getOperand(1);
  SDValue In2L = Node->getOperand(2);
  SDValue In2H = Node->getOperand(3);
  // Match the address into the five standard memory-operand values.
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return NULL;
  SDValue LSI = Node->getOperand(4);    // MemOperand
  const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain};
  // Results: the two i32 halves of the 64-bit result plus the output chain.
  return CurDAG->getTargetNode(Opc, Node->getDebugLoc(),
                               MVT::i32, MVT::i32, MVT::Other, Ops,
                               array_lengthof(Ops));
}
1412
1413SDNode *X86DAGToDAGISel::Select(SDValue N) {
1414  SDNode *Node = N.getNode();
1415  MVT NVT = Node->getValueType(0);
1416  unsigned Opc, MOpc;
1417  unsigned Opcode = Node->getOpcode();
1418  DebugLoc dl = Node->getDebugLoc();
1419
1420#ifndef NDEBUG
1421  DOUT << std::string(Indent, ' ') << "Selecting: ";
1422  DEBUG(Node->dump(CurDAG));
1423  DOUT << "\n";
1424  Indent += 2;
1425#endif
1426
1427  if (Node->isMachineOpcode()) {
1428#ifndef NDEBUG
1429    DOUT << std::string(Indent-2, ' ') << "== ";
1430    DEBUG(Node->dump(CurDAG));
1431    DOUT << "\n";
1432    Indent -= 2;
1433#endif
1434    return NULL;   // Already selected.
1435  }
1436
1437  switch (Opcode) {
1438    default: break;
1439    case X86ISD::GlobalBaseReg:
1440      return getGlobalBaseReg();
1441
1442    case X86ISD::ATOMOR64_DAG:
1443      return SelectAtomic64(Node, X86::ATOMOR6432);
1444    case X86ISD::ATOMXOR64_DAG:
1445      return SelectAtomic64(Node, X86::ATOMXOR6432);
1446    case X86ISD::ATOMADD64_DAG:
1447      return SelectAtomic64(Node, X86::ATOMADD6432);
1448    case X86ISD::ATOMSUB64_DAG:
1449      return SelectAtomic64(Node, X86::ATOMSUB6432);
1450    case X86ISD::ATOMNAND64_DAG:
1451      return SelectAtomic64(Node, X86::ATOMNAND6432);
1452    case X86ISD::ATOMAND64_DAG:
1453      return SelectAtomic64(Node, X86::ATOMAND6432);
1454    case X86ISD::ATOMSWAP64_DAG:
1455      return SelectAtomic64(Node, X86::ATOMSWAP6432);
1456
1457    case ISD::SMUL_LOHI:
1458    case ISD::UMUL_LOHI: {
1459      SDValue N0 = Node->getOperand(0);
1460      SDValue N1 = Node->getOperand(1);
1461
1462      bool isSigned = Opcode == ISD::SMUL_LOHI;
1463      if (!isSigned)
1464        switch (NVT.getSimpleVT()) {
1465        default: llvm_unreachable("Unsupported VT!");
1466        case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
1467        case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
1468        case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
1469        case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
1470        }
1471      else
1472        switch (NVT.getSimpleVT()) {
1473        default: llvm_unreachable("Unsupported VT!");
1474        case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
1475        case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
1476        case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
1477        case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
1478        }
1479
1480      unsigned LoReg, HiReg;
1481      switch (NVT.getSimpleVT()) {
1482      default: llvm_unreachable("Unsupported VT!");
1483      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
1484      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
1485      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
1486      case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
1487      }
1488
1489      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1490      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1491      // multiplty is commmutative
1492      if (!foldedLoad) {
1493        foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1494        if (foldedLoad)
1495          std::swap(N0, N1);
1496      }
1497
1498      SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
1499                                              N0, SDValue()).getValue(1);
1500
1501      if (foldedLoad) {
1502        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
1503                          InFlag };
1504        SDNode *CNode =
1505          CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
1506                                array_lengthof(Ops));
1507        InFlag = SDValue(CNode, 1);
1508        // Update the chain.
1509        ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1510      } else {
1511        InFlag =
1512          SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1513      }
1514
1515      // Copy the low half of the result, if it is needed.
1516      if (!N.getValue(0).use_empty()) {
1517        SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1518                                                  LoReg, NVT, InFlag);
1519        InFlag = Result.getValue(2);
1520        ReplaceUses(N.getValue(0), Result);
1521#ifndef NDEBUG
1522        DOUT << std::string(Indent-2, ' ') << "=> ";
1523        DEBUG(Result.getNode()->dump(CurDAG));
1524        DOUT << "\n";
1525#endif
1526      }
1527      // Copy the high half of the result, if it is needed.
1528      if (!N.getValue(1).use_empty()) {
1529        SDValue Result;
1530        if (HiReg == X86::AH && Subtarget->is64Bit()) {
1531          // Prevent use of AH in a REX instruction by referencing AX instead.
1532          // Shift it down 8 bits.
1533          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1534                                          X86::AX, MVT::i16, InFlag);
1535          InFlag = Result.getValue(2);
1536          Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
1537                                                 Result,
1538                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
1539          // Then truncate it down to i8.
1540          SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
1541          Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
1542                                                   MVT::i8, Result, SRIdx), 0);
1543        } else {
1544          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1545                                          HiReg, NVT, InFlag);
1546          InFlag = Result.getValue(2);
1547        }
1548        ReplaceUses(N.getValue(1), Result);
1549#ifndef NDEBUG
1550        DOUT << std::string(Indent-2, ' ') << "=> ";
1551        DEBUG(Result.getNode()->dump(CurDAG));
1552        DOUT << "\n";
1553#endif
1554      }
1555
1556#ifndef NDEBUG
1557      Indent -= 2;
1558#endif
1559
1560      return NULL;
1561    }
1562
1563    case ISD::SDIVREM:
1564    case ISD::UDIVREM: {
1565      SDValue N0 = Node->getOperand(0);
1566      SDValue N1 = Node->getOperand(1);
1567
1568      bool isSigned = Opcode == ISD::SDIVREM;
1569      if (!isSigned)
1570        switch (NVT.getSimpleVT()) {
1571        default: llvm_unreachable("Unsupported VT!");
1572        case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
1573        case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
1574        case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
1575        case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
1576        }
1577      else
1578        switch (NVT.getSimpleVT()) {
1579        default: llvm_unreachable("Unsupported VT!");
1580        case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
1581        case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
1582        case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
1583        case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
1584        }
1585
1586      unsigned LoReg, HiReg;
1587      unsigned ClrOpcode, SExtOpcode;
1588      switch (NVT.getSimpleVT()) {
1589      default: llvm_unreachable("Unsupported VT!");
1590      case MVT::i8:
1591        LoReg = X86::AL;  HiReg = X86::AH;
1592        ClrOpcode  = 0;
1593        SExtOpcode = X86::CBW;
1594        break;
1595      case MVT::i16:
1596        LoReg = X86::AX;  HiReg = X86::DX;
1597        ClrOpcode  = X86::MOV16r0;
1598        SExtOpcode = X86::CWD;
1599        break;
1600      case MVT::i32:
1601        LoReg = X86::EAX; HiReg = X86::EDX;
1602        ClrOpcode  = X86::MOV32r0;
1603        SExtOpcode = X86::CDQ;
1604        break;
1605      case MVT::i64:
1606        LoReg = X86::RAX; HiReg = X86::RDX;
1607        ClrOpcode  = X86::MOV64r0;
1608        SExtOpcode = X86::CQO;
1609        break;
1610      }
1611
1612      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1613      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1614      bool signBitIsZero = CurDAG->SignBitIsZero(N0);
1615
1616      SDValue InFlag;
1617      if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
1618        // Special case for div8, just use a move with zero extension to AX to
1619        // clear the upper 8 bits (AH).
1620        SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
1621        if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
1622          SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
1623          Move =
1624            SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16,
1625                                          MVT::Other, Ops,
1626                                          array_lengthof(Ops)), 0);
1627          Chain = Move.getValue(1);
1628          ReplaceUses(N0.getValue(1), Chain);
1629        } else {
1630          Move =
1631            SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
1632          Chain = CurDAG->getEntryNode();
1633        }
1634        Chain  = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
1635        InFlag = Chain.getValue(1);
1636      } else {
1637        InFlag =
1638          CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
1639                               LoReg, N0, SDValue()).getValue(1);
1640        if (isSigned && !signBitIsZero) {
1641          // Sign extend the low part into the high part.
1642          InFlag =
1643            SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
1644        } else {
1645          // Zero out the high part, effectively zero extending the input.
1646          SDValue ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT),
1647                                    0);
1648          InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
1649                                        ClrNode, InFlag).getValue(1);
1650        }
1651      }
1652
1653      if (foldedLoad) {
1654        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
1655                          InFlag };
1656        SDNode *CNode =
1657          CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
1658                                array_lengthof(Ops));
1659        InFlag = SDValue(CNode, 1);
1660        // Update the chain.
1661        ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1662      } else {
1663        InFlag =
1664          SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1665      }
1666
1667      // Copy the division (low) result, if it is needed.
1668      if (!N.getValue(0).use_empty()) {
1669        SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1670                                                  LoReg, NVT, InFlag);
1671        InFlag = Result.getValue(2);
1672        ReplaceUses(N.getValue(0), Result);
1673#ifndef NDEBUG
1674        DOUT << std::string(Indent-2, ' ') << "=> ";
1675        DEBUG(Result.getNode()->dump(CurDAG));
1676        DOUT << "\n";
1677#endif
1678      }
1679      // Copy the remainder (high) result, if it is needed.
1680      if (!N.getValue(1).use_empty()) {
1681        SDValue Result;
1682        if (HiReg == X86::AH && Subtarget->is64Bit()) {
1683          // Prevent use of AH in a REX instruction by referencing AX instead.
1684          // Shift it down 8 bits.
1685          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1686                                          X86::AX, MVT::i16, InFlag);
1687          InFlag = Result.getValue(2);
1688          Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
1689                                        Result,
1690                                        CurDAG->getTargetConstant(8, MVT::i8)),
1691                           0);
1692          // Then truncate it down to i8.
1693          SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
1694          Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
1695                                                   MVT::i8, Result, SRIdx), 0);
1696        } else {
1697          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1698                                          HiReg, NVT, InFlag);
1699          InFlag = Result.getValue(2);
1700        }
1701        ReplaceUses(N.getValue(1), Result);
1702#ifndef NDEBUG
1703        DOUT << std::string(Indent-2, ' ') << "=> ";
1704        DEBUG(Result.getNode()->dump(CurDAG));
1705        DOUT << "\n";
1706#endif
1707      }
1708
1709#ifndef NDEBUG
1710      Indent -= 2;
1711#endif
1712
1713      return NULL;
1714    }
1715
1716    case ISD::DECLARE: {
1717      // Handle DECLARE nodes here because the second operand may have been
1718      // wrapped in X86ISD::Wrapper.
1719      SDValue Chain = Node->getOperand(0);
1720      SDValue N1 = Node->getOperand(1);
1721      SDValue N2 = Node->getOperand(2);
1722      FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
1723
1724      // FIXME: We need to handle this for VLAs.
1725      if (!FINode) {
1726        ReplaceUses(N.getValue(0), Chain);
1727        return NULL;
1728      }
1729
1730      if (N2.getOpcode() == ISD::ADD &&
1731          N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg)
1732        N2 = N2.getOperand(1);
1733
1734      // If N2 is not Wrapper(decriptor) then the llvm.declare is mangled
1735      // somehow, just ignore it.
1736      if (N2.getOpcode() != X86ISD::Wrapper &&
1737          N2.getOpcode() != X86ISD::WrapperRIP) {
1738        ReplaceUses(N.getValue(0), Chain);
1739        return NULL;
1740      }
1741      GlobalAddressSDNode *GVNode =
1742        dyn_cast<GlobalAddressSDNode>(N2.getOperand(0));
1743      if (GVNode == 0) {
1744        ReplaceUses(N.getValue(0), Chain);
1745        return NULL;
1746      }
1747      SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
1748                                                 TLI.getPointerTy());
1749      SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(),
1750                                                    TLI.getPointerTy());
1751      SDValue Ops[] = { Tmp1, Tmp2, Chain };
1752      return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
1753                                   MVT::Other, Ops,
1754                                   array_lengthof(Ops));
1755    }
1756  }
1757
1758  SDNode *ResNode = SelectCode(N);
1759
1760#ifndef NDEBUG
1761  DOUT << std::string(Indent-2, ' ') << "=> ";
1762  if (ResNode == NULL || ResNode == N.getNode())
1763    DEBUG(N.getNode()->dump(CurDAG));
1764  else
1765    DEBUG(ResNode->dump(CurDAG));
1766  DOUT << "\n";
1767  Indent -= 2;
1768#endif
1769
1770  return ResNode;
1771}
1772
1773bool X86DAGToDAGISel::
1774SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
1775                             std::vector<SDValue> &OutOps) {
1776  SDValue Op0, Op1, Op2, Op3, Op4;
1777  switch (ConstraintCode) {
1778  case 'o':   // offsetable        ??
1779  case 'v':   // not offsetable    ??
1780  default: return true;
1781  case 'm':   // memory
1782    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
1783      return true;
1784    break;
1785  }
1786
1787  OutOps.push_back(Op0);
1788  OutOps.push_back(Op1);
1789  OutOps.push_back(Op2);
1790  OutOps.push_back(Op3);
1791  OutOps.push_back(Op4);
1792  return false;
1793}
1794
1795/// createX86ISelDag - This pass converts a legalized DAG into a
1796/// X86-specific DAG, ready for instruction scheduling.
1797///
1798FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
1799                                     llvm::CodeGenOpt::Level OptLevel) {
1800  return new X86DAGToDAGISel(TM, OptLevel);
1801}
1802