X86ISelDAGToDAG.cpp revision 73ebbd8aac979cd6a62f15434f0e5423306f82dd
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to an X86 dag.
//
//===----------------------------------------------------------------------===//

// Force NDEBUG on in any optimized build on Darwin.
//
// FIXME: This is a huge hack, to work around ridiculously awful compile times
// on this file with gcc-4.2 on Darwin, in Release mode.
#if (!defined(__llvm__) && defined(__APPLE__) && \
     defined(__OPTIMIZE__) && !defined(NDEBUG))
#define NDEBUG
#endif

#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDValues instead of register numbers for the leaves of the matched
  /// tree.
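  /// The matched form is Base + Scale*Index + Disp, plus an optional
  /// segment override; e.g. "movl 4(%ebx,%ecx,2), %eax" corresponds to
  /// Base.Reg = %ebx, Scale = 2, IndexReg = %ecx, Disp = 4.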
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    struct {            // This is really a union, discriminated by BaseType!
      SDValue Reg;
      int FrameIndex;
    } Base;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    GlobalValue *GV;
    Constant *CP;
    BlockAddress *BlockAddr;
    const char *ES;
    int JT;
    unsigned Align;    // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
      : BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
        Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
        SymbolFlags(X86II::MO_NO_FLAG) {
    }

    bool hasSymbolicDisplacement() const {
      return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
    }

    bool hasBaseOrIndexReg() const {
      return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0;
    }

    /// isRIPRelative - Return true if this addressing mode is already RIP
    /// relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base.Reg = Reg;
    }

    void dump() {
      errs() << "X86ISelAddressMode " << this << '\n';
      errs() << "Base.Reg ";
      if (Base.Reg.getNode() != 0)
        Base.Reg.getNode()->dump();
      else
        errs() << "nul";
      errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'
             << " Scale" << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode() != 0)
        IndexReg.getNode()->dump();
      else
        errs() << "nul";
      errs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        errs() << "nul";
      errs() << " CP ";
      if (CP)
        CP->dump();
      else
        errs() << "nul";
      errs() << '\n'
             << "ES ";
      if (ES)
        errs() << ES;
      else
        errs() << "nul";
      errs() << " JT" << JT << " Align" << Align << '\n';
    }
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel : public SelectionDAGISel {
    /// X86Lowering - This object fully describes how to lower LLVM code to an
    /// X86-specific SelectionDAG.
    X86TargetLowering &X86Lowering;

    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// OptForSize - If true, selector should try to optimize for code size
    /// instead of performance.
    bool OptForSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel),
        X86Lowering(*tm.getTargetLowering()),
        Subtarget(&tm.getSubtarget<X86Subtarget>()),
        OptForSize(false) {}

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    /// InstructionSelect - This callback is invoked by
    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
    virtual void InstructionSelect();

    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);

    virtual
      bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDValue N);
    SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
    SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);

    bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp);
    bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp);
    bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
                             SDValue N, SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &InChain, SDValue &OutChain);
    bool TryFoldLoad(SDValue P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);
    void PreprocessForRMW();
    void PreprocessForFPConvert();

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                              char ConstraintCode,
                                              std::vector<SDValue> &OutOps);

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                   SDValue &Scale, SDValue &Index,
                                   SDValue &Disp, SDValue &Segment) {
      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
        AM.Base.Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES)
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      else if (AM.BlockAddr)
        Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
                                       true, AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDValue getI16Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - Return an SDNode that returns the value of
    /// the global base register. Output instructions required to
    /// initialize the global base register, if necessary.
    ///
    SDNode *getGlobalBaseReg();

    /// getTargetMachine - Return a reference to the TargetMachine, casted
    /// to the target-specific type.
    const X86TargetMachine &getTargetMachine() {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
    /// to the target-specific type.
    const X86InstrInfo *getInstrInfo() {
      return getTargetMachine().getInstrInfo();
    }

#ifndef NDEBUG
    unsigned Indent;
#endif
  };
}


bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
                                                 SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (U == Root)
    switch (U->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is an 8-bit immediate we should fold the immediate
      // instead. This reduces code size.
      // e.g.
      // movl 4(%esp), %eax
      // addl $4, %eax
      // vs.
      // movl $4, %eax
      // addl 4(%esp), %eax
      // The former is 2 bytes shorter. When the increment is 1, the saving
      // can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      // movl    %gs:0, %eax
      // leal    i@NTPOFF(%eax), %eax
      // instead of
      // movl    $i@NTPOFF, %eax
      // addl    %gs:0, %eax
      // If the block also has an access to a second TLS address, this will
      // save a load.
      // FIXME: This is probably also true for non-TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }

  // Proceed to 'generic' cycle finder code
  return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root);
}

/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
/// and move load below the TokenFactor. Replace store's chain operand with
/// load's chain result.
static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
                                 SDValue Store, SDValue TF) {
  SmallVector<SDValue, 4> Ops;
  for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
    if (Load.getNode() == TF.getOperand(i).getNode())
      Ops.push_back(Load.getOperand(0));
    else
      Ops.push_back(TF.getOperand(i));
  SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
  SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF,
                                               Load.getOperand(1),
                                               Load.getOperand(2));
  CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
                             Store.getOperand(2), Store.getOperand(3));
}

/// isRMWLoad - Return true if N is a load that's part of a RMW sub-DAG.  The
/// chain produced by the load must only be used by the store's chain operand,
/// otherwise this may produce a cycle in the DAG.
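/// A typical match is (store (op (load X), Y), X), which can then be selected
/// as a single read-modify-write instruction, e.g. "addl %reg, (mem)".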
///
static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
                      SDValue &Load) {
  if (N.getOpcode() == ISD::BIT_CONVERT)
    N = N.getOperand(0);

  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD || LD->isVolatile())
    return false;
  if (LD->getAddressingMode() != ISD::UNINDEXED)
    return false;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
    return false;

  if (N.hasOneUse() &&
      LD->hasNUsesOfValue(1, 1) &&
      N.getOperand(1) == Address &&
      LD->isOperandOf(Chain.getNode())) {
    Load = N;
    return true;
  }
  return false;
}

/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
/// operand and move load below the call's chain operand.
static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
                                  SDValue Call, SDValue CallSeqStart) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = CallSeqStart.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected CallSeqStart chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
                      MVT::Other, &Ops[0], Ops.size());
    Ops.clear();
    Ops.push_back(NewChain);
  }
  for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
    Ops.push_back(CallSeqStart.getOperand(i));
  CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
  CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));
  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
    Ops.push_back(Call.getOperand(i));
  CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
}

/// isCalleeLoad - Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}


/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
/// This is only run if not in -O0 mode.
/// This allows the instruction selector to pick more read-modify-write
/// instructions. This is a common case:
///
///     [Load chain]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///      /      \-
///     /         |
/// [TokenFactor] [Op]
///     ^          ^
///     |          |
///      \        /
///       \      /
///       [Store]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
///
///     [Load chain]
///         ^
///         |
///    [TokenFactor]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///       |     \-
///       |       |
///       |     [Op]
///       |       ^
///       |       |
///       \      /
///        \    /
///       [Store]
void X86DAGToDAGISel::PreprocessForRMW() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {
    if (I->getOpcode() == X86ISD::CALL) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      SDValue Chain = I->getOperand(0);
      SDValue Load  = I->getOperand(1);
      if (!isCalleeLoad(Load, Chain))
        continue;
      MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    if (!ISD::isNON_TRUNCStore(I))
      continue;
    SDValue Chain = I->getOperand(0);

    if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
      continue;

    SDValue N1 = I->getOperand(1);
    SDValue N2 = I->getOperand(2);
    if ((N1.getValueType().isFloatingPoint() &&
         !N1.getValueType().isVector()) ||
        !N1.hasOneUse())
      continue;

    bool RModW = false;
    SDValue Load;
    unsigned Opcode = N1.getNode()->getOpcode();
    switch (Opcode) {
    case ISD::ADD:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::VECTOR_SHUFFLE: {
      SDValue N10 = N1.getOperand(0);
      SDValue N11 = N1.getOperand(1);
      RModW = isRMWLoad(N10, Chain, N2, Load);
      if (!RModW)
        RModW = isRMWLoad(N11, Chain, N2, Load);
      break;
    }
    case ISD::SUB:
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
    case ISD::ROTL:
    case ISD::ROTR:
    case ISD::SUBC:
    case ISD::SUBE:
    case X86ISD::SHLD:
    case X86ISD::SHRD: {
      SDValue N10 = N1.getOperand(0);
      RModW = isRMWLoad(N10, Chain, N2, Load);
      break;
    }
    }

    if (RModW) {
      MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
      ++NumLoadMoved;
    }
  }
}


/// PreprocessForFPConvert - Walk over the dag, lowering fpround and fpextend
/// nodes that target the FP stack into store and load pairs through the
/// stack.  This is a gross hack.  We would like to simply mark these as being
/// illegal, but when we do that, legalize produces these when it expands
/// calls, then expands these in the same legalize pass.  We would like dag
/// combine to be able to hack on these between the call expansion and the
/// node legalization.  As such, this pass basically does "really late"
/// legalization of these inline with the X86 isel pass.
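/// e.g. an f64-to-f32 FP_ROUND whose input is in an x87 register and whose
/// result is wanted in an SSE register becomes a truncating f32 store to a
/// stack temporary followed by a reload from that slot.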
void X86DAGToDAGISel::PreprocessForFPConvert() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    EVT SrcVT = N->getOperand(0).getValueType();
    EVT DstVT = N->getValueType(0);
    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
    EVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    DebugLoc dl = N->getDebugLoc();

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, NULL, 0, MemVT);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        NULL, 0, MemVT);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created.  This will cause general havoc on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}

/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelect() {
  const Function *F = MF->getFunction();
  OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);

  if (OptLevel != CodeGenOpt::None)
    PreprocessForRMW();

  // FIXME: This should only happen when not compiled with -O0.
  PreprocessForFPConvert();

  // Codegen the basic block.
#ifndef NDEBUG
  DEBUG(errs() << "===== Instruction selection begins:\n");
  Indent = 0;
#endif
  SelectRoot(*CurDAG);
#ifndef NDEBUG
  DEBUG(errs() << "===== Instruction selection ends:\n");
#endif

  CurDAG->RemoveDeadNodes();
}

/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                             MachineFrameInfo *MFI) {
  const TargetInstrInfo *TII = TM.getInstrInfo();
  if (Subtarget->isTargetCygMing())
    BuildMI(BB, DebugLoc::getUnknownLoc(),
            TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
}

void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
  // If this is main, emit special code for main.
  MachineBasicBlock *BB = MF.begin();
  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
}


bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
                                              X86ISelAddressMode &AM) {
  assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
  SDValue Segment = N.getOperand(0);

  if (AM.Segment.getNode() == 0) {
    AM.Segment = Segment;
    return false;
  }

  return true;
}

bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf

  SDValue Address = N.getOperand(1);
  if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
      !MatchSegmentBaseAddress(Address, AM))
    return false;

  return true;
}

/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
/// into an addressing mode.  These wrap things that will resolve down into a
/// symbol reference.  If no match is possible, this returns true, otherwise it
/// returns false.
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  SDValue N0 = N.getOperand(0);
  CodeModel::Model M = TM.getCodeModel();

  // Handle X86-64 rip-relative addresses.  We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      (M == CodeModel::Small || M == CodeModel::Kernel) &&
      // Base and index reg must be 0 in order to use %rip as base and lowering
      // must allow RIP.
      !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      int64_t Offset = AM.Disp + G->getOffset();
      if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
      AM.GV = G->getGlobal();
      AM.Disp = Offset;
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      int64_t Offset = AM.Disp + CP->getOffset();
      if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp = Offset;
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else {
      AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
      AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
    }

    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -static -mcmodel=small mode.  In 64-bit
  // mode, this results in a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      ((M == CodeModel::Small || M == CodeModel::Kernel) &&
       TM.getRelocationModel() == Reloc::Static)) {
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else {
      AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
      AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
    }
    return false;
  }

  return true;
}

/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done.  This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (MatchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base.Reg.getNode() == 0) {
    AM.Base.Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base.Reg.getNode() == 0 &&
      AM.IndexReg.getNode() == 0 &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}

bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  bool is64Bit = Subtarget->is64Bit();
  DebugLoc dl = N.getDebugLoc();
  DEBUG({
      errs() << "MatchAddress: ";
      AM.dump();
    });
  // Limit recursion.
  if (Depth > 5)
    return MatchAddressBase(N, AM);

  CodeModel::Model M = TM.getCodeModel();

  // If this is already a %rip relative address, we can only merge immediates
  // into it.  Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements.  It isn't very important, but this should be fixed for
    // consistency.
    if (!AM.ES && AM.JT != -1) return true;

    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
      int64_t Val = AM.Disp + Cst->getSExtValue();
      if (X86::isOffsetSuitableForCodeModel(Val, M,
                                            AM.hasSymbolicDisplacement())) {
        AM.Disp = Val;
        return false;
      }
    }
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!is64Bit ||
        X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M,
                                          AM.hasSymbolicDisplacement())) {
      AM.Disp += Val;
      return false;
    }
    break;
  }

  case X86ISD::SegmentBaseAddress:
    if (!MatchSegmentBaseAddress(N, AM))
      return false;
    break;

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    if (!MatchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!MatchLoad(N, AM))
      return false;
    break;

  case ISD::FrameIndex:
    if (AM.BaseType == X86ISelAddressMode::RegBase
        && AM.Base.Reg.getNode() == 0) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
      break;

    if (ConstantSDNode
          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getNode()->getOperand(0);

        // Okay, we know that we have a scale by now.  However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
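        // e.g. (shl (add x, 8), 2) becomes IndexReg = x, Scale = 4, with
        // 8 << 2 = 32 folded into the displacement.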
        if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
            isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
          AM.IndexReg = ShVal.getNode()->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
          uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
          if (!is64Bit ||
              X86::isOffsetSuitableForCodeModel(Disp, M,
                                                AM.hasSymbolicDisplacement()))
            AM.Disp = Disp;
          else
            AM.IndexReg = ShVal;
        } else {
          AM.IndexReg = ShVal;
        }
        return false;
      }
    }
    break;

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    // FALL THROUGH
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
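    // e.g. x*9 is matched as base = x, index = x, scale = 8, which can be
    // emitted as "lea (%reg,%reg,8)".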
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base.Reg.getNode() == 0 &&
        AM.IndexReg.getNode() == 0) {
      if (ConstantSDNode
            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getNode()->getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now.  However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
            Reg = MulVal.getNode()->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
            uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
                                      CN->getZExtValue();
            if (!is64Bit ||
                X86::isOffsetSuitableForCodeModel(Disp, M,
                                                  AM.hasSymbolicDisplacement()))
              AM.Disp = Disp;
            else
              Reg = N.getNode()->getOperand(0);
          } else {
            Reg = N.getNode()->getOperand(0);
          }

          AM.IndexReg = AM.Base.Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address (with the
    // index field left unused), use -B as the index.  This is a win if A
    // has multiple parts that can be folded into the address.  It also
    // saves a mov if the base register has other uses, since it avoids a
    // two-address sub instruction; however, it costs an additional mov if
    // the index register has other uses.
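    // e.g. for (sub GA+4, x), the entire LHS folds into the displacement
    // and -x becomes the index: "leal GA+4(,%neg_x,1), ..." where %neg_x
    // holds 0-x.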

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }
    int Cost = 0;
    SDValue RHS = N.getNode()->getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base.Reg.getNode() &&
         !AM.Base.Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;
    AM.Scale = 1;

    // Insert the new nodes into the topological ordering.
    if (Zero.getNode()->getNodeId() == -1 ||
        Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), Zero.getNode());
      Zero.getNode()->setNodeId(N.getNode()->getNodeId());
    }
    if (Neg.getNode()->getNodeId() == -1 ||
        Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), Neg.getNode());
      Neg.getNode()->setNodeId(N.getNode()->getNodeId());
    }
    return false;
  }

  case ISD::ADD: {
    X86ISelAddressMode Backup = AM;
    if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) &&
        !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1))
      return false;
    AM = Backup;
    if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) &&
        !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1))
      return false;
    AM = Backup;

    // If we couldn't fold both operands into the address at the same time,
    // see if we can just put each operand into a register and fold at least
    // the add.
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        !AM.Base.Reg.getNode() &&
        !AM.IndexReg.getNode()) {
      AM.Base.Reg = N.getNode()->getOperand(0);
      AM.IndexReg = N.getNode()->getOperand(1);
      AM.Scale = 1;
      return false;
    }
    break;
  }

  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      X86ISelAddressMode Backup = AM;
      uint64_t Offset = CN->getSExtValue();
      // Start with the LHS as an addr mode.
      if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
          // Address could not have picked a GV address for the displacement.
          AM.GV == NULL &&
          // On x86-64, the resultant disp must fit in 32-bits.
          (!is64Bit ||
           X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
                                             AM.hasSymbolicDisplacement())) &&
          // Check to see if the LHS & C is zero.
          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
        AM.Disp += Offset;
        return false;
      }
      AM = Backup;
    }
    break;

  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    SDValue Shift = N.getOperand(0);
    if (Shift.getNumOperands() != 2) break;

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;

    SDValue X = Shift.getOperand(0);
    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
    if (!C1 || !C2) break;

1136    // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
1137    // allows us to convert the shift and and into an h-register extract and
1138    // a scaled index.
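    // e.g. ((x >> 6) & 0x3fc) becomes (((x >> 8) & 0xff) << 2); the
    // "(x >> 8) & 0xff" piece is an h-register extract and the "<< 2"
    // becomes a scale of 4.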
    if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
      unsigned ScaleLog = 8 - C1->getZExtValue();
      if (ScaleLog > 0 && ScaleLog < 4 &&
          C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
        SDValue Eight = CurDAG->getConstant(8, MVT::i8);
        SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
        SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
                                      X, Eight);
        SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
                                      Srl, Mask);
        SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
        SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
                                      And, ShlCount);

        // Insert the new nodes into the topological ordering.
        if (Eight.getNode()->getNodeId() == -1 ||
            Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
          CurDAG->RepositionNode(X.getNode(), Eight.getNode());
          Eight.getNode()->setNodeId(X.getNode()->getNodeId());
        }
        if (Mask.getNode()->getNodeId() == -1 ||
            Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
          CurDAG->RepositionNode(X.getNode(), Mask.getNode());
          Mask.getNode()->setNodeId(X.getNode()->getNodeId());
        }
        if (Srl.getNode()->getNodeId() == -1 ||
            Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
          CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
          Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
        }
        if (And.getNode()->getNodeId() == -1 ||
            And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
          CurDAG->RepositionNode(N.getNode(), And.getNode());
          And.getNode()->setNodeId(N.getNode()->getNodeId());
        }
        if (ShlCount.getNode()->getNodeId() == -1 ||
            ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
          CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
          ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
        }
        if (Shl.getNode()->getNodeId() == -1 ||
            Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
          CurDAG->RepositionNode(N.getNode(), Shl.getNode());
          Shl.getNode()->setNodeId(N.getNode()->getNodeId());
        }
        CurDAG->ReplaceAllUsesWith(N, Shl);
        AM.IndexReg = And;
        AM.Scale = (1 << ScaleLog);
        return false;
      }
    }

    // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
    // allows us to fold the shift into this addressing mode.
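    // e.g. ((x << 2) & 0x3fc) becomes ((x & 0xff) << 2), which matches with
    // IndexReg = (x & 0xff) and Scale = 4.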
    if (Shift.getOpcode() != ISD::SHL) break;

    // Not likely to be profitable if either the AND or SHIFT node has more
    // than one use (unless all uses are for address computation). Besides,
    // the isel mechanism requires their node ids to be reused.
    if (!N.hasOneUse() || !Shift.hasOneUse())
      break;

    // Verify that the shift amount is something we can fold.
    unsigned ShiftCst = C1->getZExtValue();
    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
      break;

    // Get the new AND mask, this folds to a constant.
    SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
                                         SDValue(C2, 0), SDValue(C1, 0));
    SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
                                     NewANDMask);
    SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
                                       NewAND, SDValue(C1, 0));

    // Insert the new nodes into the topological ordering.
    if (C1->getNodeId() > X.getNode()->getNodeId()) {
      CurDAG->RepositionNode(X.getNode(), C1);
      C1->setNodeId(X.getNode()->getNodeId());
    }
    if (NewANDMask.getNode()->getNodeId() == -1 ||
        NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
      CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
      NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
    }
    if (NewAND.getNode()->getNodeId() == -1 ||
        NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
      CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
      NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
    }
    if (NewSHIFT.getNode()->getNodeId() == -1 ||
        NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
      NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
    }

    CurDAG->ReplaceAllUsesWith(N, NewSHIFT);

    AM.Scale = 1 << ShiftCst;
    AM.IndexReg = NewAND;
    return false;
  }
  }

  return MatchAddressBase(N, AM);
}

/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (AM.IndexReg.getNode() == 0) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base.Reg = N;
  return false;
}

/// SelectAddr - Returns true if it is able to pattern match an addressing
/// mode. It returns the operands which make up the maximal addressing mode
/// it can match by reference.
bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;
  bool Done = false;
  if (!N.hasOneUse()) {
    unsigned Opcode = N.getOpcode();
    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex &&
        Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) {
      // If we are able to fold N into the addressing mode, then we'll allow it
      // even if N has multiple uses. In general, an address computation is used
      // as an address by all of its uses. But watch out for CopyToReg uses:
      // they mean the address computation is live out. It will be computed by
      // an LEA, so we want to avoid computing the address twice.
      for (SDNode::use_iterator UI = N.getNode()->use_begin(),
             UE = N.getNode()->use_end(); UI != UE; ++UI) {
        if (UI->getOpcode() == ISD::CopyToReg) {
          MatchAddressBase(N, AM);
          Done = true;
          break;
        }
      }
    }
  }

  if (!Done && MatchAddress(N, AM))
    return false;

  EVT VT = N.getValueType();
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base.Reg.getNode())
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
/// match a load whose top elements are either undef or zeros.  The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
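/// e.g. (v4f32 (scalar_to_vector (loadf32 addr))) only needs the low
/// element, so a scalar SSE load such as movss from addr satisfies it.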
bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
                                          SDValue N, SDValue &Base,
                                          SDValue &Scale, SDValue &Index,
                                          SDValue &Disp, SDValue &Segment,
                                          SDValue &InChain,
                                          SDValue &OutChain) {
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    InChain = N.getOperand(0).getValue(1);
    if (ISD::isNON_EXTLoad(InChain.getNode()) &&
        InChain.getValue(0).hasOneUse() &&
        N.hasOneUse() &&
        IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
      LoadSDNode *LD = cast<LoadSDNode>(InChain);
      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
        return false;
      OutChain = LD->getChain();
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements.  This is a vector shuffle from the zero vector.
  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(0).getNode()->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
      N.getOperand(0).getOperand(0).hasOneUse()) {
    // Okay, this is a zero extending load.  Fold it.
    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
    if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
      return false;
    OutChain = LD->getChain();
    InChain = SDValue(LD, 1);
    return true;
  }
  return false;
}


/// SelectLEAAddr - This calls SelectAddr and determines whether the maximal
/// addressing mode it matches can be cost-effectively emitted as an LEA
/// instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
                                    SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp) {
  X86ISelAddressMode AM;

  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
  // segments.
  SDValue Copy = AM.Segment;
  SDValue T = CurDAG->getRegister(0, MVT::i32);
  AM.Segment = T;
  if (MatchAddress(N, AM))
    return false;
  assert(T == AM.Segment);
  AM.Segment = Copy;

  EVT VT = N.getValueType();
  unsigned Complexity = 0;
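  // Heuristically score the matched mode; modes scoring 2 or less (e.g. a
  // lone base register, or base plus a small displacement) are rejected
  // below, since plain arithmetic is cheaper than an LEA for those.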
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    if (AM.Base.Reg.getNode())
      Complexity = 1;
    else
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.getNode())
    Complexity++;
  else
    AM.IndexReg = CurDAG->getRegister(0, VT);

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or to
  // use a simple shift.
  if (AM.Scale > 1)
    Complexity++;

  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // into an LEA. This is determined with some experimentation but is by no
  // means optimal (especially for code size considerations). LEA is nice
  // because of its three-address nature. Tweak the cost function again when
  // we can run convertToThreeAddress() at register allocation time.
1397  if (AM.hasSymbolicDisplacement()) {
1398    // For X86-64, we should always use lea to materialize RIP relative
1399    // addresses.
1400    if (Subtarget->is64Bit())
1401      Complexity = 4;
1402    else
1403      Complexity += 2;
1404  }
1405
1406  if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
1407    Complexity++;
1408
1409  // If it isn't worth using an LEA, reject it.
1410  if (Complexity <= 2)
1411    return false;
1412
1413  SDValue Segment;
1414  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1415  return true;
1416}
1417
1418/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
1419bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
1420                                        SDValue &Scale, SDValue &Index,
1421                                        SDValue &Disp) {
1422  assert(Op.getOpcode() == X86ISD::TLSADDR);
1423  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1424  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1425
1426  X86ISelAddressMode AM;
1427  AM.GV = GA->getGlobal();
1428  AM.Disp += GA->getOffset();
1429  AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
1430  AM.SymbolFlags = GA->getTargetFlags();
1431
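  // Note: under the ELF general-dynamic model, 32-bit TLS addresses the
  // operand as x@TLSGD(,%ebx,1) with the GOT pointer living in %ebx, which
  // is why EBX is installed as the index register with scale 1 below. In
  // 64-bit mode the address is RIP-relative and no index register is needed.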
1432  if (N.getValueType() == MVT::i32) {
1433    AM.Scale = 1;
1434    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1435  } else {
1436    AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1437  }
1438
1439  SDValue Segment;
1440  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1441  return true;
1442}
1443
1444
1445bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
1446                                  SDValue &Base, SDValue &Scale,
1447                                  SDValue &Index, SDValue &Disp,
1448                                  SDValue &Segment) {
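  // A load may be folded as a memory operand only when it is a plain
  // (non-extending) load, this is its sole use, and folding is legal and
  // profitable at this point; operand 1 of the LoadSDNode is the base
  // pointer that SelectAddr matches.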
1449  if (ISD::isNON_EXTLoad(N.getNode()) &&
1450      N.hasOneUse() &&
1451      IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
1452    return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
1453  return false;
1454}
1455
1456/// getGlobalBaseReg - Return an SDNode that returns the value of
1457/// the global base register. Output instructions required to
1458/// initialize the global base register, if necessary.
1459///
1460SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1461  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1462  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
1463}
1464
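/// FindCallStartFromCall - Walk up the token chain until the CALLSEQ_START
/// node that opened this call sequence is found.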
static SDNode *FindCallStartFromCall(SDNode *Node) {
  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
  assert(Node->getOperand(0).getValueType() == MVT::Other &&
         "Node doesn't have a token chain argument!");
  return FindCallStartFromCall(Node->getOperand(0).getNode());
}
1471
1472SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
1473  SDValue Chain = Node->getOperand(0);
1474  SDValue In1 = Node->getOperand(1);
1475  SDValue In2L = Node->getOperand(2);
1476  SDValue In2H = Node->getOperand(3);
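  // The value to operate with arrives as a lo/hi pair of i32 halves (In2L,
  // In2H); the ATOM*6432 pseudo produces the old value as two i32 results
  // plus a chain, matching the result types requested below.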
1477  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1478  if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
1479    return NULL;
1480  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1481  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
1482  const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
1483  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
1484                                           MVT::i32, MVT::i32, MVT::Other, Ops,
1485                                           array_lengthof(Ops));
1486  cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
1487  return ResNode;
1488}
1489
1490SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
1491  if (Node->hasAnyUseOfValue(0))
1492    return 0;
1493
  // Optimize common patterns for __sync_add_and_fetch and
  // __sync_sub_and_fetch where the result is not used. This allows us
  // to use the "lock" versions of the add, sub, inc, and dec instructions.
  // FIXME: Do not use special instructions; instead, add the "lock"
  // prefix to the target node somehow. The extra information will then be
  // transferred to the machine instruction, where it denotes the prefix.
1500  SDValue Chain = Node->getOperand(0);
1501  SDValue Ptr = Node->getOperand(1);
1502  SDValue Val = Node->getOperand(2);
1503  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1504  if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
1505    return 0;
1506
1507  bool isInc = false, isDec = false, isSub = false, isCN = false;
1508  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
1509  if (CN) {
1510    isCN = true;
1511    int64_t CNVal = CN->getSExtValue();
1512    if (CNVal == 1)
1513      isInc = true;
1514    else if (CNVal == -1)
1515      isDec = true;
1516    else if (CNVal >= 0)
1517      Val = CurDAG->getTargetConstant(CNVal, NVT);
1518    else {
1519      isSub = true;
1520      Val = CurDAG->getTargetConstant(-CNVal, NVT);
1521    }
1522  } else if (Val.hasOneUse() &&
1523             Val.getOpcode() == ISD::SUB &&
1524             X86::isZeroNode(Val.getOperand(0))) {
1525    isSub = true;
1526    Val = Val.getOperand(1);
1527  }
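  // For example, __sync_sub_and_fetch(p, x) reaches us as
  // (atomic_load_add p, (sub 0, x)); recognizing the negation above lets us
  // emit LOCK SUB with x directly instead of materializing 0-x.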
1528
1529  unsigned Opc = 0;
1530  switch (NVT.getSimpleVT().SimpleTy) {
1531  default: return 0;
1532  case MVT::i8:
1533    if (isInc)
1534      Opc = X86::LOCK_INC8m;
1535    else if (isDec)
1536      Opc = X86::LOCK_DEC8m;
1537    else if (isSub) {
1538      if (isCN)
1539        Opc = X86::LOCK_SUB8mi;
1540      else
1541        Opc = X86::LOCK_SUB8mr;
1542    } else {
1543      if (isCN)
1544        Opc = X86::LOCK_ADD8mi;
1545      else
1546        Opc = X86::LOCK_ADD8mr;
1547    }
1548    break;
1549  case MVT::i16:
1550    if (isInc)
1551      Opc = X86::LOCK_INC16m;
1552    else if (isDec)
1553      Opc = X86::LOCK_DEC16m;
1554    else if (isSub) {
1555      if (isCN) {
1556        if (Predicate_i16immSExt8(Val.getNode()))
1557          Opc = X86::LOCK_SUB16mi8;
1558        else
1559          Opc = X86::LOCK_SUB16mi;
1560      } else
1561        Opc = X86::LOCK_SUB16mr;
1562    } else {
1563      if (isCN) {
1564        if (Predicate_i16immSExt8(Val.getNode()))
1565          Opc = X86::LOCK_ADD16mi8;
1566        else
1567          Opc = X86::LOCK_ADD16mi;
1568      } else
1569        Opc = X86::LOCK_ADD16mr;
1570    }
1571    break;
1572  case MVT::i32:
1573    if (isInc)
1574      Opc = X86::LOCK_INC32m;
1575    else if (isDec)
1576      Opc = X86::LOCK_DEC32m;
1577    else if (isSub) {
1578      if (isCN) {
1579        if (Predicate_i32immSExt8(Val.getNode()))
1580          Opc = X86::LOCK_SUB32mi8;
1581        else
1582          Opc = X86::LOCK_SUB32mi;
1583      } else
1584        Opc = X86::LOCK_SUB32mr;
1585    } else {
1586      if (isCN) {
1587        if (Predicate_i32immSExt8(Val.getNode()))
1588          Opc = X86::LOCK_ADD32mi8;
1589        else
1590          Opc = X86::LOCK_ADD32mi;
1591      } else
1592        Opc = X86::LOCK_ADD32mr;
1593    }
1594    break;
1595  case MVT::i64:
1596    if (isInc)
1597      Opc = X86::LOCK_INC64m;
1598    else if (isDec)
1599      Opc = X86::LOCK_DEC64m;
1600    else if (isSub) {
1601      Opc = X86::LOCK_SUB64mr;
1602      if (isCN) {
1603        if (Predicate_i64immSExt8(Val.getNode()))
1604          Opc = X86::LOCK_SUB64mi8;
1605        else if (Predicate_i64immSExt32(Val.getNode()))
1606          Opc = X86::LOCK_SUB64mi32;
1607      }
1608    } else {
1609      Opc = X86::LOCK_ADD64mr;
1610      if (isCN) {
1611        if (Predicate_i64immSExt8(Val.getNode()))
1612          Opc = X86::LOCK_ADD64mi8;
1613        else if (Predicate_i64immSExt32(Val.getNode()))
1614          Opc = X86::LOCK_ADD64mi32;
1615      }
1616    }
1617    break;
1618  }
1619
1620  DebugLoc dl = Node->getDebugLoc();
1621  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
1622                                                 dl, NVT), 0);
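  // Result 0 (the loaded value) is known to be dead here, but every value of
  // the original node still needs a replacement, so an IMPLICIT_DEF stands
  // in for it in the merged results below.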
1623  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1624  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
1625  if (isInc || isDec) {
1626    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
1627    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
1628    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
1629    SDValue RetVals[] = { Undef, Ret };
1630    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
1631  } else {
1632    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
1633    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
1634    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
1635    SDValue RetVals[] = { Undef, Ret };
1636    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
1637  }
1638}
1639
/// HasNoSignedComparisonUses - Return true if the given X86ISD::CMP node
/// has no uses which require the SF or OF bits to be accurate.
1642static bool HasNoSignedComparisonUses(SDNode *N) {
1643  // Examine each user of the node.
1644  for (SDNode::use_iterator UI = N->use_begin(),
1645         UE = N->use_end(); UI != UE; ++UI) {
1646    // Only examine CopyToReg uses.
1647    if (UI->getOpcode() != ISD::CopyToReg)
1648      return false;
1649    // Only examine CopyToReg uses that copy to EFLAGS.
1650    if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
1651          X86::EFLAGS)
1652      return false;
1653    // Examine each user of the CopyToReg use.
1654    for (SDNode::use_iterator FlagUI = UI->use_begin(),
1655           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
1656      // Only examine the Flag result.
1657      if (FlagUI.getUse().getResNo() != 1) continue;
1658      // Anything unusual: assume conservatively.
1659      if (!FlagUI->isMachineOpcode()) return false;
1660      // Examine the opcode of the user.
1661      switch (FlagUI->getMachineOpcode()) {
1662      // These comparisons don't treat the most significant bit specially.
1663      case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
1664      case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
1665      case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
1666      case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
1667      case X86::JA: case X86::JAE: case X86::JB: case X86::JBE:
1668      case X86::JE: case X86::JNE: case X86::JP: case X86::JNP:
1669      case X86::CMOVA16rr: case X86::CMOVA16rm:
1670      case X86::CMOVA32rr: case X86::CMOVA32rm:
1671      case X86::CMOVA64rr: case X86::CMOVA64rm:
1672      case X86::CMOVAE16rr: case X86::CMOVAE16rm:
1673      case X86::CMOVAE32rr: case X86::CMOVAE32rm:
1674      case X86::CMOVAE64rr: case X86::CMOVAE64rm:
1675      case X86::CMOVB16rr: case X86::CMOVB16rm:
1676      case X86::CMOVB32rr: case X86::CMOVB32rm:
1677      case X86::CMOVB64rr: case X86::CMOVB64rm:
1678      case X86::CMOVBE16rr: case X86::CMOVBE16rm:
1679      case X86::CMOVBE32rr: case X86::CMOVBE32rm:
1680      case X86::CMOVBE64rr: case X86::CMOVBE64rm:
1681      case X86::CMOVE16rr: case X86::CMOVE16rm:
1682      case X86::CMOVE32rr: case X86::CMOVE32rm:
1683      case X86::CMOVE64rr: case X86::CMOVE64rm:
1684      case X86::CMOVNE16rr: case X86::CMOVNE16rm:
1685      case X86::CMOVNE32rr: case X86::CMOVNE32rm:
1686      case X86::CMOVNE64rr: case X86::CMOVNE64rm:
1687      case X86::CMOVNP16rr: case X86::CMOVNP16rm:
1688      case X86::CMOVNP32rr: case X86::CMOVNP32rm:
1689      case X86::CMOVNP64rr: case X86::CMOVNP64rm:
1690      case X86::CMOVP16rr: case X86::CMOVP16rm:
1691      case X86::CMOVP32rr: case X86::CMOVP32rm:
1692      case X86::CMOVP64rr: case X86::CMOVP64rm:
1693        continue;
1694      // Anything else: assume conservatively.
1695      default: return false;
1696      }
1697    }
1698  }
1699  return true;
1700}
1701
1702SDNode *X86DAGToDAGISel::Select(SDValue N) {
1703  SDNode *Node = N.getNode();
1704  EVT NVT = Node->getValueType(0);
1705  unsigned Opc, MOpc;
1706  unsigned Opcode = Node->getOpcode();
1707  DebugLoc dl = Node->getDebugLoc();
1708
1709#ifndef NDEBUG
1710  DEBUG({
1711      errs() << std::string(Indent, ' ') << "Selecting: ";
1712      Node->dump(CurDAG);
1713      errs() << '\n';
1714    });
1715  Indent += 2;
1716#endif
1717
1718  if (Node->isMachineOpcode()) {
1719#ifndef NDEBUG
1720    DEBUG({
1721        errs() << std::string(Indent-2, ' ') << "== ";
1722        Node->dump(CurDAG);
1723        errs() << '\n';
1724      });
1725    Indent -= 2;
1726#endif
1727    return NULL;   // Already selected.
1728  }
1729
1730  switch (Opcode) {
1731  default: break;
1732  case X86ISD::GlobalBaseReg:
1733    return getGlobalBaseReg();
1734
1735  case X86ISD::ATOMOR64_DAG:
1736    return SelectAtomic64(Node, X86::ATOMOR6432);
1737  case X86ISD::ATOMXOR64_DAG:
1738    return SelectAtomic64(Node, X86::ATOMXOR6432);
1739  case X86ISD::ATOMADD64_DAG:
1740    return SelectAtomic64(Node, X86::ATOMADD6432);
1741  case X86ISD::ATOMSUB64_DAG:
1742    return SelectAtomic64(Node, X86::ATOMSUB6432);
1743  case X86ISD::ATOMNAND64_DAG:
1744    return SelectAtomic64(Node, X86::ATOMNAND6432);
1745  case X86ISD::ATOMAND64_DAG:
1746    return SelectAtomic64(Node, X86::ATOMAND6432);
1747  case X86ISD::ATOMSWAP64_DAG:
1748    return SelectAtomic64(Node, X86::ATOMSWAP6432);
1749
1750  case ISD::ATOMIC_LOAD_ADD: {
1751    SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
1752    if (RetVal)
1753      return RetVal;
1754    break;
1755  }
1756
1757  case ISD::SMUL_LOHI:
1758  case ISD::UMUL_LOHI: {
1759    SDValue N0 = Node->getOperand(0);
1760    SDValue N1 = Node->getOperand(1);
1761
1762    bool isSigned = Opcode == ISD::SMUL_LOHI;
1763    if (!isSigned) {
1764      switch (NVT.getSimpleVT().SimpleTy) {
1765      default: llvm_unreachable("Unsupported VT!");
1766      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
1767      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
1768      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
1769      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
1770      }
1771    } else {
1772      switch (NVT.getSimpleVT().SimpleTy) {
1773      default: llvm_unreachable("Unsupported VT!");
1774      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
1775      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
1776      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
1777      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
1778      }
1779    }
1780
1781    unsigned LoReg, HiReg;
1782    switch (NVT.getSimpleVT().SimpleTy) {
1783    default: llvm_unreachable("Unsupported VT!");
1784    case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
1785    case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
1786    case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
1787    case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
1788    }
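    // MUL/IMUL implicitly consume the low register and write the result
    // into the hi:lo register pair; e.g. "mull %ecx" computes
    // EDX:EAX = EAX * ECX. That is why the operand is first copied into
    // LoReg below and both halves are read back afterwards.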
1789
1790    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1791    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    // Multiply is commutative.
1793    if (!foldedLoad) {
1794      foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1795      if (foldedLoad)
1796        std::swap(N0, N1);
1797    }
1798
1799    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
1800                                            N0, SDValue()).getValue(1);
1801
1802    if (foldedLoad) {
1803      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
1804                        InFlag };
1805      SDNode *CNode =
1806        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
1807                               array_lengthof(Ops));
1808      InFlag = SDValue(CNode, 1);
1809      // Update the chain.
1810      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1811    } else {
1812      InFlag =
1813        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1814    }
1815
1816    // Copy the low half of the result, if it is needed.
1817    if (!N.getValue(0).use_empty()) {
1818      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1819                                                LoReg, NVT, InFlag);
1820      InFlag = Result.getValue(2);
1821      ReplaceUses(N.getValue(0), Result);
1822#ifndef NDEBUG
1823      DEBUG({
1824          errs() << std::string(Indent-2, ' ') << "=> ";
1825          Result.getNode()->dump(CurDAG);
1826          errs() << '\n';
1827        });
1828#endif
1829    }
1830    // Copy the high half of the result, if it is needed.
1831    if (!N.getValue(1).use_empty()) {
1832      SDValue Result;
1833      if (HiReg == X86::AH && Subtarget->is64Bit()) {
1834        // Prevent use of AH in a REX instruction by referencing AX instead.
1835        // Shift it down 8 bits.
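        // (AH is not encodable in any instruction that carries a REX
        // prefix, which a copy into e.g. SIL or R8B would require, so go
        // through AX and a shift instead.)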
1836        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1837                                        X86::AX, MVT::i16, InFlag);
1838        InFlag = Result.getValue(2);
1839        Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
1840                                                Result,
1841                                   CurDAG->getTargetConstant(8, MVT::i8)), 0);
1842        // Then truncate it down to i8.
1843        Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
1844                                                MVT::i8, Result);
1845      } else {
1846        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1847                                        HiReg, NVT, InFlag);
1848        InFlag = Result.getValue(2);
1849      }
1850      ReplaceUses(N.getValue(1), Result);
1851#ifndef NDEBUG
1852      DEBUG({
1853          errs() << std::string(Indent-2, ' ') << "=> ";
1854          Result.getNode()->dump(CurDAG);
1855          errs() << '\n';
1856        });
1857#endif
1858    }
1859
1860#ifndef NDEBUG
1861    Indent -= 2;
1862#endif
1863
1864    return NULL;
1865  }
1866
1867  case ISD::SDIVREM:
1868  case ISD::UDIVREM: {
1869    SDValue N0 = Node->getOperand(0);
1870    SDValue N1 = Node->getOperand(1);
1871
1872    bool isSigned = Opcode == ISD::SDIVREM;
1873    if (!isSigned) {
1874      switch (NVT.getSimpleVT().SimpleTy) {
1875      default: llvm_unreachable("Unsupported VT!");
1876      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
1877      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
1878      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
1879      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
1880      }
1881    } else {
1882      switch (NVT.getSimpleVT().SimpleTy) {
1883      default: llvm_unreachable("Unsupported VT!");
1884      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
1885      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
1886      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
1887      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
1888      }
1889    }
1890
1891    unsigned LoReg, HiReg;
1892    unsigned ClrOpcode, SExtOpcode;
1893    switch (NVT.getSimpleVT().SimpleTy) {
1894    default: llvm_unreachable("Unsupported VT!");
1895    case MVT::i8:
1896      LoReg = X86::AL;  HiReg = X86::AH;
1897      ClrOpcode  = 0;
1898      SExtOpcode = X86::CBW;
1899      break;
1900    case MVT::i16:
1901      LoReg = X86::AX;  HiReg = X86::DX;
1902      ClrOpcode  = X86::MOV16r0;
1903      SExtOpcode = X86::CWD;
1904      break;
1905    case MVT::i32:
1906      LoReg = X86::EAX; HiReg = X86::EDX;
1907      ClrOpcode  = X86::MOV32r0;
1908      SExtOpcode = X86::CDQ;
1909      break;
1910    case MVT::i64:
1911      LoReg = X86::RAX; HiReg = X86::RDX;
1912      ClrOpcode  = ~0U; // NOT USED.
1913      SExtOpcode = X86::CQO;
1914      break;
1915    }
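    // CBW/CWD/CDQ/CQO sign-extend the low register into the high one
    // (e.g. CDQ sets EDX to the sign bits of EAX), preparing the dividend
    // for the signed-division cases below.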
1916
1917    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1918    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1919    bool signBitIsZero = CurDAG->SignBitIsZero(N0);
1920
1921    SDValue InFlag;
1922    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
      // Special case for div8: just use a move with zero extension to AX to
      // clear the upper 8 bits (AH).
1925      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
1926      if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
1927        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
1928        Move =
1929          SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
1930                                         MVT::Other, Ops,
1931                                         array_lengthof(Ops)), 0);
1932        Chain = Move.getValue(1);
1933        ReplaceUses(N0.getValue(1), Chain);
1934      } else {
1935        Move =
1936          SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
1937        Chain = CurDAG->getEntryNode();
1938      }
1939      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
1940      InFlag = Chain.getValue(1);
1941    } else {
1942      InFlag =
1943        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
1944                             LoReg, N0, SDValue()).getValue(1);
1945      if (isSigned && !signBitIsZero) {
1946        // Sign extend the low part into the high part.
1947        InFlag =
1948          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
1949      } else {
1950        // Zero out the high part, effectively zero extending the input.
1951        SDValue ClrNode;
1952
1953        if (NVT.getSimpleVT() == MVT::i64) {
1954          ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32),
1955                            0);
1956          // We just did a 32-bit clear, insert it into a 64-bit register to
1957          // clear the whole 64-bit reg.
1958          SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64);
1959          SDValue SubRegNo =
1960            CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
1961          ClrNode =
1962            SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,
1963                                           MVT::i64, Zero, ClrNode, SubRegNo),
1964                    0);
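          // (On x86-64, writing a 32-bit register implicitly zeroes the
          // upper 32 bits, so SUBREG_TO_REG merely records that the full
          // 64-bit value is already zero.)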
1965        } else {
1966          ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
1967        }
1968
1969        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
1970                                      ClrNode, InFlag).getValue(1);
1971      }
1972    }
1973
1974    if (foldedLoad) {
1975      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
1976                        InFlag };
1977      SDNode *CNode =
1978        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
1979                               array_lengthof(Ops));
1980      InFlag = SDValue(CNode, 1);
1981      // Update the chain.
1982      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1983    } else {
1984      InFlag =
1985        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1986    }
1987
1988    // Copy the division (low) result, if it is needed.
1989    if (!N.getValue(0).use_empty()) {
1990      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1991                                                LoReg, NVT, InFlag);
1992      InFlag = Result.getValue(2);
1993      ReplaceUses(N.getValue(0), Result);
1994#ifndef NDEBUG
1995      DEBUG({
1996          errs() << std::string(Indent-2, ' ') << "=> ";
1997          Result.getNode()->dump(CurDAG);
1998          errs() << '\n';
1999        });
2000#endif
2001    }
2002    // Copy the remainder (high) result, if it is needed.
2003    if (!N.getValue(1).use_empty()) {
2004      SDValue Result;
2005      if (HiReg == X86::AH && Subtarget->is64Bit()) {
2006        // Prevent use of AH in a REX instruction by referencing AX instead.
2007        // Shift it down 8 bits.
2008        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2009                                        X86::AX, MVT::i16, InFlag);
2010        InFlag = Result.getValue(2);
2011        Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
2012                                      Result,
2013                                      CurDAG->getTargetConstant(8, MVT::i8)),
2014                         0);
2015        // Then truncate it down to i8.
2016        Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
2017                                                MVT::i8, Result);
2018      } else {
2019        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2020                                        HiReg, NVT, InFlag);
2021        InFlag = Result.getValue(2);
2022      }
2023      ReplaceUses(N.getValue(1), Result);
2024#ifndef NDEBUG
2025      DEBUG({
2026          errs() << std::string(Indent-2, ' ') << "=> ";
2027          Result.getNode()->dump(CurDAG);
2028          errs() << '\n';
2029        });
2030#endif
2031    }
2032
2033#ifndef NDEBUG
2034    Indent -= 2;
2035#endif
2036
2037    return NULL;
2038  }
2039
2040  case X86ISD::CMP: {
2041    SDValue N0 = Node->getOperand(0);
2042    SDValue N1 = Node->getOperand(1);
2043
2044    // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
2045    // use a smaller encoding.
2046    if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
2047        N0.getValueType() != MVT::i8 &&
2048        X86::isZeroNode(N1)) {
2049      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
2050      if (!C) break;
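      // Narrowing the test saves encoding space: testl carries a 4-byte
      // immediate, while testb and testw carry 1- and 2-byte immediates.
      // The sign-bit guards below ensure we only narrow when no user
      // depends on SF reflecting the original width.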
2051
2052      // For example, convert "testl %eax, $8" to "testb %al, $8"
2053      if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
2054          (!(C->getZExtValue() & 0x80) ||
2055           HasNoSignedComparisonUses(Node))) {
2056        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
2057        SDValue Reg = N0.getNode()->getOperand(0);
2058
2059        // On x86-32, only the ABCD registers have 8-bit subregisters.
2060        if (!Subtarget->is64Bit()) {
2061          TargetRegisterClass *TRC = 0;
2062          switch (N0.getValueType().getSimpleVT().SimpleTy) {
2063          case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
2064          case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
2065          default: llvm_unreachable("Unsupported TEST operand type!");
2066          }
2067          SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
2068          Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
2069                                               Reg.getValueType(), Reg, RC), 0);
2070        }
2071
2072        // Extract the l-register.
2073        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
2074                                                        MVT::i8, Reg);
2075
2076        // Emit a testb.
2077        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
2078      }
2079
      // For example, convert "testl %eax, $2048" to "testb %ah, $8".
2081      if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
2082          (!(C->getZExtValue() & 0x8000) ||
2083           HasNoSignedComparisonUses(Node))) {
2084        // Shift the immediate right by 8 bits.
2085        SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
2086                                                       MVT::i8);
2087        SDValue Reg = N0.getNode()->getOperand(0);
2088
2089        // Put the value in an ABCD register.
2090        TargetRegisterClass *TRC = 0;
2091        switch (N0.getValueType().getSimpleVT().SimpleTy) {
2092        case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
2093        case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
2094        case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
2095        default: llvm_unreachable("Unsupported TEST operand type!");
2096        }
2097        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
2098        Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
2099                                             Reg.getValueType(), Reg, RC), 0);
2100
2101        // Extract the h-register.
2102        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl,
2103                                                        MVT::i8, Reg);
2104
2105        // Emit a testb. No special NOREX tricks are needed since there's
2106        // only one GPR operand!
2107        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
2108                                      Subreg, ShiftedImm);
2109      }
2110
      // For example, convert "testl %eax, $32776" to "testw %ax, $32776".
2112      if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
2113          N0.getValueType() != MVT::i16 &&
2114          (!(C->getZExtValue() & 0x8000) ||
2115           HasNoSignedComparisonUses(Node))) {
2116        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
2117        SDValue Reg = N0.getNode()->getOperand(0);
2118
2119        // Extract the 16-bit subregister.
2120        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl,
2121                                                        MVT::i16, Reg);
2122
2123        // Emit a testw.
2124        return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
2125      }
2126
      // For example, convert "testq %rax, $268468232" to
      // "testl %eax, $268468232".
2128      if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
2129          N0.getValueType() == MVT::i64 &&
2130          (!(C->getZExtValue() & 0x80000000) ||
2131           HasNoSignedComparisonUses(Node))) {
2132        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
2133        SDValue Reg = N0.getNode()->getOperand(0);
2134
2135        // Extract the 32-bit subregister.
2136        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl,
2137                                                        MVT::i32, Reg);
2138
2139        // Emit a testl.
2140        return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
2141      }
2142    }
2143    break;
2144  }
2145  }
2146
2147  SDNode *ResNode = SelectCode(N);
2148
2149#ifndef NDEBUG
2150  DEBUG({
2151      errs() << std::string(Indent-2, ' ') << "=> ";
2152      if (ResNode == NULL || ResNode == N.getNode())
2153        N.getNode()->dump(CurDAG);
2154      else
2155        ResNode->dump(CurDAG);
2156      errs() << '\n';
2157    });
2158  Indent -= 2;
2159#endif
2160
2161  return ResNode;
2162}
2163
2164bool X86DAGToDAGISel::
2165SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
2166                             std::vector<SDValue> &OutOps) {
2167  SDValue Op0, Op1, Op2, Op3, Op4;
2168  switch (ConstraintCode) {
2169  case 'o':   // offsetable        ??
2170  case 'v':   // not offsetable    ??
2171  default: return true;
2172  case 'm':   // memory
2173    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
2174      return true;
2175    break;
2176  }
2177
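  // The five operands form the standard x86 memory reference:
  // base, scale, index, displacement, and segment.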
2178  OutOps.push_back(Op0);
2179  OutOps.push_back(Op1);
2180  OutOps.push_back(Op2);
2181  OutOps.push_back(Op3);
2182  OutOps.push_back(Op4);
2183  return false;
2184}
2185
2186/// createX86ISelDag - This pass converts a legalized DAG into a
2187/// X86-specific DAG, ready for instruction scheduling.
2188///
2189FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
2190                                     llvm::CodeGenOpt::Level OptLevel) {
2191  return new X86DAGToDAGISel(TM, OptLevel);
2192}
2193