X86ISelDAGToDAG.cpp revision ed2eee63a6858312ed17582d8cb85a6856d8eb34
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines a DAG pattern matching instruction selector for X86,
11// converting from a legalized dag to a X86 dag.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "x86-isel"
16#include "X86.h"
17#include "X86InstrBuilder.h"
18#include "X86ISelLowering.h"
19#include "X86MachineFunctionInfo.h"
20#include "X86RegisterInfo.h"
21#include "X86Subtarget.h"
22#include "X86TargetMachine.h"
23#include "llvm/GlobalValue.h"
24#include "llvm/Instructions.h"
25#include "llvm/Intrinsics.h"
26#include "llvm/Support/CFG.h"
27#include "llvm/Type.h"
28#include "llvm/CodeGen/MachineConstantPool.h"
29#include "llvm/CodeGen/MachineFunction.h"
30#include "llvm/CodeGen/MachineFrameInfo.h"
31#include "llvm/CodeGen/MachineInstrBuilder.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/CodeGen/SelectionDAGISel.h"
34#include "llvm/Target/TargetMachine.h"
35#include "llvm/Target/TargetOptions.h"
36#include "llvm/Support/Compiler.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/MathExtras.h"
39#include "llvm/Support/Streams.h"
40#include "llvm/ADT/SmallPtrSet.h"
41#include "llvm/ADT/Statistic.h"
42using namespace llvm;
43
44STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
45
46//===----------------------------------------------------------------------===//
47//                      Pattern Matcher Implementation
48//===----------------------------------------------------------------------===//
49
50namespace {
51  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
52  /// SDValues instead of register numbers for the leaves of the matched
53  /// tree.
54  struct X86ISelAddressMode {
55    enum {
56      RegBase,
57      FrameIndexBase
58    } BaseType;
59
60    struct {            // This is really a union, discriminated by BaseType!
61      SDValue Reg;
62      int FrameIndex;
63    } Base;
64
65    bool isRIPRel;     // RIP as base?
66    unsigned Scale;
67    SDValue IndexReg;
68    int32_t Disp;
69    GlobalValue *GV;
70    Constant *CP;
71    const char *ES;
72    int JT;
73    unsigned Align;    // CP alignment.
74
75    X86ISelAddressMode()
76      : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
77        GV(0), CP(0), ES(0), JT(-1), Align(0) {
78    }
79    void dump() {
80      cerr << "X86ISelAddressMode " << this << "\n";
81      cerr << "Base.Reg ";
82              if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump();
83              else cerr << "nul";
84      cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
85      cerr << "isRIPRel " << isRIPRel << " Scale " << Scale << "\n";
86      cerr << "IndexReg ";
87              if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
88              else cerr << "nul";
89      cerr << " Disp " << Disp << "\n";
90      cerr << "GV "; if (GV) GV->dump();
91                     else cerr << "nul";
92      cerr << " CP "; if (CP) CP->dump();
93                     else cerr << "nul";
94      cerr << "\n";
95      cerr << "ES "; if (ES) cerr << ES; else cerr << "nul";
96      cerr << " JT " << JT << " Align " << Align << "\n";
97    }
98  };
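  // As an illustration (operand values chosen only for exposition), an address
  // such as "p + i*4 + 12" would be represented here as BaseType = RegBase,
  // Base.Reg = p, IndexReg = i, Scale = 4, Disp = 12, and later turned by
  // getAddressOperands() into the operands of the x86 memory form 12(p,i,4).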
99}
100
101namespace {
102  //===--------------------------------------------------------------------===//
103  /// ISel - X86 specific code to select X86 machine instructions for
104  /// SelectionDAG operations.
105  ///
106  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
107    /// TM - Keep a reference to X86TargetMachine.
108    ///
109    X86TargetMachine &TM;
110
111    /// X86Lowering - This object fully describes how to lower LLVM code to an
112    /// X86-specific SelectionDAG.
113    X86TargetLowering &X86Lowering;
114
115    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
116    /// make the right decision when generating code for different targets.
117    const X86Subtarget *Subtarget;
118
119    /// CurBB - Current BB being isel'd.
120    ///
121    MachineBasicBlock *CurBB;
122
123    /// OptForSize - If true, selector should try to optimize for code size
124    /// instead of performance.
125    bool OptForSize;
126
127  public:
128    X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
129      : SelectionDAGISel(tm, fast),
130        TM(tm), X86Lowering(*TM.getTargetLowering()),
131        Subtarget(&TM.getSubtarget<X86Subtarget>()),
132        OptForSize(false) {}
133
134    virtual const char *getPassName() const {
135      return "X86 DAG->DAG Instruction Selection";
136    }
137
138    /// InstructionSelect - This callback is invoked by
139    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
140    virtual void InstructionSelect();
141
142    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
143
144    virtual
145      bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;
146
147// Include the pieces autogenerated from the target description.
148#include "X86GenDAGISel.inc"
149
150  private:
151    SDNode *Select(SDValue N);
152    SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
153
154    bool MatchAddress(SDValue N, X86ISelAddressMode &AM,
155                      bool isRoot = true, unsigned Depth = 0);
156    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM,
157                          bool isRoot, unsigned Depth);
158    bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
159                    SDValue &Scale, SDValue &Index, SDValue &Disp);
160    bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
161                       SDValue &Scale, SDValue &Index, SDValue &Disp);
162    bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
163                             SDValue N, SDValue &Base, SDValue &Scale,
164                             SDValue &Index, SDValue &Disp,
165                             SDValue &InChain, SDValue &OutChain);
166    bool TryFoldLoad(SDValue P, SDValue N,
167                     SDValue &Base, SDValue &Scale,
168                     SDValue &Index, SDValue &Disp);
169    void PreprocessForRMW();
170    void PreprocessForFPConvert();
171
172    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
173    /// inline asm expressions.
174    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
175                                              char ConstraintCode,
176                                              std::vector<SDValue> &OutOps);
177
178    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
179
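    /// getAddressOperands - Convert a matched X86ISelAddressMode into the four
    /// operand values (Base, Scale, Index, Disp) that the memory instruction
    /// patterns expect.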
180    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
181                                   SDValue &Scale, SDValue &Index,
182                                   SDValue &Disp) {
183      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
184        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
185        AM.Base.Reg;
186      Scale = getI8Imm(AM.Scale);
187      Index = AM.IndexReg;
188      // These are 32-bit even in 64-bit mode since the RIP-relative offset
189      // is 32-bit.
190      if (AM.GV)
191        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
192      else if (AM.CP)
193        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
194                                             AM.Align, AM.Disp);
195      else if (AM.ES)
196        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
197      else if (AM.JT != -1)
198        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
199      else
200        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
201    }
202
203    /// getI8Imm - Return a target constant with the specified value, of type
204    /// i8.
205    inline SDValue getI8Imm(unsigned Imm) {
206      return CurDAG->getTargetConstant(Imm, MVT::i8);
207    }
208
209    /// getI16Imm - Return a target constant with the specified value, of type
210    /// i16.
211    inline SDValue getI16Imm(unsigned Imm) {
212      return CurDAG->getTargetConstant(Imm, MVT::i16);
213    }
214
215    /// getI32Imm - Return a target constant with the specified value, of type
216    /// i32.
217    inline SDValue getI32Imm(unsigned Imm) {
218      return CurDAG->getTargetConstant(Imm, MVT::i32);
219    }
220
221    /// getGlobalBaseReg - Return an SDNode that returns the value of
222    /// the global base register. Output instructions required to
223    /// initialize the global base register, if necessary.
224    ///
225    SDNode *getGlobalBaseReg();
226
227    /// getTruncateTo8Bit - return an SDNode that implements a subreg based
228    /// truncate of the specified operand to i8. This can be done with tablegen,
229    /// except that this code uses MVT::Flag in a tricky way that happens to
230    /// improve scheduling in some cases.
231    SDNode *getTruncateTo8Bit(SDValue N0);
232
233#ifndef NDEBUG
234    unsigned Indent;
235#endif
236  };
237}
238
239/// findFlagUse - Return use of MVT::Flag value produced by the specified
240/// SDNode.
241///
242static SDNode *findFlagUse(SDNode *N) {
243  unsigned FlagResNo = N->getNumValues()-1;
244  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
245    SDUse &Use = I.getUse();
246    if (Use.getResNo() == FlagResNo)
247      return Use.getUser();
248  }
249  return NULL;
250}
251
252/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
253/// This function recursively traverses up the operand chain, ignoring
254/// certain nodes.
255static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
256                          SDNode *Root,
257                          SmallPtrSet<SDNode*, 16> &Visited) {
258  if (Use->getNodeId() < Def->getNodeId() ||
259      !Visited.insert(Use))
260    return false;
261
262  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
263    SDNode *N = Use->getOperand(i).getNode();
264    if (N == Def) {
265      if (Use == ImmedUse || Use == Root)
266        continue;  // We are not looking for immediate use.
267      assert(N != Root);
268      return true;
269    }
270
271    // Traverse up the operand chain.
272    if (findNonImmUse(N, Def, ImmedUse, Root, Visited))
273      return true;
274  }
275  return false;
276}
277
278/// isNonImmUse - Start searching from Root up the DAG to check if Def can
279/// be reached. Return true if that's the case. However, ignore direct uses
280/// by ImmedUse (which would be U in the example illustrated in
281/// IsLegalAndProfitableToFold) and by Root (which can happen in the store
282/// case).
283/// FIXME: to be really generic, we should allow direct use by any node
284/// that is being folded. But realistically, since we only fold loads which
285/// have one non-chain use, we only need to watch out for load/op/store
286/// and load/op/cmp case where the root (store / cmp) may reach the load via
287/// its chain operand.
288static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
289  SmallPtrSet<SDNode*, 16> Visited;
290  return findNonImmUse(Root, Def, ImmedUse, Root, Visited);
291}
292
293
294bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
295                                                 SDNode *Root) const {
296  if (Fast) return false;
297
298  if (U == Root)
299    switch (U->getOpcode()) {
300    default: break;
301    case ISD::ADD:
302    case ISD::ADDC:
303    case ISD::ADDE:
304    case ISD::AND:
305    case ISD::OR:
306    case ISD::XOR: {
307      // If the other operand is an 8-bit immediate (or a 32-bit immediate for
308      // i64 ops) we should fold the immediate instead. This reduces code size.
309      // e.g.
310      // movl 4(%esp), %eax
311      // addl $4, %eax
312      // vs.
313      // movl $4, %eax
314      // addl 4(%esp), %eax
315      // The former is 2 bytes shorter. In the case where the increment is 1,
316      // the saving can be 4 bytes (by using incl %eax).
317      ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(U->getOperand(1));
318      if (Imm) {
319        if (U->getValueType(0) == MVT::i64) {
320          if ((int32_t)Imm->getZExtValue() == (int64_t)Imm->getZExtValue())
321            return false;
322        } else {
323          if ((int8_t)Imm->getZExtValue() == (int64_t)Imm->getZExtValue())
324            return false;
325        }
326      }
327    }
328    }
329
330  // If Root can somehow reach N through a path that doesn't contain U, then
331  // folding N would create a cycle. E.g. in the following diagram, Root can
332  // reach N through X. If N is folded into Root, then X is both a predecessor
333  // and a successor of U.
334  //
335  //          [N*]           //
336  //         ^   ^           //
337  //        /     \          //
338  //      [U*]    [X]?       //
339  //        ^     ^          //
340  //         \   /           //
341  //          \ /            //
342  //         [Root*]         //
343  //
344  // * indicates nodes to be folded together.
345  //
346  // If Root produces a flag, then it gets (even more) interesting. Since it
347  // will be "glued" together with its flag use in the scheduler, we need to
348  // check if it might reach N.
349  //
350  //          [N*]           //
351  //         ^   ^           //
352  //        /     \          //
353  //      [U*]    [X]?       //
354  //        ^       ^        //
355  //         \       \       //
356  //          \      |       //
357  //         [Root*] |       //
358  //          ^      |       //
359  //          f      |       //
360  //          |      /       //
361  //         [Y]    /        //
362  //           ^   /         //
363  //           f  /          //
364  //           | /           //
365  //          [FU]           //
366  //
367  // If FU (flag use) indirectly reaches N (the load), and Root folds N
368  // (call it Fold), then X is a predecessor of FU and a successor of
369  // Fold. But since Fold and FU are flagged together, this will create
370  // a cycle in the scheduling graph.
371
372  MVT VT = Root->getValueType(Root->getNumValues()-1);
373  while (VT == MVT::Flag) {
374    SDNode *FU = findFlagUse(Root);
375    if (FU == NULL)
376      break;
377    Root = FU;
378    VT = Root->getValueType(Root->getNumValues()-1);
379  }
380
381  return !isNonImmUse(Root, N, U);
382}
383
384/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
385/// and move load below the TokenFactor. Replace store's chain operand with
386/// load's chain result.
387static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
388                                 SDValue Store, SDValue TF) {
389  SmallVector<SDValue, 4> Ops;
390  for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
391    if (Load.getNode() == TF.getOperand(i).getNode())
392      Ops.push_back(Load.getOperand(0));
393    else
394      Ops.push_back(TF.getOperand(i));
395  CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
396  CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
397  CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
398                             Store.getOperand(2), Store.getOperand(3));
399}
400
401/// isRMWLoad - Return true if N is a load that's part of an RMW sub-DAG.
402///
403static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
404                      SDValue &Load) {
405  if (N.getOpcode() == ISD::BIT_CONVERT)
406    N = N.getOperand(0);
407
408  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
409  if (!LD || LD->isVolatile())
410    return false;
411  if (LD->getAddressingMode() != ISD::UNINDEXED)
412    return false;
413
414  ISD::LoadExtType ExtType = LD->getExtensionType();
415  if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
416    return false;
417
418  if (N.hasOneUse() &&
419      N.getOperand(1) == Address &&
420      N.getNode()->isOperandOf(Chain.getNode())) {
421    Load = N;
422    return true;
423  }
424  return false;
425}
426
427/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
428/// operand and move load below the call's chain operand.
429static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
430                                  SDValue Call, SDValue CallSeqStart) {
431  SmallVector<SDValue, 8> Ops;
432  SDValue Chain = CallSeqStart.getOperand(0);
433  if (Chain.getNode() == Load.getNode())
434    Ops.push_back(Load.getOperand(0));
435  else {
436    assert(Chain.getOpcode() == ISD::TokenFactor &&
437           "Unexpected CallSeqStart chain operand");
438    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
439      if (Chain.getOperand(i).getNode() == Load.getNode())
440        Ops.push_back(Load.getOperand(0));
441      else
442        Ops.push_back(Chain.getOperand(i));
443    SDValue NewChain =
444      CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
445                      MVT::Other, &Ops[0], Ops.size());
446    Ops.clear();
447    Ops.push_back(NewChain);
448  }
449  for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
450    Ops.push_back(CallSeqStart.getOperand(i));
451  CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
452  CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
453                             Load.getOperand(1), Load.getOperand(2));
454  Ops.clear();
455  Ops.push_back(SDValue(Load.getNode(), 1));
456  for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
457    Ops.push_back(Call.getOperand(i));
458  CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
459}
460
461/// isCalleeLoad - Return true if call address is a load and it can be
462/// moved below CALLSEQ_START and the chains leading up to the call.
463/// Return the CALLSEQ_START by reference as a second output.
464static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
465  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
466    return false;
467  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
468  if (!LD ||
469      LD->isVolatile() ||
470      LD->getAddressingMode() != ISD::UNINDEXED ||
471      LD->getExtensionType() != ISD::NON_EXTLOAD)
472    return false;
473
474  // Now let's find the callseq_start.
475  while (Chain.getOpcode() != ISD::CALLSEQ_START) {
476    if (!Chain.hasOneUse())
477      return false;
478    Chain = Chain.getOperand(0);
479  }
480
481  if (Chain.getOperand(0).getNode() == Callee.getNode())
482    return true;
483  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
484      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()))
485    return true;
486  return false;
487}
488
489
490/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
491/// This is only run if not in -fast mode (aka -O0).
492/// This allows the instruction selector to pick more read-modify-write
493/// instructions. This is a common case:
494///
495///     [Load chain]
496///         ^
497///         |
498///       [Load]
499///       ^    ^
500///       |    |
501///      /      \-
502///     /         |
503/// [TokenFactor] [Op]
504///     ^          ^
505///     |          |
506///      \        /
507///       \      /
508///       [Store]
509///
510/// The fact that the store's chain operand != the load's chain result prevents
511/// the (store (op (load))) instruction from being selected. We can transform it to:
512///
513///     [Load chain]
514///         ^
515///         |
516///    [TokenFactor]
517///         ^
518///         |
519///       [Load]
520///       ^    ^
521///       |    |
522///       |     \-
523///       |       |
524///       |     [Op]
525///       |       ^
526///       |       |
527///       \      /
528///        \    /
529///       [Store]
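/// For example, after this transform a read-modify-write sequence such as
/// "*p += x" can be matched as a single memory-operand instruction
/// (e.g. addl %x, (%p)) instead of a separate load, add, and store.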
530void X86DAGToDAGISel::PreprocessForRMW() {
531  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
532         E = CurDAG->allnodes_end(); I != E; ++I) {
533    if (I->getOpcode() == X86ISD::CALL) {
534      /// Also try moving call address load from outside callseq_start to just
535      /// before the call to allow it to be folded.
536      ///
537      ///     [Load chain]
538      ///         ^
539      ///         |
540      ///       [Load]
541      ///       ^    ^
542      ///       |    |
543      ///      /      \--
544      ///     /          |
545      ///[CALLSEQ_START] |
546      ///     ^          |
547      ///     |          |
548      /// [LOAD/C2Reg]   |
549      ///     |          |
550      ///      \        /
551      ///       \      /
552      ///       [CALL]
553      SDValue Chain = I->getOperand(0);
554      SDValue Load  = I->getOperand(1);
555      if (!isCalleeLoad(Load, Chain))
556        continue;
557      MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
558      ++NumLoadMoved;
559      continue;
560    }
561
562    if (!ISD::isNON_TRUNCStore(I))
563      continue;
564    SDValue Chain = I->getOperand(0);
565
566    if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
567      continue;
568
569    SDValue N1 = I->getOperand(1);
570    SDValue N2 = I->getOperand(2);
571    if ((N1.getValueType().isFloatingPoint() &&
572         !N1.getValueType().isVector()) ||
573        !N1.hasOneUse())
574      continue;
575
576    bool RModW = false;
577    SDValue Load;
578    unsigned Opcode = N1.getNode()->getOpcode();
579    switch (Opcode) {
580    case ISD::ADD:
581    case ISD::MUL:
582    case ISD::AND:
583    case ISD::OR:
584    case ISD::XOR:
585    case ISD::ADDC:
586    case ISD::ADDE:
587    case ISD::VECTOR_SHUFFLE: {
588      SDValue N10 = N1.getOperand(0);
589      SDValue N11 = N1.getOperand(1);
590      RModW = isRMWLoad(N10, Chain, N2, Load);
591      if (!RModW)
592        RModW = isRMWLoad(N11, Chain, N2, Load);
593      break;
594    }
595    case ISD::SUB:
596    case ISD::SHL:
597    case ISD::SRA:
598    case ISD::SRL:
599    case ISD::ROTL:
600    case ISD::ROTR:
601    case ISD::SUBC:
602    case ISD::SUBE:
603    case X86ISD::SHLD:
604    case X86ISD::SHRD: {
605      SDValue N10 = N1.getOperand(0);
606      RModW = isRMWLoad(N10, Chain, N2, Load);
607      break;
608    }
609    }
610
611    if (RModW) {
612      MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
613      ++NumLoadMoved;
614    }
615  }
616}
617
618
619/// PreprocessForFPConvert - Walk over the dag, lowering fpround and fpextend
620/// nodes that target the FP stack into a store and load through the stack.  This
621/// is a gross hack.  We would like to simply mark these as being illegal, but
622/// when we do that, legalize produces these when it expands calls, then expands
623/// these in the same legalize pass.  We would like dag combine to be able to
624/// hack on these between the call expansion and the node legalization.  As such,
625/// this pass basically does "really late" legalization of these inline with the
626/// X86 isel pass.
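/// For example, an FP_ROUND whose f64 input lives on the x87 stack but whose
/// f32 result is wanted in an SSE register becomes a truncating f32 store to a
/// stack temporary followed by an f32 load from that slot (see below).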
627void X86DAGToDAGISel::PreprocessForFPConvert() {
628  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
629       E = CurDAG->allnodes_end(); I != E; ) {
630    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
631    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
632      continue;
633
634    // If the source and destination are SSE registers, then this is a legal
635    // conversion that should not be lowered.
636    MVT SrcVT = N->getOperand(0).getValueType();
637    MVT DstVT = N->getValueType(0);
638    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
639    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
640    if (SrcIsSSE && DstIsSSE)
641      continue;
642
643    if (!SrcIsSSE && !DstIsSSE) {
644      // If this is an FPStack extension, it is a noop.
645      if (N->getOpcode() == ISD::FP_EXTEND)
646        continue;
647      // If this is a value-preserving FPStack truncation, it is a noop.
648      if (N->getConstantOperandVal(1))
649        continue;
650    }
651
652    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
653    // FPStack has extload and truncstore.  SSE can fold direct loads into other
654    // operations.  Based on this, decide what we want to do.
655    MVT MemVT;
656    if (N->getOpcode() == ISD::FP_ROUND)
657      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
658    else
659      MemVT = SrcIsSSE ? SrcVT : DstVT;
660
661    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
662    DebugLoc dl = N->getDebugLoc();
663
664    // FIXME: optimize the case where the src/dest is a load or store?
665    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
666                                          N->getOperand(0),
667                                          MemTmp, NULL, 0, MemVT);
668    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
669                                        NULL, 0, MemVT);
670
671    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
672    // extload we created.  This will cause general havok on the dag because
673    // anything below the conversion could be folded into other existing nodes.
674    // To avoid invalidating 'I', back it up to the convert node.
675    --I;
676    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
677
678    // Now that we did that, the node is dead.  Increment the iterator to the
679    // next node to process, then delete N.
680    ++I;
681    CurDAG->DeleteNode(N);
682  }
683}
684
685/// InstructionSelect - This callback is invoked by SelectionDAGISel
686/// when it has created a SelectionDAG for us to codegen.
687void X86DAGToDAGISel::InstructionSelect() {
688  CurBB = BB;  // BB can change as result of isel.
689  const Function *F = CurDAG->getMachineFunction().getFunction();
690  OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
691
692  DEBUG(BB->dump());
693  if (!Fast)
694    PreprocessForRMW();
695
696  // FIXME: This should only happen when not -fast.
697  PreprocessForFPConvert();
698
699  // Codegen the basic block.
700#ifndef NDEBUG
701  DOUT << "===== Instruction selection begins:\n";
702  Indent = 0;
703#endif
704  SelectRoot(*CurDAG);
705#ifndef NDEBUG
706  DOUT << "===== Instruction selection ends:\n";
707#endif
708
709  CurDAG->RemoveDeadNodes();
710}
711
712/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
713/// the main function.
714void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
715                                             MachineFrameInfo *MFI) {
716  const TargetInstrInfo *TII = TM.getInstrInfo();
717  if (Subtarget->isTargetCygMing())
718    BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
719}
720
721void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
722  // If this is main, emit special code for main.
723  MachineBasicBlock *BB = MF.begin();
724  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
725    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
726}
727
728/// MatchAddress - Add the specified node to the specified addressing mode,
729/// returning true if it cannot be done.  This just pattern matches for the
730/// addressing mode.
731bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
732                                   bool isRoot, unsigned Depth) {
733  bool is64Bit = Subtarget->is64Bit();
734  DebugLoc dl = N.getNode()->getDebugLoc();
735  DOUT << "MatchAddress: "; DEBUG(AM.dump());
736  // Limit recursion.
737  if (Depth > 5)
738    return MatchAddressBase(N, AM, isRoot, Depth);
739
740  // RIP relative addressing: %rip + 32-bit displacement!
741  if (AM.isRIPRel) {
742    if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
743      uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
744      if (!is64Bit || isInt32(AM.Disp + Val)) {
745        AM.Disp += Val;
746        return false;
747      }
748    }
749    return true;
750  }
751
752  switch (N.getOpcode()) {
753  default: break;
754  case ISD::Constant: {
755    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
756    if (!is64Bit || isInt32(AM.Disp + Val)) {
757      AM.Disp += Val;
758      return false;
759    }
760    break;
761  }
762
763  case X86ISD::Wrapper: {
764    DOUT << "Wrapper: 64bit " << is64Bit;
765    DOUT << " AM "; DEBUG(AM.dump()); DOUT << "\n";
766    // Under X86-64 non-small code model, GV (and friends) are 64-bits.
767    // Also, base and index reg must be 0 in order to use rip as base.
768    if (is64Bit && (TM.getCodeModel() != CodeModel::Small ||
769                    AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
770      break;
771    if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
772      break;
773    // If the value is available in a register and both the base and index
774    // components have already been picked, we can't also fit it in the
775    // addressing mode. Duplicate the GlobalAddress or ConstantPool as the displacement.
776    {
777      SDValue N0 = N.getOperand(0);
778      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
779        uint64_t Offset = G->getOffset();
780        if (!is64Bit || isInt32(AM.Disp + Offset)) {
781          GlobalValue *GV = G->getGlobal();
782          AM.GV = GV;
783          AM.Disp += Offset;
784          AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
785          return false;
786        }
787      } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
788        uint64_t Offset = CP->getOffset();
789        if (!is64Bit || isInt32(AM.Disp + Offset)) {
790          AM.CP = CP->getConstVal();
791          AM.Align = CP->getAlignment();
792          AM.Disp += Offset;
793          AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
794          return false;
795        }
796      } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
797        AM.ES = S->getSymbol();
798        AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
799        return false;
800      } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
801        AM.JT = J->getIndex();
802        AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
803        return false;
804      }
805    }
806    break;
807  }
808
809  case ISD::FrameIndex:
810    if (AM.BaseType == X86ISelAddressMode::RegBase
811        && AM.Base.Reg.getNode() == 0) {
812      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
813      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
814      return false;
815    }
816    break;
817
818  case ISD::SHL:
819    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1 || AM.isRIPRel)
820      break;
821
822    if (ConstantSDNode
823          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
824      unsigned Val = CN->getZExtValue();
825      if (Val == 1 || Val == 2 || Val == 3) {
826        AM.Scale = 1 << Val;
827        SDValue ShVal = N.getNode()->getOperand(0);
828
829        // Okay, we know that we have a scale by now.  However, if the scaled
830        // value is an add of something and a constant, we can fold the
831        // constant into the disp field here.
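        // For example (values illustrative only): for (x + 5) << 2 we set
        // IndexReg = x, Scale = 4, and fold 5 << 2 = 20 into Disp.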
832        if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
833            isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
834          AM.IndexReg = ShVal.getNode()->getOperand(0);
835          ConstantSDNode *AddVal =
836            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
837          uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
838          if (!is64Bit || isInt32(Disp))
839            AM.Disp = Disp;
840          else
841            AM.IndexReg = ShVal;
842        } else {
843          AM.IndexReg = ShVal;
844        }
845        return false;
846      }
847      break;
848    }
849
850  case ISD::SMUL_LOHI:
851  case ISD::UMUL_LOHI:
852    // A mul_lohi where we need the low part can be folded as a plain multiply.
853    if (N.getResNo() != 0) break;
854    // FALL THROUGH
855  case ISD::MUL:
856    // X*[3,5,9] -> X+X*[2,4,8]
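    // e.g. X*9 becomes Base.Reg = X, IndexReg = X, Scale = 8, which can later
    // be emitted as lea (X,X,8).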
857    if (AM.BaseType == X86ISelAddressMode::RegBase &&
858        AM.Base.Reg.getNode() == 0 &&
859        AM.IndexReg.getNode() == 0 &&
860        !AM.isRIPRel) {
861      if (ConstantSDNode
862            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
863        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
864            CN->getZExtValue() == 9) {
865          AM.Scale = unsigned(CN->getZExtValue())-1;
866
867          SDValue MulVal = N.getNode()->getOperand(0);
868          SDValue Reg;
869
870          // Okay, we know that we have a scale by now.  However, if the scaled
871          // value is an add of something and a constant, we can fold the
872          // constant into the disp field here.
873          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
874              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
875            Reg = MulVal.getNode()->getOperand(0);
876            ConstantSDNode *AddVal =
877              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
878            uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
879                                      CN->getZExtValue();
880            if (!is64Bit || isInt32(Disp))
881              AM.Disp = Disp;
882            else
883              Reg = N.getNode()->getOperand(0);
884          } else {
885            Reg = N.getNode()->getOperand(0);
886          }
887
888          AM.IndexReg = AM.Base.Reg = Reg;
889          return false;
890        }
891    }
892    break;
893
894  case ISD::ADD: {
895    X86ISelAddressMode Backup = AM;
896    if (!MatchAddress(N.getNode()->getOperand(0), AM, false, Depth+1) &&
897        !MatchAddress(N.getNode()->getOperand(1), AM, false, Depth+1))
898      return false;
899    AM = Backup;
900    if (!MatchAddress(N.getNode()->getOperand(1), AM, false, Depth+1) &&
901        !MatchAddress(N.getNode()->getOperand(0), AM, false, Depth+1))
902      return false;
903    AM = Backup;
904    break;
905  }
906
907  case ISD::OR:
908    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
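    // e.g. if X is known to be 4-byte aligned, "X | 3" is the same as "X + 3",
    // so the 3 can simply be folded into Disp.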
909    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
910      X86ISelAddressMode Backup = AM;
911      uint64_t Offset = CN->getSExtValue();
912      // Start with the LHS as an addr mode.
913      if (!MatchAddress(N.getOperand(0), AM, false) &&
914          // Address could not have picked a GV address for the displacement.
915          AM.GV == NULL &&
916          // On x86-64, the resultant disp must fit in 32-bits.
917          (!is64Bit || isInt32(AM.Disp + Offset)) &&
918          // Check to see if the LHS & C is zero.
919          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
920        AM.Disp += Offset;
921        return false;
922      }
923      AM = Backup;
924    }
925    break;
926
927  case ISD::AND: {
928    // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
929    // allows us to fold the shift into this addressing mode.
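    // e.g. (x << 2) & 0xfc is rewritten as (x & 0x3f) << 2, after which the
    // shift becomes Scale = 4 and the new AND becomes the index register.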
930    SDValue Shift = N.getOperand(0);
931    if (Shift.getOpcode() != ISD::SHL) break;
932
933    // Scale must not be used already.
934    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
935
936    // Not when RIP is used as the base.
937    if (AM.isRIPRel) break;
938
939    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
940    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
941    if (!C1 || !C2) break;
942
943    // Not likely to be profitable if either the AND or SHIFT node has more
944    // than one use (unless all uses are for address computation). Besides,
945    // the isel mechanism requires their node ids to be reused.
946    if (!N.hasOneUse() || !Shift.hasOneUse())
947      break;
948
949    // Verify that the shift amount is something we can fold.
950    unsigned ShiftCst = C1->getZExtValue();
951    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
952      break;
953
954    // Get the new AND mask, this folds to a constant.
955    SDValue X = Shift.getOperand(0);
956    SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
957                                         SDValue(C2, 0), SDValue(C1, 0));
958    SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
959                                     NewANDMask);
960    SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
961                                       NewAND, SDValue(C1, 0));
962
963    // Insert the new nodes into the topological ordering.
964    if (C1->getNodeId() > X.getNode()->getNodeId()) {
965      CurDAG->RepositionNode(X.getNode(), C1);
966      C1->setNodeId(X.getNode()->getNodeId());
967    }
968    if (NewANDMask.getNode()->getNodeId() == -1 ||
969        NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
970      CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
971      NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
972    }
973    if (NewAND.getNode()->getNodeId() == -1 ||
974        NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
975      CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
976      NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
977    }
978    if (NewSHIFT.getNode()->getNodeId() == -1 ||
979        NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
980      CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
981      NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
982    }
983
984    CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
985
986    AM.Scale = 1 << ShiftCst;
987    AM.IndexReg = NewAND;
988    return false;
989  }
990  }
991
992  return MatchAddressBase(N, AM, isRoot, Depth);
993}
994
995/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
996/// specified addressing mode without any further recursion.
997bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM,
998                                       bool isRoot, unsigned Depth) {
999  // Is the base register already occupied?
1000  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
1001    // If so, check to see if the scale index register is set.
1002    if (AM.IndexReg.getNode() == 0 && !AM.isRIPRel) {
1003      AM.IndexReg = N;
1004      AM.Scale = 1;
1005      return false;
1006    }
1007
1008    // Otherwise, we cannot select it.
1009    return true;
1010  }
1011
1012  // Default, generate it as a register.
1013  AM.BaseType = X86ISelAddressMode::RegBase;
1014  AM.Base.Reg = N;
1015  return false;
1016}
1017
1018/// SelectAddr - returns true if it is able to pattern match an addressing mode.
1019/// It returns the operands which make up the maximal addressing mode it can
1020/// match by reference.
1021bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
1022                                 SDValue &Scale, SDValue &Index,
1023                                 SDValue &Disp) {
1024  X86ISelAddressMode AM;
1025  if (MatchAddress(N, AM))
1026    return false;
1027
1028  MVT VT = N.getValueType();
1029  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1030    if (!AM.Base.Reg.getNode())
1031      AM.Base.Reg = CurDAG->getRegister(0, VT);
1032  }
1033
1034  if (!AM.IndexReg.getNode())
1035    AM.IndexReg = CurDAG->getRegister(0, VT);
1036
1037  getAddressOperands(AM, Base, Scale, Index, Disp);
1038  return true;
1039}
1040
1041/// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
1042/// match a load whose top elements are either undef or zeros.  The load flavor
1043/// is derived from the type of N, which is either v4f32 or v2f64.
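/// For example, (scalar_to_vector (load addr)) with undef upper elements, or a
/// VZEXT_MOVL of such a load when the upper elements must be zero, can have the
/// load folded directly into the using SSE instruction.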
1044bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
1045                                          SDValue N, SDValue &Base,
1046                                          SDValue &Scale, SDValue &Index,
1047                                          SDValue &Disp, SDValue &InChain,
1048                                          SDValue &OutChain) {
1049  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
1050    InChain = N.getOperand(0).getValue(1);
1051    if (ISD::isNON_EXTLoad(InChain.getNode()) &&
1052        InChain.getValue(0).hasOneUse() &&
1053        N.hasOneUse() &&
1054        IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
1055      LoadSDNode *LD = cast<LoadSDNode>(InChain);
1056      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
1057        return false;
1058      OutChain = LD->getChain();
1059      return true;
1060    }
1061  }
1062
1063  // Also handle the case where we explicitly require zeros in the top
1064  // elements.  This is a vector shuffle from the zero vector.
1065  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1066      // Check to see if the top elements are all zeros (or bitcast of zeros).
1067      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1068      N.getOperand(0).getNode()->hasOneUse() &&
1069      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
1070      N.getOperand(0).getOperand(0).hasOneUse()) {
1071    // Okay, this is a zero extending load.  Fold it.
1072    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
1073    if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
1074      return false;
1075    OutChain = LD->getChain();
1076    InChain = SDValue(LD, 1);
1077    return true;
1078  }
1079  return false;
1080}
1081
1082
1083/// SelectLEAAddr - it calls MatchAddress and determines if the maximal addressing
1084/// mode it matches can be cost-effectively emitted as an LEA instruction.
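/// For example (register names illustrative), an address of the form
/// 4(%reg1,%reg2,2) reaches Complexity 4 below and so is offered to the LEA
/// patterns, while a plain (%reg1) scores only 1 and is rejected.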
1085bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
1086                                    SDValue &Base, SDValue &Scale,
1087                                    SDValue &Index, SDValue &Disp) {
1088  X86ISelAddressMode AM;
1089  if (MatchAddress(N, AM))
1090    return false;
1091
1092  MVT VT = N.getValueType();
1093  unsigned Complexity = 0;
1094  if (AM.BaseType == X86ISelAddressMode::RegBase)
1095    if (AM.Base.Reg.getNode())
1096      Complexity = 1;
1097    else
1098      AM.Base.Reg = CurDAG->getRegister(0, VT);
1099  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1100    Complexity = 4;
1101
1102  if (AM.IndexReg.getNode())
1103    Complexity++;
1104  else
1105    AM.IndexReg = CurDAG->getRegister(0, VT);
1106
1107  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1108  // a simple shift.
1109  if (AM.Scale > 1)
1110    Complexity++;
1111
1112  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1113  // to a LEA. This is determined with some experimentation but is by no means
1114  // optimal (especially for code size consideration). LEA is nice because of
1115  // its three-address nature. Tweak the cost function again when we can run
1116  // convertToThreeAddress() at register allocation time.
1117  if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
1118    // For X86-64, we should always use lea to materialize RIP relative
1119    // addresses.
1120    if (Subtarget->is64Bit())
1121      Complexity = 4;
1122    else
1123      Complexity += 2;
1124  }
1125
1126  if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
1127    Complexity++;
1128
1129  if (Complexity > 2) {
1130    getAddressOperands(AM, Base, Scale, Index, Disp);
1131    return true;
1132  }
1133  return false;
1134}
1135
1136bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
1137                                  SDValue &Base, SDValue &Scale,
1138                                  SDValue &Index, SDValue &Disp) {
1139  if (ISD::isNON_EXTLoad(N.getNode()) &&
1140      N.hasOneUse() &&
1141      IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
1142    return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
1143  return false;
1144}
1145
1146/// getGlobalBaseReg - Return an SDNode that returns the value of
1147/// the global base register. Output instructions required to
1148/// initialize the global base register, if necessary.
1149///
1150SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1151  MachineFunction *MF = CurBB->getParent();
1152  unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
1153  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
1154}
1155
1156static SDNode *FindCallStartFromCall(SDNode *Node) {
1157  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
1158  assert(Node->getOperand(0).getValueType() == MVT::Other &&
1159         "Node doesn't have a token chain argument!");
1160  return FindCallStartFromCall(Node->getOperand(0).getNode());
1161}
1162
1163/// getTruncateTo8Bit - return an SDNode that implements a subreg based
1164/// truncate of the specified operand to i8. This can be done with tablegen,
1165/// except that this code uses MVT::Flag in a tricky way that happens to
1166/// improve scheduling in some cases.
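/// For an i32 input, for instance, this emits a MOV32to32_ copy followed by an
/// EXTRACT_SUBREG selecting the 8-bit subregister (subreg index 1).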
1167SDNode *X86DAGToDAGISel::getTruncateTo8Bit(SDValue N0) {
1168  assert(!Subtarget->is64Bit() &&
1169         "getTruncateTo8Bit is only needed on x86-32!");
1170  SDValue SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
1171  DebugLoc dl = N0.getNode()->getDebugLoc();
1172
1173  // Ensure that the source register has an 8-bit subreg on 32-bit targets
1174  unsigned Opc;
1175  MVT N0VT = N0.getValueType();
1176  switch (N0VT.getSimpleVT()) {
1177  default: assert(0 && "Unknown truncate!");
1178  case MVT::i16:
1179    Opc = X86::MOV16to16_;
1180    break;
1181  case MVT::i32:
1182    Opc = X86::MOV32to32_;
1183    break;
1184  }
1185
1186  // The use of MVT::Flag here is not strictly accurate, but it helps
1187  // scheduling in some cases.
1188  N0 = SDValue(CurDAG->getTargetNode(Opc, dl, N0VT, MVT::Flag, N0), 0);
1189  return CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
1190                               MVT::i8, N0, SRIdx, N0.getValue(1));
1191}
1192
1193SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
1194  SDValue Chain = Node->getOperand(0);
1195  SDValue In1 = Node->getOperand(1);
1196  SDValue In2L = Node->getOperand(2);
1197  SDValue In2H = Node->getOperand(3);
1198  SDValue Tmp0, Tmp1, Tmp2, Tmp3;
1199  if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3))
1200    return NULL;
1201  SDValue LSI = Node->getOperand(4);    // MemOperand
1202  const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, In2L, In2H, LSI, Chain };
1203  return CurDAG->getTargetNode(Opc, Node->getDebugLoc(),
1204                               MVT::i32, MVT::i32, MVT::Other, Ops, 8);
1205}
1206
1207SDNode *X86DAGToDAGISel::Select(SDValue N) {
1208  SDNode *Node = N.getNode();
1209  MVT NVT = Node->getValueType(0);
1210  unsigned Opc, MOpc;
1211  unsigned Opcode = Node->getOpcode();
1212  DebugLoc dl = Node->getDebugLoc();
1213
1214#ifndef NDEBUG
1215  DOUT << std::string(Indent, ' ') << "Selecting: ";
1216  DEBUG(Node->dump(CurDAG));
1217  DOUT << "\n";
1218  Indent += 2;
1219#endif
1220
1221  if (Node->isMachineOpcode()) {
1222#ifndef NDEBUG
1223    DOUT << std::string(Indent-2, ' ') << "== ";
1224    DEBUG(Node->dump(CurDAG));
1225    DOUT << "\n";
1226    Indent -= 2;
1227#endif
1228    return NULL;   // Already selected.
1229  }
1230
1231  switch (Opcode) {
1232    default: break;
1233    case X86ISD::GlobalBaseReg:
1234      return getGlobalBaseReg();
1235
1236    case X86ISD::ATOMOR64_DAG:
1237      return SelectAtomic64(Node, X86::ATOMOR6432);
1238    case X86ISD::ATOMXOR64_DAG:
1239      return SelectAtomic64(Node, X86::ATOMXOR6432);
1240    case X86ISD::ATOMADD64_DAG:
1241      return SelectAtomic64(Node, X86::ATOMADD6432);
1242    case X86ISD::ATOMSUB64_DAG:
1243      return SelectAtomic64(Node, X86::ATOMSUB6432);
1244    case X86ISD::ATOMNAND64_DAG:
1245      return SelectAtomic64(Node, X86::ATOMNAND6432);
1246    case X86ISD::ATOMAND64_DAG:
1247      return SelectAtomic64(Node, X86::ATOMAND6432);
1248    case X86ISD::ATOMSWAP64_DAG:
1249      return SelectAtomic64(Node, X86::ATOMSWAP6432);
1250
1251    case ISD::SMUL_LOHI:
1252    case ISD::UMUL_LOHI: {
1253      SDValue N0 = Node->getOperand(0);
1254      SDValue N1 = Node->getOperand(1);
1255
1256      bool isSigned = Opcode == ISD::SMUL_LOHI;
1257      if (!isSigned)
1258        switch (NVT.getSimpleVT()) {
1259        default: assert(0 && "Unsupported VT!");
1260        case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
1261        case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
1262        case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
1263        case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
1264        }
1265      else
1266        switch (NVT.getSimpleVT()) {
1267        default: assert(0 && "Unsupported VT!");
1268        case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
1269        case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
1270        case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
1271        case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
1272        }
1273
1274      unsigned LoReg, HiReg;
1275      switch (NVT.getSimpleVT()) {
1276      default: assert(0 && "Unsupported VT!");
1277      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
1278      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
1279      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
1280      case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
1281      }
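      // e.g. a 32-bit [u]mul_lohi uses MUL32r/IMUL32r, which leave the low half
      // of the product in EAX and the high half in EDX; each half is copied out
      // of those registers below only if it is actually used.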
1282
1283      SDValue Tmp0, Tmp1, Tmp2, Tmp3;
1284      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
1285      // multiply is commutative; try folding a load from the other operand too.
1286      if (!foldedLoad) {
1287        foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
1288        if (foldedLoad)
1289          std::swap(N0, N1);
1290      }
1291
1292      SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
1293                                              N0, SDValue()).getValue(1);
1294
1295      if (foldedLoad) {
1296        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
1297        SDNode *CNode =
1298          CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, 6);
1299        InFlag = SDValue(CNode, 1);
1300        // Update the chain.
1301        ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1302      } else {
1303        InFlag =
1304          SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1305      }
1306
1307      // Copy the low half of the result, if it is needed.
1308      if (!N.getValue(0).use_empty()) {
1309        SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1310                                                  LoReg, NVT, InFlag);
1311        InFlag = Result.getValue(2);
1312        ReplaceUses(N.getValue(0), Result);
1313#ifndef NDEBUG
1314        DOUT << std::string(Indent-2, ' ') << "=> ";
1315        DEBUG(Result.getNode()->dump(CurDAG));
1316        DOUT << "\n";
1317#endif
1318      }
1319      // Copy the high half of the result, if it is needed.
1320      if (!N.getValue(1).use_empty()) {
1321        SDValue Result;
1322        if (HiReg == X86::AH && Subtarget->is64Bit()) {
1323          // Prevent use of AH in a REX instruction by referencing AX instead.
1324          // Shift it down 8 bits.
1325          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1326                                          X86::AX, MVT::i16, InFlag);
1327          InFlag = Result.getValue(2);
1328          Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
1329                                                 Result,
1330                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
1331          // Then truncate it down to i8.
1332          SDValue SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
1333          Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
1334                                                   MVT::i8, Result, SRIdx), 0);
1335        } else {
1336          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1337                                          HiReg, NVT, InFlag);
1338          InFlag = Result.getValue(2);
1339        }
1340        ReplaceUses(N.getValue(1), Result);
1341#ifndef NDEBUG
1342        DOUT << std::string(Indent-2, ' ') << "=> ";
1343        DEBUG(Result.getNode()->dump(CurDAG));
1344        DOUT << "\n";
1345#endif
1346      }
1347
1348#ifndef NDEBUG
1349      Indent -= 2;
1350#endif
1351
1352      return NULL;
1353    }
1354
1355    case ISD::SDIVREM:
1356    case ISD::UDIVREM: {
1357      SDValue N0 = Node->getOperand(0);
1358      SDValue N1 = Node->getOperand(1);
1359
1360      bool isSigned = Opcode == ISD::SDIVREM;
1361      if (!isSigned)
1362        switch (NVT.getSimpleVT()) {
1363        default: assert(0 && "Unsupported VT!");
1364        case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
1365        case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
1366        case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
1367        case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
1368        }
1369      else
1370        switch (NVT.getSimpleVT()) {
1371        default: assert(0 && "Unsupported VT!");
1372        case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
1373        case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
1374        case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
1375        case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
1376        }
1377
1378      unsigned LoReg, HiReg;
1379      unsigned ClrOpcode, SExtOpcode;
1380      switch (NVT.getSimpleVT()) {
1381      default: assert(0 && "Unsupported VT!");
1382      case MVT::i8:
1383        LoReg = X86::AL;  HiReg = X86::AH;
1384        ClrOpcode  = 0;
1385        SExtOpcode = X86::CBW;
1386        break;
1387      case MVT::i16:
1388        LoReg = X86::AX;  HiReg = X86::DX;
1389        ClrOpcode  = X86::MOV16r0;
1390        SExtOpcode = X86::CWD;
1391        break;
1392      case MVT::i32:
1393        LoReg = X86::EAX; HiReg = X86::EDX;
1394        ClrOpcode  = X86::MOV32r0;
1395        SExtOpcode = X86::CDQ;
1396        break;
1397      case MVT::i64:
1398        LoReg = X86::RAX; HiReg = X86::RDX;
1399        ClrOpcode  = X86::MOV64r0;
1400        SExtOpcode = X86::CQO;
1401        break;
1402      }
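      // e.g. for a signed 32-bit divide the dividend is placed in EAX, CDQ
      // sign-extends it into EDX, and IDIV32r then leaves the quotient in EAX
      // and the remainder in EDX.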
1403
1404      SDValue Tmp0, Tmp1, Tmp2, Tmp3;
1405      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
1406      bool signBitIsZero = CurDAG->SignBitIsZero(N0);
1407
1408      SDValue InFlag;
1409      if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
1410        // Special case for div8, just use a move with zero extension to AX to
1411        // clear the upper 8 bits (AH).
1412        SDValue Tmp0, Tmp1, Tmp2, Tmp3, Move, Chain;
1413        if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3)) {
1414          SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N0.getOperand(0) };
1415          Move =
1416            SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16,
1417                                           MVT::Other, Ops, 5), 0);
1418          Chain = Move.getValue(1);
1419          ReplaceUses(N0.getValue(1), Chain);
1420        } else {
1421          Move =
1422            SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
1423          Chain = CurDAG->getEntryNode();
1424        }
1425        Chain  = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
1426        InFlag = Chain.getValue(1);
1427      } else {
1428        InFlag =
1429          CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
1430                               LoReg, N0, SDValue()).getValue(1);
1431        if (isSigned && !signBitIsZero) {
1432          // Sign extend the low part into the high part.
1433          InFlag =
1434            SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
1435        } else {
1436          // Zero out the high part, effectively zero extending the input.
1437          SDValue ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT),
1438                                    0);
1439          InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
1440                                        ClrNode, InFlag).getValue(1);
1441        }
1442      }
1443
1444      if (foldedLoad) {
1445        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
1446        SDNode *CNode =
1447          CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, 6);
1448        InFlag = SDValue(CNode, 1);
1449        // Update the chain.
1450        ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1451      } else {
1452        InFlag =
1453          SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1454      }
1455
1456      // Copy the division (low) result, if it is needed.
1457      if (!N.getValue(0).use_empty()) {
1458        SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1459                                                  LoReg, NVT, InFlag);
1460        InFlag = Result.getValue(2);
1461        ReplaceUses(N.getValue(0), Result);
1462#ifndef NDEBUG
1463        DOUT << std::string(Indent-2, ' ') << "=> ";
1464        DEBUG(Result.getNode()->dump(CurDAG));
1465        DOUT << "\n";
1466#endif
1467      }
1468      // Copy the remainder (high) result, if it is needed.
1469      if (!N.getValue(1).use_empty()) {
1470        SDValue Result;
1471        if (HiReg == X86::AH && Subtarget->is64Bit()) {
1472          // Prevent use of AH in a REX instruction by referencing AX instead.
1473          // Shift it down 8 bits.
1474          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1475                                          X86::AX, MVT::i16, InFlag);
1476          InFlag = Result.getValue(2);
1477          Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
1478                                        Result,
1479                                        CurDAG->getTargetConstant(8, MVT::i8)),
1480                           0);
1481          // Then truncate it down to i8.
1482          SDValue SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
1483          Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
1484                                                   MVT::i8, Result, SRIdx), 0);
1485        } else {
1486          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1487                                          HiReg, NVT, InFlag);
1488          InFlag = Result.getValue(2);
1489        }
1490        ReplaceUses(N.getValue(1), Result);
1491#ifndef NDEBUG
1492        DOUT << std::string(Indent-2, ' ') << "=> ";
1493        DEBUG(Result.getNode()->dump(CurDAG));
1494        DOUT << "\n";
1495#endif
1496      }
1497
1498#ifndef NDEBUG
1499      Indent -= 2;
1500#endif
1501
1502      return NULL;
1503    }
1504
1505    case ISD::SIGN_EXTEND_INREG: {
1506      MVT SVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
1507      if (SVT == MVT::i8 && !Subtarget->is64Bit()) {
1508        SDValue N0 = Node->getOperand(0);
1509
1510        SDValue TruncOp = SDValue(getTruncateTo8Bit(N0), 0);
1511        unsigned Opc = 0;
1512        switch (NVT.getSimpleVT()) {
1513        default: assert(0 && "Unknown sign_extend_inreg!");
1514        case MVT::i16:
1515          Opc = X86::MOVSX16rr8;
1516          break;
1517        case MVT::i32:
1518          Opc = X86::MOVSX32rr8;
1519          break;
1520        }
1521
1522        SDNode *ResNode = CurDAG->getTargetNode(Opc, dl, NVT, TruncOp);
1523
1524#ifndef NDEBUG
1525        DOUT << std::string(Indent-2, ' ') << "=> ";
1526        DEBUG(TruncOp.getNode()->dump(CurDAG));
1527        DOUT << "\n";
1528        DOUT << std::string(Indent-2, ' ') << "=> ";
1529        DEBUG(ResNode->dump(CurDAG));
1530        DOUT << "\n";
1531        Indent -= 2;
1532#endif
1533        return ResNode;
1534      }
1535      break;
1536    }
1537
1538    case ISD::TRUNCATE: {
1539      if (NVT == MVT::i8 && !Subtarget->is64Bit()) {
1540        SDValue Input = Node->getOperand(0);
1541        SDNode *ResNode = getTruncateTo8Bit(Input);
1542
1543#ifndef NDEBUG
1544        DOUT << std::string(Indent-2, ' ') << "=> ";
1545        DEBUG(ResNode->dump(CurDAG));
1546        DOUT << "\n";
1547        Indent -= 2;
1548#endif
1549        return ResNode;
1550      }
1551      break;
1552    }
1553
1554    case ISD::DECLARE: {
1555      // Handle DECLARE nodes here because the second operand may have been
1556      // wrapped in X86ISD::Wrapper.
1557      SDValue Chain = Node->getOperand(0);
1558      SDValue N1 = Node->getOperand(1);
1559      SDValue N2 = Node->getOperand(2);
1560      FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
1561      if (!FINode)
1562        break;
1563      if (N2.getOpcode() == ISD::ADD &&
1564          N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg)
1565        N2 = N2.getOperand(1);
1566      if (N2.getOpcode() != X86ISD::Wrapper)
1567        break;
1568      GlobalAddressSDNode *GVNode =
1569        dyn_cast<GlobalAddressSDNode>(N2.getOperand(0));
1570      if (!GVNode)
1571        break;
1572      SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
1573                                                 TLI.getPointerTy());
1574      SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(),
1575                                                    TLI.getPointerTy());
1576      SDValue Ops[] = { Tmp1, Tmp2, Chain };
1577      return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
1578                                   MVT::Other, Ops, 3);
1579      break;
1580    }
1581  }
1582
1583  SDNode *ResNode = SelectCode(N);
1584
1585#ifndef NDEBUG
1586  DOUT << std::string(Indent-2, ' ') << "=> ";
1587  if (ResNode == NULL || ResNode == N.getNode())
1588    DEBUG(N.getNode()->dump(CurDAG));
1589  else
1590    DEBUG(ResNode->dump(CurDAG));
1591  DOUT << "\n";
1592  Indent -= 2;
1593#endif
1594
1595  return ResNode;
1596}
1597
1598bool X86DAGToDAGISel::
1599SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
1600                             std::vector<SDValue> &OutOps) {
1601  SDValue Op0, Op1, Op2, Op3;
1602  switch (ConstraintCode) {
1603  case 'o':   // offsetable        ??
1604  case 'v':   // not offsetable    ??
1605  default: return true;
1606  case 'm':   // memory
1607    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3))
1608      return true;
1609    break;
1610  }
1611
1612  OutOps.push_back(Op0);
1613  OutOps.push_back(Op1);
1614  OutOps.push_back(Op2);
1615  OutOps.push_back(Op3);
1616  return false;
1617}
1618
1619/// createX86ISelDag - This pass converts a legalized DAG into a
1620/// X86-specific DAG, ready for instruction scheduling.
1621///
1622FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
1623  return new X86DAGToDAGISel(TM, Fast);
1624}
1625