X86ISelDAGToDAG.cpp revision c45a2c72ccf57e84a18beb898740fe5b8fe0d2c8
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by the Evan Cheng and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines a DAG pattern matching instruction selector for X86,
11// converting from a legalized dag to a X86 dag.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "x86-isel"
16#include "X86.h"
17#include "X86InstrBuilder.h"
18#include "X86ISelLowering.h"
19#include "X86RegisterInfo.h"
20#include "X86Subtarget.h"
21#include "X86TargetMachine.h"
22#include "llvm/GlobalValue.h"
23#include "llvm/Instructions.h"
24#include "llvm/Intrinsics.h"
25#include "llvm/Support/CFG.h"
26#include "llvm/CodeGen/MachineConstantPool.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineFrameInfo.h"
29#include "llvm/CodeGen/MachineInstrBuilder.h"
30#include "llvm/CodeGen/SSARegMap.h"
31#include "llvm/CodeGen/SelectionDAGISel.h"
32#include "llvm/Target/TargetMachine.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/MathExtras.h"
36#include "llvm/ADT/Statistic.h"
37#include <deque>
38#include <iostream>
39#include <queue>
40#include <set>
41using namespace llvm;
42
43//===----------------------------------------------------------------------===//
44//                      Pattern Matcher Implementation
45//===----------------------------------------------------------------------===//
46
47namespace {
48  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
49  /// SDOperand's instead of register numbers for the leaves of the matched
50  /// tree.
51  struct X86ISelAddressMode {
52    enum {
53      RegBase,
54      FrameIndexBase
55    } BaseType;
56
57    struct {            // This is really a union, discriminated by BaseType!
58      SDOperand Reg;
59      int FrameIndex;
60    } Base;
61
62    bool isRIPRel;     // RIP relative?
63    unsigned Scale;
64    SDOperand IndexReg;
65    unsigned Disp;
66    GlobalValue *GV;
67    Constant *CP;
68    const char *ES;
69    int JT;
70    unsigned Align;    // CP alignment.
71
72    X86ISelAddressMode()
73      : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
74        GV(0), CP(0), ES(0), JT(-1), Align(0) {
75    }
76  };
77}
78
79namespace {
80  Statistic<>
81  NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
82
83  Statistic<>
84  NumLoadMoved("x86-codegen", "Number of loads moved below TokenFactor");
85
86  //===--------------------------------------------------------------------===//
87  /// ISel - X86 specific code to select X86 machine instructions for
88  /// SelectionDAG operations.
89  ///
90  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
91    /// ContainsFPCode - Every instruction we select that uses or defines a FP
92    /// register should set this to true.
93    bool ContainsFPCode;
94
95    /// FastISel - Enable fast(er) instruction selection.
96    ///
97    bool FastISel;
98
99    /// TM - Keep a reference to X86TargetMachine.
100    ///
101    X86TargetMachine &TM;
102
103    /// X86Lowering - This object fully describes how to lower LLVM code to an
104    /// X86-specific SelectionDAG.
105    X86TargetLowering X86Lowering;
106
107    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
108    /// make the right decision when generating code for different targets.
109    const X86Subtarget *Subtarget;
110
111    /// GlobalBaseReg - keeps track of the virtual register mapped onto global
112    /// base register.
113    unsigned GlobalBaseReg;
114
115  public:
116    X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
117      : SelectionDAGISel(X86Lowering),
118        ContainsFPCode(false), FastISel(fast), TM(tm),
119        X86Lowering(*TM.getTargetLowering()),
120        Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
121
122    virtual bool runOnFunction(Function &Fn) {
123      // Make sure we re-emit a set of the global base reg if necessary
124      GlobalBaseReg = 0;
125      return SelectionDAGISel::runOnFunction(Fn);
126    }
127
128    virtual const char *getPassName() const {
129      return "X86 DAG->DAG Instruction Selection";
130    }
131
132    /// InstructionSelectBasicBlock - This callback is invoked by
133    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
134    virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
135
136    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
137
138    virtual bool CanBeFoldedBy(SDNode *N, SDNode *U);
139
140// Include the pieces autogenerated from the target description.
141#include "X86GenDAGISel.inc"
142
143  private:
144    SDNode *Select(SDOperand N);
145
146    bool MatchAddress(SDOperand N, X86ISelAddressMode &AM, bool isRoot = true);
147    bool SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
148                    SDOperand &Index, SDOperand &Disp);
149    bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
150                       SDOperand &Index, SDOperand &Disp);
151    bool TryFoldLoad(SDOperand P, SDOperand N,
152                     SDOperand &Base, SDOperand &Scale,
153                     SDOperand &Index, SDOperand &Disp);
154    void InstructionSelectPreprocess(SelectionDAG &DAG);
155
156    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
157    /// inline asm expressions.
158    virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
159                                              char ConstraintCode,
160                                              std::vector<SDOperand> &OutOps,
161                                              SelectionDAG &DAG);
162
163    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
164
165    inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
166                                   SDOperand &Scale, SDOperand &Index,
167                                   SDOperand &Disp) {
168      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
169        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
170        AM.Base.Reg;
171      Scale = getI8Imm(AM.Scale);
172      Index = AM.IndexReg;
173      // These are 32-bit even in 64-bit mode since RIP relative offset
174      // is 32-bit.
175      if (AM.GV)
176        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
177      else if (AM.CP)
178        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
179      else if (AM.ES)
180        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
181      else if (AM.JT != -1)
182        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
183      else
184        Disp = getI32Imm(AM.Disp);
185    }
186
187    /// getI8Imm - Return a target constant with the specified value, of type
188    /// i8.
189    inline SDOperand getI8Imm(unsigned Imm) {
190      return CurDAG->getTargetConstant(Imm, MVT::i8);
191    }
192
193    /// getI16Imm - Return a target constant with the specified value, of type
194    /// i16.
195    inline SDOperand getI16Imm(unsigned Imm) {
196      return CurDAG->getTargetConstant(Imm, MVT::i16);
197    }
198
199    /// getI32Imm - Return a target constant with the specified value, of type
200    /// i32.
201    inline SDOperand getI32Imm(unsigned Imm) {
202      return CurDAG->getTargetConstant(Imm, MVT::i32);
203    }
204
205    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
206    /// base register.  Return the virtual register that holds this value.
207    SDNode *getGlobalBaseReg();
208
209#ifndef NDEBUG
210    unsigned Indent;
211#endif
212  };
213}
214
215static void findNonImmUse(SDNode* Use, SDNode* Def, bool &found,
216                          std::set<SDNode *> &Visited) {
217  if (found ||
218      Use->getNodeId() > Def->getNodeId() ||
219      !Visited.insert(Use).second)
220    return;
221
222  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
223    SDNode *N = Use->getOperand(i).Val;
224    if (N != Def) {
225      findNonImmUse(N, Def, found, Visited);
226    } else {
227      found = true;
228      break;
229    }
230  }
231}
232
233static inline bool isNonImmUse(SDNode* Use, SDNode* Def) {
234  std::set<SDNode *> Visited;
235  bool found = false;
236  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
237    SDNode *N = Use->getOperand(i).Val;
238    if (N != Def) {
239      findNonImmUse(N, Def, found, Visited);
240      if (found) break;
241    }
242  }
243  return found;
244}
245
246
247bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U) {
248  // If U use can somehow reach N through another path then U can't fold N or
249  // it will create a cycle. e.g. In the following diagram, U can reach N
250  // through X. If N is folded into into U, then X is both a predecessor and
251  // a successor of U.
252  //
253  //         [ N ]
254  //         ^  ^
255  //         |  |
256  //        /   \---
257  //      /        [X]
258  //      |         ^
259  //     [U]--------|
260  return !FastISel && !isNonImmUse(U, N);
261}
262
263/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
264/// and move load below the TokenFactor. Replace store's chain operand with
265/// load's chain result.
266static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
267                                 SDOperand Store, SDOperand TF) {
268  std::vector<SDOperand> Ops;
269  for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
270    if (Load.Val == TF.Val->getOperand(i).Val)
271      Ops.push_back(Load.Val->getOperand(0));
272    else
273      Ops.push_back(TF.Val->getOperand(i));
274  DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
275  DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
276  DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
277                         Store.getOperand(2), Store.getOperand(3));
278}
279
280/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
281/// selector to pick more load-modify-store instructions. This is a common
282/// case:
283///
284///     [Load chain]
285///         ^
286///         |
287///       [Load]
288///       ^    ^
289///       |    |
290///      /      \-
291///     /         |
292/// [TokenFactor] [Op]
293///     ^          ^
294///     |          |
295///      \        /
296///       \      /
297///       [Store]
298///
299/// The fact the store's chain operand != load's chain will prevent the
300/// (store (op (load))) instruction from being selected. We can transform it to:
301///
302///     [Load chain]
303///         ^
304///         |
305///    [TokenFactor]
306///         ^
307///         |
308///       [Load]
309///       ^    ^
310///       |    |
311///       |     \-
312///       |       |
313///       |     [Op]
314///       |       ^
315///       |       |
316///       \      /
317///        \    /
318///       [Store]
319void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
320  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
321         E = DAG.allnodes_end(); I != E; ++I) {
322    if (I->getOpcode() != ISD::STORE)
323      continue;
324    SDOperand Chain = I->getOperand(0);
325    if (Chain.Val->getOpcode() != ISD::TokenFactor)
326      continue;
327
328    SDOperand N1 = I->getOperand(1);
329    SDOperand N2 = I->getOperand(2);
330    if (MVT::isFloatingPoint(N1.getValueType()) ||
331        MVT::isVector(N1.getValueType()) ||
332        !N1.hasOneUse())
333      continue;
334
335    bool RModW = false;
336    SDOperand Load;
337    unsigned Opcode = N1.Val->getOpcode();
338    switch (Opcode) {
339      case ISD::ADD:
340      case ISD::MUL:
341      case ISD::AND:
342      case ISD::OR:
343      case ISD::XOR:
344      case ISD::ADDC:
345      case ISD::ADDE: {
346        SDOperand N10 = N1.getOperand(0);
347        SDOperand N11 = N1.getOperand(1);
348        if (N10.Val->getOpcode() == ISD::LOAD)
349          RModW = true;
350        else if (N11.Val->getOpcode() == ISD::LOAD) {
351          RModW = true;
352          std::swap(N10, N11);
353        }
354        RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
355          (N10.getOperand(1) == N2) &&
356          (N10.Val->getValueType(0) == N1.getValueType());
357        if (RModW)
358          Load = N10;
359        break;
360      }
361      case ISD::SUB:
362      case ISD::SHL:
363      case ISD::SRA:
364      case ISD::SRL:
365      case ISD::ROTL:
366      case ISD::ROTR:
367      case ISD::SUBC:
368      case ISD::SUBE:
369      case X86ISD::SHLD:
370      case X86ISD::SHRD: {
371        SDOperand N10 = N1.getOperand(0);
372        if (N10.Val->getOpcode() == ISD::LOAD)
373          RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
374            (N10.getOperand(1) == N2) &&
375            (N10.Val->getValueType(0) == N1.getValueType());
376        if (RModW)
377          Load = N10;
378        break;
379      }
380    }
381
382    if (RModW) {
383      MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
384      ++NumLoadMoved;
385    }
386  }
387}
388
389/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
390/// when it has created a SelectionDAG for us to codegen.
391void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
392  DEBUG(BB->dump());
393  MachineFunction::iterator FirstMBB = BB;
394
395  if (!FastISel)
396    InstructionSelectPreprocess(DAG);
397
398  // Codegen the basic block.
399#ifndef NDEBUG
400  DEBUG(std::cerr << "===== Instruction selection begins:\n");
401  Indent = 0;
402#endif
403  DAG.setRoot(SelectRoot(DAG.getRoot()));
404#ifndef NDEBUG
405  DEBUG(std::cerr << "===== Instruction selection ends:\n");
406#endif
407
408  DAG.RemoveDeadNodes();
409
410  // Emit machine code to BB.
411  ScheduleAndEmitDAG(DAG);
412
413  // If we are emitting FP stack code, scan the basic block to determine if this
414  // block defines any FP values.  If so, put an FP_REG_KILL instruction before
415  // the terminator of the block.
416  if (!Subtarget->hasSSE2()) {
417    // Note that FP stack instructions *are* used in SSE code when returning
418    // values, but these are not live out of the basic block, so we don't need
419    // an FP_REG_KILL in this case either.
420    bool ContainsFPCode = false;
421
422    // Scan all of the machine instructions in these MBBs, checking for FP
423    // stores.
424    MachineFunction::iterator MBBI = FirstMBB;
425    do {
426      for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
427           !ContainsFPCode && I != E; ++I) {
428        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
429          if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
430              MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
431              RegMap->getRegClass(I->getOperand(0).getReg()) ==
432                X86::RFPRegisterClass) {
433            ContainsFPCode = true;
434            break;
435          }
436        }
437      }
438    } while (!ContainsFPCode && &*(MBBI++) != BB);
439
440    // Check PHI nodes in successor blocks.  These PHI's will be lowered to have
441    // a copy of the input value in this block.
442    if (!ContainsFPCode) {
443      // Final check, check LLVM BB's that are successors to the LLVM BB
444      // corresponding to BB for FP PHI nodes.
445      const BasicBlock *LLVMBB = BB->getBasicBlock();
446      const PHINode *PN;
447      for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
448           !ContainsFPCode && SI != E; ++SI) {
449        for (BasicBlock::const_iterator II = SI->begin();
450             (PN = dyn_cast<PHINode>(II)); ++II) {
451          if (PN->getType()->isFloatingPoint()) {
452            ContainsFPCode = true;
453            break;
454          }
455        }
456      }
457    }
458
459    // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
460    if (ContainsFPCode) {
461      BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
462      ++NumFPKill;
463    }
464  }
465}
466
467/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
468/// the main function.
469void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
470                                             MachineFrameInfo *MFI) {
471  if (Subtarget->TargetType == X86Subtarget::isCygwin)
472    BuildMI(BB, X86::CALLpcrel32, 1).addExternalSymbol("__main");
473
474  // Switch the FPU to 64-bit precision mode for better compatibility and speed.
475  int CWFrameIdx = MFI->CreateStackObject(2, 2);
476  addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
477
478  // Set the high part to be 64-bit precision.
479  addFrameReference(BuildMI(BB, X86::MOV8mi, 5),
480                    CWFrameIdx, 1).addImm(2);
481
482  // Reload the modified control word now.
483  addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
484}
485
486void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
487  // If this is main, emit special code for main.
488  MachineBasicBlock *BB = MF.begin();
489  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
490    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
491}
492
493/// MatchAddress - Add the specified node to the specified addressing mode,
494/// returning true if it cannot be done.  This just pattern matches for the
495/// addressing mode
496bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
497                                   bool isRoot) {
498  // RIP relative addressing: %rip + 32-bit displacement!
499  if (AM.isRIPRel) {
500    if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
501      int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
502      if (isInt32(AM.Disp + Val)) {
503        AM.Disp += Val;
504        return false;
505      }
506    }
507    return true;
508  }
509
510  int id = N.Val->getNodeId();
511  bool Available = isSelected(id);
512
513  switch (N.getOpcode()) {
514  default: break;
515  case ISD::Constant: {
516    int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
517    if (isInt32(AM.Disp + Val)) {
518      AM.Disp += Val;
519      return false;
520    }
521    break;
522  }
523
524  case X86ISD::Wrapper:
525    // If value is available in a register both base and index components have
526    // been picked, we can't fit the result available in the register in the
527    // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
528
529    // Can't fit GV or CP in addressing mode for X86-64 medium or large code
530    // model since the displacement field is 32-bit. Ok for small code model.
531
532    // For X86-64 PIC code, only allow GV / CP + displacement so we can use RIP
533    // relative addressing mode.
534    if ((!Subtarget->is64Bit() || TM.getCodeModel() == CodeModel::Small) &&
535        (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val))) {
536      bool isRIP = Subtarget->is64Bit();
537      if (isRIP && (AM.Base.Reg.Val || AM.Scale > 1 || AM.IndexReg.Val ||
538                    AM.BaseType == X86ISelAddressMode::FrameIndexBase))
539        break;
540      if (ConstantPoolSDNode *CP =
541          dyn_cast<ConstantPoolSDNode>(N.getOperand(0))) {
542        if (AM.CP == 0) {
543          AM.CP = CP->getConstVal();
544          AM.Align = CP->getAlignment();
545          AM.Disp += CP->getOffset();
546          if (isRIP)
547            AM.isRIPRel = true;
548          return false;
549        }
550      } else if (GlobalAddressSDNode *G =
551                 dyn_cast<GlobalAddressSDNode>(N.getOperand(0))) {
552        if (AM.GV == 0) {
553          AM.GV = G->getGlobal();
554          AM.Disp += G->getOffset();
555          if (isRIP)
556            AM.isRIPRel = true;
557          return false;
558        }
559      } else if (isRoot && isRIP) {
560        if (ExternalSymbolSDNode *S =
561            dyn_cast<ExternalSymbolSDNode>(N.getOperand(0))) {
562          AM.ES = S->getSymbol();
563          AM.isRIPRel = true;
564          return false;
565        } else if (JumpTableSDNode *J =
566                   dyn_cast<JumpTableSDNode>(N.getOperand(0))) {
567          AM.JT = J->getIndex();
568          AM.isRIPRel = true;
569          return false;
570        }
571      }
572    }
573    break;
574
575  case ISD::FrameIndex:
576    if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
577      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
578      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
579      return false;
580    }
581    break;
582
583  case ISD::SHL:
584    if (!Available && AM.IndexReg.Val == 0 && AM.Scale == 1)
585      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
586        unsigned Val = CN->getValue();
587        if (Val == 1 || Val == 2 || Val == 3) {
588          AM.Scale = 1 << Val;
589          SDOperand ShVal = N.Val->getOperand(0);
590
591          // Okay, we know that we have a scale by now.  However, if the scaled
592          // value is an add of something and a constant, we can fold the
593          // constant into the disp field here.
594          if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
595              isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
596            AM.IndexReg = ShVal.Val->getOperand(0);
597            ConstantSDNode *AddVal =
598              cast<ConstantSDNode>(ShVal.Val->getOperand(1));
599            uint64_t Disp = AM.Disp + AddVal->getValue() << Val;
600            if (isInt32(Disp))
601              AM.Disp = Disp;
602            else
603              AM.IndexReg = ShVal;
604          } else {
605            AM.IndexReg = ShVal;
606          }
607          return false;
608        }
609      }
610    break;
611
612  case ISD::MUL:
613    // X*[3,5,9] -> X+X*[2,4,8]
614    if (!Available &&
615        AM.BaseType == X86ISelAddressMode::RegBase &&
616        AM.Base.Reg.Val == 0 &&
617        AM.IndexReg.Val == 0)
618      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
619        if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
620          AM.Scale = unsigned(CN->getValue())-1;
621
622          SDOperand MulVal = N.Val->getOperand(0);
623          SDOperand Reg;
624
625          // Okay, we know that we have a scale by now.  However, if the scaled
626          // value is an add of something and a constant, we can fold the
627          // constant into the disp field here.
628          if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
629              isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
630            Reg = MulVal.Val->getOperand(0);
631            ConstantSDNode *AddVal =
632              cast<ConstantSDNode>(MulVal.Val->getOperand(1));
633            uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
634            if (isInt32(Disp))
635              AM.Disp = Disp;
636            else
637              Reg = N.Val->getOperand(0);
638          } else {
639            Reg = N.Val->getOperand(0);
640          }
641
642          AM.IndexReg = AM.Base.Reg = Reg;
643          return false;
644        }
645    break;
646
647  case ISD::ADD: {
648    if (!Available) {
649      X86ISelAddressMode Backup = AM;
650      if (!MatchAddress(N.Val->getOperand(0), AM, false) &&
651          !MatchAddress(N.Val->getOperand(1), AM, false))
652        return false;
653      AM = Backup;
654      if (!MatchAddress(N.Val->getOperand(1), AM, false) &&
655          !MatchAddress(N.Val->getOperand(0), AM, false))
656        return false;
657      AM = Backup;
658    }
659    break;
660  }
661
662  case ISD::OR: {
663    if (!Available) {
664      X86ISelAddressMode Backup = AM;
665      // Look for (x << c1) | c2 where (c2 < c1)
666      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(0));
667      if (CN && !MatchAddress(N.Val->getOperand(1), AM, false)) {
668        if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) {
669          AM.Disp = CN->getValue();
670          return false;
671        }
672      }
673      AM = Backup;
674      CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1));
675      if (CN && !MatchAddress(N.Val->getOperand(0), AM, false)) {
676        if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) {
677          AM.Disp = CN->getValue();
678          return false;
679        }
680      }
681      AM = Backup;
682    }
683    break;
684  }
685  }
686
687  // Is the base register already occupied?
688  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
689    // If so, check to see if the scale index register is set.
690    if (AM.IndexReg.Val == 0) {
691      AM.IndexReg = N;
692      AM.Scale = 1;
693      return false;
694    }
695
696    // Otherwise, we cannot select it.
697    return true;
698  }
699
700  // Default, generate it as a register.
701  AM.BaseType = X86ISelAddressMode::RegBase;
702  AM.Base.Reg = N;
703  return false;
704}
705
706/// SelectAddr - returns true if it is able pattern match an addressing mode.
707/// It returns the operands which make up the maximal addressing mode it can
708/// match by reference.
709bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
710                                 SDOperand &Index, SDOperand &Disp) {
711  X86ISelAddressMode AM;
712  if (MatchAddress(N, AM))
713    return false;
714
715  MVT::ValueType VT = N.getValueType();
716  if (AM.BaseType == X86ISelAddressMode::RegBase) {
717    if (!AM.Base.Reg.Val)
718      AM.Base.Reg = CurDAG->getRegister(0, VT);
719  }
720
721  if (!AM.IndexReg.Val)
722    AM.IndexReg = CurDAG->getRegister(0, VT);
723
724  getAddressOperands(AM, Base, Scale, Index, Disp);
725  return true;
726}
727
728/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
729/// mode it matches can be cost effectively emitted as an LEA instruction.
730bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
731                                    SDOperand &Scale,
732                                    SDOperand &Index, SDOperand &Disp) {
733  X86ISelAddressMode AM;
734  if (MatchAddress(N, AM))
735    return false;
736
737  MVT::ValueType VT = N.getValueType();
738  unsigned Complexity = 0;
739  if (AM.BaseType == X86ISelAddressMode::RegBase)
740    if (AM.Base.Reg.Val)
741      Complexity = 1;
742    else
743      AM.Base.Reg = CurDAG->getRegister(0, VT);
744  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
745    Complexity = 4;
746
747  if (AM.IndexReg.Val)
748    Complexity++;
749  else
750    AM.IndexReg = CurDAG->getRegister(0, VT);
751
752  if (AM.Scale > 2)
753    Complexity += 2;
754  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg
755  else if (AM.Scale > 1)
756    Complexity++;
757
758  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
759  // to a LEA. This is determined with some expermentation but is by no means
760  // optimal (especially for code size consideration). LEA is nice because of
761  // its three-address nature. Tweak the cost function again when we can run
762  // convertToThreeAddress() at register allocation time.
763  if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
764    // For X86-64, we should always use lea to materialize RIP relative
765    // addresses.
766    if (Subtarget->is64Bit())
767      Complexity = 4;
768    else
769      Complexity += 2;
770  }
771
772  if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
773    Complexity++;
774
775  if (Complexity > 2) {
776    getAddressOperands(AM, Base, Scale, Index, Disp);
777    return true;
778  }
779  return false;
780}
781
782bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
783                                  SDOperand &Base, SDOperand &Scale,
784                                  SDOperand &Index, SDOperand &Disp) {
785  if (N.getOpcode() == ISD::LOAD &&
786      N.hasOneUse() &&
787      CanBeFoldedBy(N.Val, P.Val))
788    return SelectAddr(N.getOperand(1), Base, Scale, Index, Disp);
789  return false;
790}
791
792static bool isRegister0(SDOperand Op) {
793  if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op))
794    return (R->getReg() == 0);
795  return false;
796}
797
798/// getGlobalBaseReg - Output the instructions required to put the
799/// base address to use for accessing globals into a register.
800///
801SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
802  assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
803  if (!GlobalBaseReg) {
804    // Insert the set of GlobalBaseReg into the first MBB of the function
805    MachineBasicBlock &FirstMBB = BB->getParent()->front();
806    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
807    SSARegMap *RegMap = BB->getParent()->getSSARegMap();
808    // FIXME: when we get to LP64, we will need to create the appropriate
809    // type of register here.
810    GlobalBaseReg = RegMap->createVirtualRegister(X86::GR32RegisterClass);
811    BuildMI(FirstMBB, MBBI, X86::MovePCtoStack, 0);
812    BuildMI(FirstMBB, MBBI, X86::POP32r, 1, GlobalBaseReg);
813  }
814  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
815}
816
817static SDNode *FindCallStartFromCall(SDNode *Node) {
818  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
819    assert(Node->getOperand(0).getValueType() == MVT::Other &&
820         "Node doesn't have a token chain argument!");
821  return FindCallStartFromCall(Node->getOperand(0).Val);
822}
823
824SDNode *X86DAGToDAGISel::Select(SDOperand N) {
825  SDNode *Node = N.Val;
826  MVT::ValueType NVT = Node->getValueType(0);
827  unsigned Opc, MOpc;
828  unsigned Opcode = Node->getOpcode();
829
830#ifndef NDEBUG
831  DEBUG(std::cerr << std::string(Indent, ' '));
832  DEBUG(std::cerr << "Selecting: ");
833  DEBUG(Node->dump(CurDAG));
834  DEBUG(std::cerr << "\n");
835  Indent += 2;
836#endif
837
838  if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
839#ifndef NDEBUG
840    DEBUG(std::cerr << std::string(Indent-2, ' '));
841    DEBUG(std::cerr << "== ");
842    DEBUG(Node->dump(CurDAG));
843    DEBUG(std::cerr << "\n");
844    Indent -= 2;
845#endif
846    return NULL;   // Already selected.
847  }
848
849  switch (Opcode) {
850    default: break;
851    case X86ISD::GlobalBaseReg:
852      return getGlobalBaseReg();
853
854    case ISD::ADD: {
855      // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
856      // code and is matched first so to prevent it from being turned into
857      // LEA32r X+c.
858      // In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
859      MVT::ValueType PtrVT = TLI.getPointerTy();
860      SDOperand N0 = N.getOperand(0);
861      SDOperand N1 = N.getOperand(1);
862      if (N.Val->getValueType(0) == PtrVT &&
863          N0.getOpcode() == X86ISD::Wrapper &&
864          N1.getOpcode() == ISD::Constant) {
865        unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
866        SDOperand C(0, 0);
867        // TODO: handle ExternalSymbolSDNode.
868        if (GlobalAddressSDNode *G =
869            dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
870          C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
871                                             G->getOffset() + Offset);
872        } else if (ConstantPoolSDNode *CP =
873                   dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
874          C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
875                                            CP->getAlignment(),
876                                            CP->getOffset()+Offset);
877        }
878
879        if (C.Val) {
880          if (Subtarget->is64Bit()) {
881            SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
882                                CurDAG->getRegister(0, PtrVT), C };
883            return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
884          } else
885            return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
886        }
887      }
888
889      // Other cases are handled by auto-generated code.
890      break;
891    }
892
893    case ISD::MULHU:
894    case ISD::MULHS: {
895      if (Opcode == ISD::MULHU)
896        switch (NVT) {
897        default: assert(0 && "Unsupported VT!");
898        case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
899        case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
900        case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
901        case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
902        }
903      else
904        switch (NVT) {
905        default: assert(0 && "Unsupported VT!");
906        case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
907        case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
908        case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
909        case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
910        }
911
912      unsigned LoReg, HiReg;
913      switch (NVT) {
914      default: assert(0 && "Unsupported VT!");
915      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
916      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
917      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
918      case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
919      }
920
921      SDOperand N0 = Node->getOperand(0);
922      SDOperand N1 = Node->getOperand(1);
923
924      bool foldedLoad = false;
925      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
926      foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
927      // MULHU and MULHS are commmutative
928      if (!foldedLoad) {
929        foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
930        if (foldedLoad) {
931          N0 = Node->getOperand(1);
932          N1 = Node->getOperand(0);
933        }
934      }
935
936      SDOperand Chain;
937      if (foldedLoad) {
938        Chain = N1.getOperand(0);
939        AddToISelQueue(Chain);
940      } else
941        Chain = CurDAG->getEntryNode();
942
943      SDOperand InFlag(0, 0);
944      AddToISelQueue(N0);
945      Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
946                                    N0, InFlag);
947      InFlag = Chain.getValue(1);
948
949      if (foldedLoad) {
950        AddToISelQueue(Tmp0);
951        AddToISelQueue(Tmp1);
952        AddToISelQueue(Tmp2);
953        AddToISelQueue(Tmp3);
954        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
955        SDNode *CNode =
956          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
957        Chain  = SDOperand(CNode, 0);
958        InFlag = SDOperand(CNode, 1);
959      } else {
960        AddToISelQueue(N1);
961        InFlag =
962          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
963      }
964
965      SDOperand Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
966      ReplaceUses(N.getValue(0), Result);
967      if (foldedLoad)
968        ReplaceUses(N1.getValue(1), Result.getValue(1));
969
970#ifndef NDEBUG
971      DEBUG(std::cerr << std::string(Indent-2, ' '));
972      DEBUG(std::cerr << "=> ");
973      DEBUG(Result.Val->dump(CurDAG));
974      DEBUG(std::cerr << "\n");
975      Indent -= 2;
976#endif
977      return NULL;
978    }
979
980    case ISD::SDIV:
981    case ISD::UDIV:
982    case ISD::SREM:
983    case ISD::UREM: {
984      bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
985      bool isDiv    = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
986      if (!isSigned)
987        switch (NVT) {
988        default: assert(0 && "Unsupported VT!");
989        case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
990        case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
991        case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
992        case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
993        }
994      else
995        switch (NVT) {
996        default: assert(0 && "Unsupported VT!");
997        case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
998        case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
999        case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
1000        case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
1001        }
1002
1003      unsigned LoReg, HiReg;
1004      unsigned ClrOpcode, SExtOpcode;
1005      switch (NVT) {
1006      default: assert(0 && "Unsupported VT!");
1007      case MVT::i8:
1008        LoReg = X86::AL;  HiReg = X86::AH;
1009        ClrOpcode  = X86::MOV8r0;
1010        SExtOpcode = X86::CBW;
1011        break;
1012      case MVT::i16:
1013        LoReg = X86::AX;  HiReg = X86::DX;
1014        ClrOpcode  = X86::MOV16r0;
1015        SExtOpcode = X86::CWD;
1016        break;
1017      case MVT::i32:
1018        LoReg = X86::EAX; HiReg = X86::EDX;
1019        ClrOpcode  = X86::MOV32r0;
1020        SExtOpcode = X86::CDQ;
1021        break;
1022      case MVT::i64:
1023        LoReg = X86::RAX; HiReg = X86::RDX;
1024        ClrOpcode  = X86::MOV64r0;
1025        SExtOpcode = X86::CQO;
1026        break;
1027      }
1028
1029      SDOperand N0 = Node->getOperand(0);
1030      SDOperand N1 = Node->getOperand(1);
1031
1032      bool foldedLoad = false;
1033      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
1034      foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
1035      SDOperand Chain;
1036      if (foldedLoad) {
1037        Chain = N1.getOperand(0);
1038        AddToISelQueue(Chain);
1039      } else
1040        Chain = CurDAG->getEntryNode();
1041
1042      SDOperand InFlag(0, 0);
1043      AddToISelQueue(N0);
1044      Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
1045                                    N0, InFlag);
1046      InFlag = Chain.getValue(1);
1047
1048      if (isSigned) {
1049        // Sign extend the low part into the high part.
1050        InFlag =
1051          SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
1052      } else {
1053        // Zero out the high part, effectively zero extending the input.
1054        SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
1055        Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(HiReg, NVT),
1056                                      ClrNode, InFlag);
1057        InFlag = Chain.getValue(1);
1058      }
1059
1060      if (foldedLoad) {
1061        AddToISelQueue(Tmp0);
1062        AddToISelQueue(Tmp1);
1063        AddToISelQueue(Tmp2);
1064        AddToISelQueue(Tmp3);
1065        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
1066        SDNode *CNode =
1067          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
1068        Chain  = SDOperand(CNode, 0);
1069        InFlag = SDOperand(CNode, 1);
1070      } else {
1071        AddToISelQueue(N1);
1072        InFlag =
1073          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
1074      }
1075
1076      SDOperand Result = CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg,
1077                                                NVT, InFlag);
1078      ReplaceUses(N.getValue(0), Result);
1079      if (foldedLoad)
1080        ReplaceUses(N1.getValue(1), Result.getValue(1));
1081
1082#ifndef NDEBUG
1083      DEBUG(std::cerr << std::string(Indent-2, ' '));
1084      DEBUG(std::cerr << "=> ");
1085      DEBUG(Result.Val->dump(CurDAG));
1086      DEBUG(std::cerr << "\n");
1087      Indent -= 2;
1088#endif
1089
1090      return NULL;
1091    }
1092
1093    case ISD::TRUNCATE: {
1094      if (!Subtarget->is64Bit() && NVT == MVT::i8) {
1095        unsigned Opc2;
1096        MVT::ValueType VT;
1097        switch (Node->getOperand(0).getValueType()) {
1098        default: assert(0 && "Unknown truncate!");
1099        case MVT::i16:
1100          Opc = X86::MOV16to16_;
1101          VT = MVT::i16;
1102          Opc2 = X86::TRUNC_16_to8;
1103          break;
1104        case MVT::i32:
1105          Opc = X86::MOV32to32_;
1106          VT = MVT::i32;
1107          Opc2 = X86::TRUNC_32_to8;
1108          break;
1109        }
1110
1111        AddToISelQueue(Node->getOperand(0));
1112        SDOperand Tmp =
1113          SDOperand(CurDAG->getTargetNode(Opc, VT, Node->getOperand(0)), 0);
1114        SDNode *ResNode = CurDAG->getTargetNode(Opc2, NVT, Tmp);
1115
1116#ifndef NDEBUG
1117        DEBUG(std::cerr << std::string(Indent-2, ' '));
1118        DEBUG(std::cerr << "=> ");
1119        DEBUG(ResNode->dump(CurDAG));
1120        DEBUG(std::cerr << "\n");
1121        Indent -= 2;
1122#endif
1123        return ResNode;
1124      }
1125
1126      break;
1127    }
1128  }
1129
1130  SDNode *ResNode = SelectCode(N);
1131
1132#ifndef NDEBUG
1133  DEBUG(std::cerr << std::string(Indent-2, ' '));
1134  DEBUG(std::cerr << "=> ");
1135  if (ResNode == NULL || ResNode == N.Val)
1136    DEBUG(N.Val->dump(CurDAG));
1137  else
1138    DEBUG(ResNode->dump(CurDAG));
1139  DEBUG(std::cerr << "\n");
1140  Indent -= 2;
1141#endif
1142
1143  return ResNode;
1144}
1145
1146bool X86DAGToDAGISel::
1147SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
1148                             std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
1149  SDOperand Op0, Op1, Op2, Op3;
1150  switch (ConstraintCode) {
1151  case 'o':   // offsetable        ??
1152  case 'v':   // not offsetable    ??
1153  default: return true;
1154  case 'm':   // memory
1155    if (!SelectAddr(Op, Op0, Op1, Op2, Op3))
1156      return true;
1157    break;
1158  }
1159
1160  OutOps.push_back(Op0);
1161  OutOps.push_back(Op1);
1162  OutOps.push_back(Op2);
1163  OutOps.push_back(Op3);
1164  AddToISelQueue(Op0);
1165  AddToISelQueue(Op1);
1166  AddToISelQueue(Op2);
1167  AddToISelQueue(Op3);
1168  return false;
1169}
1170
1171/// createX86ISelDag - This pass converts a legalized DAG into a
1172/// X86-specific DAG, ready for instruction scheduling.
1173///
1174FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
1175  return new X86DAGToDAGISel(TM, Fast);
1176}
1177