X86ISelDAGToDAG.cpp revision 70e674e95e73c33e747b2c0d97a40e3b06d163de
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by the Evan Cheng and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines a DAG pattern matching instruction selector for X86,
11// converting from a legalized dag to a X86 dag.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "x86-isel"
16#include "X86.h"
17#include "X86InstrBuilder.h"
18#include "X86ISelLowering.h"
19#include "X86RegisterInfo.h"
20#include "X86Subtarget.h"
21#include "X86TargetMachine.h"
22#include "llvm/GlobalValue.h"
23#include "llvm/Instructions.h"
24#include "llvm/Intrinsics.h"
25#include "llvm/Support/CFG.h"
26#include "llvm/CodeGen/MachineConstantPool.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineFrameInfo.h"
29#include "llvm/CodeGen/MachineInstrBuilder.h"
30#include "llvm/CodeGen/SSARegMap.h"
31#include "llvm/CodeGen/SelectionDAGISel.h"
32#include "llvm/Target/TargetMachine.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/Compiler.h"
35#include "llvm/ADT/Statistic.h"
36#include <deque>
37#include <iostream>
38#include <queue>
39#include <set>
40using namespace llvm;
41
42#include "llvm/Support/CommandLine.h"
// Hidden command-line switch (-enable-x86-isel-preprocessing).  When it is set,
// InstructionSelectBasicBlock runs InstructionSelectPreprocess on the DAG
// before instruction selection starts.
static cl::opt<bool> X86ISelPreproc("enable-x86-isel-preprocessing", cl::Hidden,
                                  cl::desc("Enable isel preprocessing on X86"));
45
46//===----------------------------------------------------------------------===//
47//                      Pattern Matcher Implementation
48//===----------------------------------------------------------------------===//
49
50namespace {
51  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
52  /// SDOperand's instead of register numbers for the leaves of the matched
53  /// tree.
54  struct X86ISelAddressMode {
55    enum {
56      RegBase,
57      FrameIndexBase
58    } BaseType;
59
60    struct {            // This is really a union, discriminated by BaseType!
61      SDOperand Reg;
62      int FrameIndex;
63    } Base;
64
65    unsigned Scale;
66    SDOperand IndexReg;
67    unsigned Disp;
68    GlobalValue *GV;
69    Constant *CP;
70    unsigned Align;    // CP alignment.
71
72    X86ISelAddressMode()
73      : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), GV(0),
74        CP(0), Align(0) {
75    }
76  };
77}
78
79namespace {
  // Incremented by InstructionSelectBasicBlock each time it inserts an
  // FP_REG_KILL instruction into a block.
  Statistic<>
  NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
82
83  //===--------------------------------------------------------------------===//
84  /// ISel - X86 specific code to select X86 machine instructions for
85  /// SelectionDAG operations.
86  ///
87  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
88    /// ContainsFPCode - Every instruction we select that uses or defines a FP
89    /// register should set this to true.
90    bool ContainsFPCode;
91
92    /// X86Lowering - This object fully describes how to lower LLVM code to an
93    /// X86-specific SelectionDAG.
94    X86TargetLowering X86Lowering;
95
96    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
97    /// make the right decision when generating code for different targets.
98    const X86Subtarget *Subtarget;
99
100    unsigned GlobalBaseReg;
101
102  public:
103    X86DAGToDAGISel(X86TargetMachine &TM)
104      : SelectionDAGISel(X86Lowering),
105        X86Lowering(*TM.getTargetLowering()),
106        Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
107
108    virtual bool runOnFunction(Function &Fn) {
109      // Make sure we re-emit a set of the global base reg if necessary
110      GlobalBaseReg = 0;
111      return SelectionDAGISel::runOnFunction(Fn);
112    }
113
114    virtual const char *getPassName() const {
115      return "X86 DAG->DAG Instruction Selection";
116    }
117
118    /// InstructionSelectBasicBlock - This callback is invoked by
119    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
120    virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
121
122    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
123
124    virtual bool CanBeFoldedBy(SDNode *N, SDNode *U);
125
126// Include the pieces autogenerated from the target description.
127#include "X86GenDAGISel.inc"
128
129  private:
130    SDNode *Select(SDOperand N);
131
132    bool MatchAddress(SDOperand N, X86ISelAddressMode &AM, bool isRoot = true);
133    bool SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
134                    SDOperand &Index, SDOperand &Disp);
135    bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
136                       SDOperand &Index, SDOperand &Disp);
137    bool TryFoldLoad(SDOperand P, SDOperand N,
138                     SDOperand &Base, SDOperand &Scale,
139                     SDOperand &Index, SDOperand &Disp);
140    void InstructionSelectPreprocess(SelectionDAG &DAG);
141
142    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
143    /// inline asm expressions.
144    virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
145                                              char ConstraintCode,
146                                              std::vector<SDOperand> &OutOps,
147                                              SelectionDAG &DAG);
148
149    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
150
151    inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
152                                   SDOperand &Scale, SDOperand &Index,
153                                   SDOperand &Disp) {
154      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
155        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, MVT::i32) : AM.Base.Reg;
156      Scale = getI8Imm(AM.Scale);
157      Index = AM.IndexReg;
158      Disp  = AM.GV ? CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp)
159        : (AM.CP ?
160           CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp)
161           : getI32Imm(AM.Disp));
162    }
163
164    /// getI8Imm - Return a target constant with the specified value, of type
165    /// i8.
166    inline SDOperand getI8Imm(unsigned Imm) {
167      return CurDAG->getTargetConstant(Imm, MVT::i8);
168    }
169
170    /// getI16Imm - Return a target constant with the specified value, of type
171    /// i16.
172    inline SDOperand getI16Imm(unsigned Imm) {
173      return CurDAG->getTargetConstant(Imm, MVT::i16);
174    }
175
176    /// getI32Imm - Return a target constant with the specified value, of type
177    /// i32.
178    inline SDOperand getI32Imm(unsigned Imm) {
179      return CurDAG->getTargetConstant(Imm, MVT::i32);
180    }
181
182    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
183    /// base register.  Return the virtual register that holds this value.
184    SDNode *getGlobalBaseReg();
185
186#ifndef NDEBUG
187    unsigned Indent;
188#endif
189  };
190}
191
192static void findNonImmUse(SDNode* Use, SDNode* Def, bool &found,
193                          std::set<SDNode *> &Visited) {
194  if (found ||
195      Use->getNodeId() > Def->getNodeId() ||
196      !Visited.insert(Use).second)
197    return;
198
199  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
200    SDNode *N = Use->getOperand(i).Val;
201    if (N != Def) {
202      findNonImmUse(N, Def, found, Visited);
203    } else {
204      found = true;
205      break;
206    }
207  }
208}
209
210static inline bool isNonImmUse(SDNode* Use, SDNode* Def) {
211  std::set<SDNode *> Visited;
212  bool found = false;
213  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
214    SDNode *N = Use->getOperand(i).Val;
215    if (N != Def) {
216      findNonImmUse(N, Def, found, Visited);
217      if (found) break;
218    }
219  }
220  return found;
221}
222
223
224bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U) {
225  // If U use can somehow reach N through another path then U can't fold N or
226  // it will create a cycle. e.g. In the following diagram, U can reach N
227  // through X. If N is folded into into U, then X is both a predecessor and
228  // a successor of U.
229  //
230  //         [ N ]
231  //         ^  ^
232  //         |  |
233  //        /   \---
234  //      /        [X]
235  //      |         ^
236  //     [U]--------|
237  return !isNonImmUse(U, N);
238}
239
240/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
241/// and move load below the TokenFactor. Replace store's chain operand with
242/// load's chain result.
243static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
244                                 SDOperand Store, SDOperand TF) {
245  std::vector<SDOperand> Ops;
246  for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
247    if (Load.Val == TF.Val->getOperand(i).Val)
248      Ops.push_back(Load.Val->getOperand(0));
249    else
250      Ops.push_back(TF.Val->getOperand(i));
251  DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
252  DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
253  DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
254                         Store.getOperand(2), Store.getOperand(3));
255}
256
257/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
258/// selector to pick more load-modify-store instructions. This is a common
259/// case:
260///
261///     [Load chain]
262///         ^
263///         |
264///       [Load]
265///       ^    ^
266///       |    |
267///      /      \-
268///     /         |
269/// [TokenFactor] [Op]
270///     ^          ^
271///     |          |
272///      \        /
273///       \      /
274///       [Store]
275///
276/// The fact the store's chain operand != load's chain will prevent the
277/// (store (op (load))) instruction from being selected. We can transform it to:
278///
279///     [Load chain]
280///         ^
281///         |
282///    [TokenFactor]
283///         ^
284///         |
285///       [Load]
286///       ^    ^
287///       |    |
288///       |     \-
289///       |       |
290///       |     [Op]
291///       |       ^
292///       |       |
293///       \      /
294///        \    /
295///       [Store]
296void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
297  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
298         E = DAG.allnodes_end(); I != E; ++I) {
299    if (I->getOpcode() != ISD::STORE)
300      continue;
301    SDOperand Chain = I->getOperand(0);
302    if (Chain.Val->getOpcode() != ISD::TokenFactor)
303      continue;
304
305    SDOperand N1 = I->getOperand(1);
306    SDOperand N2 = I->getOperand(2);
307    if (!N1.hasOneUse())
308      continue;
309
310    bool RModW = false;
311    SDOperand Load;
312    unsigned Opcode = N1.Val->getOpcode();
313    switch (Opcode) {
314      case ISD::ADD:
315      case ISD::MUL:
316      case ISD::FADD:
317      case ISD::FMUL:
318      case ISD::AND:
319      case ISD::OR:
320      case ISD::XOR:
321      case ISD::ADDC:
322      case ISD::ADDE: {
323        SDOperand N10 = N1.getOperand(0);
324        SDOperand N11 = N1.getOperand(1);
325        if (N10.Val->getOpcode() == ISD::LOAD)
326          RModW = true;
327        else if (N11.Val->getOpcode() == ISD::LOAD) {
328          RModW = true;
329          std::swap(N10, N11);
330        }
331        RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
332          N10.getOperand(1) == N2;
333        if (RModW)
334          Load = N10;
335        break;
336      }
337      case ISD::SUB:
338      case ISD::SHL:
339      case ISD::SRA:
340      case ISD::SRL:
341      case ISD::ROTL:
342      case ISD::ROTR:
343      case ISD::SUBC:
344      case ISD::SUBE:
345      case X86ISD::SHLD:
346      case X86ISD::SHRD: {
347        SDOperand N10 = N1.getOperand(0);
348        if (N10.Val->getOpcode() == ISD::LOAD)
349          RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
350            N10.getOperand(1) == N2;
351        if (RModW)
352          Load = N10;
353        break;
354      }
355    }
356
357    if (RModW)
358      MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
359  }
360}
361
362/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
363/// when it has created a SelectionDAG for us to codegen.
364void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
365  DEBUG(BB->dump());
366  MachineFunction::iterator FirstMBB = BB;
367
368  if (X86ISelPreproc)
369    InstructionSelectPreprocess(DAG);
370
371  // Codegen the basic block.
372#ifndef NDEBUG
373  DEBUG(std::cerr << "===== Instruction selection begins:\n");
374  Indent = 0;
375#endif
376  DAG.setRoot(SelectRoot(DAG.getRoot()));
377#ifndef NDEBUG
378  DEBUG(std::cerr << "===== Instruction selection ends:\n");
379#endif
380
381  DAG.RemoveDeadNodes();
382
383  // Emit machine code to BB.
384  ScheduleAndEmitDAG(DAG);
385
386  // If we are emitting FP stack code, scan the basic block to determine if this
387  // block defines any FP values.  If so, put an FP_REG_KILL instruction before
388  // the terminator of the block.
389  if (!Subtarget->hasSSE2()) {
390    // Note that FP stack instructions *are* used in SSE code when returning
391    // values, but these are not live out of the basic block, so we don't need
392    // an FP_REG_KILL in this case either.
393    bool ContainsFPCode = false;
394
395    // Scan all of the machine instructions in these MBBs, checking for FP
396    // stores.
397    MachineFunction::iterator MBBI = FirstMBB;
398    do {
399      for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
400           !ContainsFPCode && I != E; ++I) {
401        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
402          if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
403              MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
404              RegMap->getRegClass(I->getOperand(0).getReg()) ==
405                X86::RFPRegisterClass) {
406            ContainsFPCode = true;
407            break;
408          }
409        }
410      }
411    } while (!ContainsFPCode && &*(MBBI++) != BB);
412
413    // Check PHI nodes in successor blocks.  These PHI's will be lowered to have
414    // a copy of the input value in this block.
415    if (!ContainsFPCode) {
416      // Final check, check LLVM BB's that are successors to the LLVM BB
417      // corresponding to BB for FP PHI nodes.
418      const BasicBlock *LLVMBB = BB->getBasicBlock();
419      const PHINode *PN;
420      for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
421           !ContainsFPCode && SI != E; ++SI) {
422        for (BasicBlock::const_iterator II = SI->begin();
423             (PN = dyn_cast<PHINode>(II)); ++II) {
424          if (PN->getType()->isFloatingPoint()) {
425            ContainsFPCode = true;
426            break;
427          }
428        }
429      }
430    }
431
432    // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
433    if (ContainsFPCode) {
434      BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
435      ++NumFPKill;
436    }
437  }
438}
439
440/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
441/// the main function.
442void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
443                                             MachineFrameInfo *MFI) {
444  if (Subtarget->TargetType == X86Subtarget::isCygwin)
445    BuildMI(BB, X86::CALLpcrel32, 1).addExternalSymbol("__main");
446
447  // Switch the FPU to 64-bit precision mode for better compatibility and speed.
448  int CWFrameIdx = MFI->CreateStackObject(2, 2);
449  addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
450
451  // Set the high part to be 64-bit precision.
452  addFrameReference(BuildMI(BB, X86::MOV8mi, 5),
453                    CWFrameIdx, 1).addImm(2);
454
455  // Reload the modified control word now.
456  addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
457}
458
459void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
460  // If this is main, emit special code for main.
461  MachineBasicBlock *BB = MF.begin();
462  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
463    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
464}
465
466/// MatchAddress - Add the specified node to the specified addressing mode,
467/// returning true if it cannot be done.  This just pattern matches for the
468/// addressing mode
469bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
470                                   bool isRoot) {
471  int id = N.Val->getNodeId();
472  bool Available = isSelected(id);
473
474  switch (N.getOpcode()) {
475  default: break;
476  case ISD::Constant:
477    AM.Disp += cast<ConstantSDNode>(N)->getValue();
478    return false;
479
480  case X86ISD::Wrapper:
481    // If both base and index components have been picked, we can't fit
482    // the result available in the register in the addressing mode. Duplicate
483    // GlobalAddress or ConstantPool as displacement.
484    if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
485      if (ConstantPoolSDNode *CP =
486          dyn_cast<ConstantPoolSDNode>(N.getOperand(0))) {
487        if (AM.CP == 0) {
488          AM.CP = CP->get();
489          AM.Align = CP->getAlignment();
490          AM.Disp += CP->getOffset();
491          return false;
492        }
493      } else if (GlobalAddressSDNode *G =
494                 dyn_cast<GlobalAddressSDNode>(N.getOperand(0))) {
495        if (AM.GV == 0) {
496          AM.GV = G->getGlobal();
497          AM.Disp += G->getOffset();
498          return false;
499        }
500      }
501    }
502    break;
503
504  case ISD::FrameIndex:
505    if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
506      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
507      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
508      return false;
509    }
510    break;
511
512  case ISD::SHL:
513    if (!Available && AM.IndexReg.Val == 0 && AM.Scale == 1)
514      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
515        unsigned Val = CN->getValue();
516        if (Val == 1 || Val == 2 || Val == 3) {
517          AM.Scale = 1 << Val;
518          SDOperand ShVal = N.Val->getOperand(0);
519
520          // Okay, we know that we have a scale by now.  However, if the scaled
521          // value is an add of something and a constant, we can fold the
522          // constant into the disp field here.
523          if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
524              isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
525            AM.IndexReg = ShVal.Val->getOperand(0);
526            ConstantSDNode *AddVal =
527              cast<ConstantSDNode>(ShVal.Val->getOperand(1));
528            AM.Disp += AddVal->getValue() << Val;
529          } else {
530            AM.IndexReg = ShVal;
531          }
532          return false;
533        }
534      }
535    break;
536
537  case ISD::MUL:
538    // X*[3,5,9] -> X+X*[2,4,8]
539    if (!Available &&
540        AM.BaseType == X86ISelAddressMode::RegBase &&
541        AM.Base.Reg.Val == 0 &&
542        AM.IndexReg.Val == 0)
543      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
544        if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
545          AM.Scale = unsigned(CN->getValue())-1;
546
547          SDOperand MulVal = N.Val->getOperand(0);
548          SDOperand Reg;
549
550          // Okay, we know that we have a scale by now.  However, if the scaled
551          // value is an add of something and a constant, we can fold the
552          // constant into the disp field here.
553          if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
554              isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
555            Reg = MulVal.Val->getOperand(0);
556            ConstantSDNode *AddVal =
557              cast<ConstantSDNode>(MulVal.Val->getOperand(1));
558            AM.Disp += AddVal->getValue() * CN->getValue();
559          } else {
560            Reg = N.Val->getOperand(0);
561          }
562
563          AM.IndexReg = AM.Base.Reg = Reg;
564          return false;
565        }
566    break;
567
568  case ISD::ADD: {
569    if (!Available) {
570      X86ISelAddressMode Backup = AM;
571      if (!MatchAddress(N.Val->getOperand(0), AM, false) &&
572          !MatchAddress(N.Val->getOperand(1), AM, false))
573        return false;
574      AM = Backup;
575      if (!MatchAddress(N.Val->getOperand(1), AM, false) &&
576          !MatchAddress(N.Val->getOperand(0), AM, false))
577        return false;
578      AM = Backup;
579    }
580    break;
581  }
582
583  case ISD::OR: {
584    if (!Available) {
585      X86ISelAddressMode Backup = AM;
586      // Look for (x << c1) | c2 where (c2 < c1)
587      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(0));
588      if (CN && !MatchAddress(N.Val->getOperand(1), AM, false)) {
589        if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) {
590          AM.Disp = CN->getValue();
591          return false;
592        }
593      }
594      AM = Backup;
595      CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1));
596      if (CN && !MatchAddress(N.Val->getOperand(0), AM, false)) {
597        if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) {
598          AM.Disp = CN->getValue();
599          return false;
600        }
601      }
602      AM = Backup;
603    }
604    break;
605  }
606  }
607
608  // Is the base register already occupied?
609  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
610    // If so, check to see if the scale index register is set.
611    if (AM.IndexReg.Val == 0) {
612      AM.IndexReg = N;
613      AM.Scale = 1;
614      return false;
615    }
616
617    // Otherwise, we cannot select it.
618    return true;
619  }
620
621  // Default, generate it as a register.
622  AM.BaseType = X86ISelAddressMode::RegBase;
623  AM.Base.Reg = N;
624  return false;
625}
626
627/// SelectAddr - returns true if it is able pattern match an addressing mode.
628/// It returns the operands which make up the maximal addressing mode it can
629/// match by reference.
630bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
631                                 SDOperand &Index, SDOperand &Disp) {
632  X86ISelAddressMode AM;
633  if (MatchAddress(N, AM))
634    return false;
635
636  if (AM.BaseType == X86ISelAddressMode::RegBase) {
637    if (!AM.Base.Reg.Val)
638      AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
639  }
640
641  if (!AM.IndexReg.Val)
642    AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
643
644  getAddressOperands(AM, Base, Scale, Index, Disp);
645  return true;
646}
647
648/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
649/// mode it matches can be cost effectively emitted as an LEA instruction.
650bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
651                                    SDOperand &Scale,
652                                    SDOperand &Index, SDOperand &Disp) {
653  X86ISelAddressMode AM;
654  if (MatchAddress(N, AM))
655    return false;
656
657  unsigned Complexity = 0;
658  if (AM.BaseType == X86ISelAddressMode::RegBase)
659    if (AM.Base.Reg.Val)
660      Complexity = 1;
661    else
662      AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
663  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
664    Complexity = 4;
665
666  if (AM.IndexReg.Val)
667    Complexity++;
668  else
669    AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
670
671  if (AM.Scale > 2)
672    Complexity += 2;
673  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg
674  else if (AM.Scale > 1)
675    Complexity++;
676
677  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
678  // to a LEA. This is determined with some expermentation but is by no means
679  // optimal (especially for code size consideration). LEA is nice because of
680  // its three-address nature. Tweak the cost function again when we can run
681  // convertToThreeAddress() at register allocation time.
682  if (AM.GV || AM.CP)
683    Complexity += 2;
684
685  if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
686    Complexity++;
687
688  if (Complexity > 2) {
689    getAddressOperands(AM, Base, Scale, Index, Disp);
690    return true;
691  }
692  return false;
693}
694
695bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
696                                  SDOperand &Base, SDOperand &Scale,
697                                  SDOperand &Index, SDOperand &Disp) {
698  if (N.getOpcode() == ISD::LOAD &&
699      N.hasOneUse() &&
700      P.Val->isOnlyUse(N.Val) &&
701      CanBeFoldedBy(N.Val, P.Val))
702    return SelectAddr(N.getOperand(1), Base, Scale, Index, Disp);
703  return false;
704}
705
706static bool isRegister0(SDOperand Op) {
707  if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op))
708    return (R->getReg() == 0);
709  return false;
710}
711
712/// getGlobalBaseReg - Output the instructions required to put the
713/// base address to use for accessing globals into a register.
714///
715SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
716  if (!GlobalBaseReg) {
717    // Insert the set of GlobalBaseReg into the first MBB of the function
718    MachineBasicBlock &FirstMBB = BB->getParent()->front();
719    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
720    SSARegMap *RegMap = BB->getParent()->getSSARegMap();
721    // FIXME: when we get to LP64, we will need to create the appropriate
722    // type of register here.
723    GlobalBaseReg = RegMap->createVirtualRegister(X86::GR32RegisterClass);
724    BuildMI(FirstMBB, MBBI, X86::MovePCtoStack, 0);
725    BuildMI(FirstMBB, MBBI, X86::POP32r, 1, GlobalBaseReg);
726  }
727  return CurDAG->getRegister(GlobalBaseReg, MVT::i32).Val;
728}
729
730static SDNode *FindCallStartFromCall(SDNode *Node) {
731  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
732    assert(Node->getOperand(0).getValueType() == MVT::Other &&
733         "Node doesn't have a token chain argument!");
734  return FindCallStartFromCall(Node->getOperand(0).Val);
735}
736
737SDNode *X86DAGToDAGISel::Select(SDOperand N) {
738  SDNode *Node = N.Val;
739  MVT::ValueType NVT = Node->getValueType(0);
740  unsigned Opc, MOpc;
741  unsigned Opcode = Node->getOpcode();
742
743#ifndef NDEBUG
744  DEBUG(std::cerr << std::string(Indent, ' '));
745  DEBUG(std::cerr << "Selecting: ");
746  DEBUG(Node->dump(CurDAG));
747  DEBUG(std::cerr << "\n");
748  Indent += 2;
749#endif
750
751  if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
752#ifndef NDEBUG
753    DEBUG(std::cerr << std::string(Indent-2, ' '));
754    DEBUG(std::cerr << "== ");
755    DEBUG(Node->dump(CurDAG));
756    DEBUG(std::cerr << "\n");
757    Indent -= 2;
758#endif
759    return NULL;   // Already selected.
760  }
761
762  switch (Opcode) {
763    default: break;
764    case X86ISD::GlobalBaseReg:
765      return getGlobalBaseReg();
766
767    case ISD::ADD: {
768      // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
769      // code and is matched first so to prevent it from being turned into
770      // LEA32r X+c.
771      SDOperand N0 = N.getOperand(0);
772      SDOperand N1 = N.getOperand(1);
773      if (N.Val->getValueType(0) == MVT::i32 &&
774          N0.getOpcode() == X86ISD::Wrapper &&
775          N1.getOpcode() == ISD::Constant) {
776        unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
777        SDOperand C(0, 0);
778        // TODO: handle ExternalSymbolSDNode.
779        if (GlobalAddressSDNode *G =
780            dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
781          C = CurDAG->getTargetGlobalAddress(G->getGlobal(), MVT::i32,
782                                             G->getOffset() + Offset);
783        } else if (ConstantPoolSDNode *CP =
784                   dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
785          C = CurDAG->getTargetConstantPool(CP->get(), MVT::i32,
786                                            CP->getAlignment(),
787                                            CP->getOffset()+Offset);
788        }
789
790        if (C.Val)
791          return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, MVT::i32, C);
792      }
793
794      // Other cases are handled by auto-generated code.
795      break;
796    }
797
798    case ISD::MULHU:
799    case ISD::MULHS: {
800      if (Opcode == ISD::MULHU)
801        switch (NVT) {
802        default: assert(0 && "Unsupported VT!");
803        case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
804        case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
805        case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
806        }
807      else
808        switch (NVT) {
809        default: assert(0 && "Unsupported VT!");
810        case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
811        case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
812        case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
813        }
814
815      unsigned LoReg, HiReg;
816      switch (NVT) {
817      default: assert(0 && "Unsupported VT!");
818      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
819      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
820      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
821      }
822
823      SDOperand N0 = Node->getOperand(0);
824      SDOperand N1 = Node->getOperand(1);
825
826      bool foldedLoad = false;
827      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
828      foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
829      // MULHU and MULHS are commmutative
830      if (!foldedLoad) {
831        foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
832        if (foldedLoad) {
833          N0 = Node->getOperand(1);
834          N1 = Node->getOperand(0);
835        }
836      }
837
838      SDOperand Chain;
839      if (foldedLoad) {
840        Chain = N1.getOperand(0);
841        AddToISelQueue(Chain);
842      } else
843        Chain = CurDAG->getEntryNode();
844
845      SDOperand InFlag(0, 0);
846      AddToISelQueue(N0);
847      Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
848                                    N0, InFlag);
849      InFlag = Chain.getValue(1);
850
851      if (foldedLoad) {
852        AddToISelQueue(Tmp0);
853        AddToISelQueue(Tmp1);
854        AddToISelQueue(Tmp2);
855        AddToISelQueue(Tmp3);
856        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
857        SDNode *CNode =
858          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
859        Chain  = SDOperand(CNode, 0);
860        InFlag = SDOperand(CNode, 1);
861      } else {
862        AddToISelQueue(N1);
863        InFlag =
864          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
865      }
866
867      SDOperand Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
868      ReplaceUses(N.getValue(0), Result);
869      if (foldedLoad)
870        ReplaceUses(N1.getValue(1), Result.getValue(1));
871
872#ifndef NDEBUG
873      DEBUG(std::cerr << std::string(Indent-2, ' '));
874      DEBUG(std::cerr << "=> ");
875      DEBUG(Result.Val->dump(CurDAG));
876      DEBUG(std::cerr << "\n");
877      Indent -= 2;
878#endif
879      return NULL;
880    }
881
882    case ISD::SDIV:
883    case ISD::UDIV:
884    case ISD::SREM:
885    case ISD::UREM: {
886      bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
887      bool isDiv    = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
888      if (!isSigned)
889        switch (NVT) {
890        default: assert(0 && "Unsupported VT!");
891        case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
892        case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
893        case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
894        }
895      else
896        switch (NVT) {
897        default: assert(0 && "Unsupported VT!");
898        case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
899        case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
900        case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
901        }
902
903      unsigned LoReg, HiReg;
904      unsigned ClrOpcode, SExtOpcode;
905      switch (NVT) {
906      default: assert(0 && "Unsupported VT!");
907      case MVT::i8:
908        LoReg = X86::AL;  HiReg = X86::AH;
909        ClrOpcode  = X86::MOV8r0;
910        SExtOpcode = X86::CBW;
911        break;
912      case MVT::i16:
913        LoReg = X86::AX;  HiReg = X86::DX;
914        ClrOpcode  = X86::MOV16r0;
915        SExtOpcode = X86::CWD;
916        break;
917      case MVT::i32:
918        LoReg = X86::EAX; HiReg = X86::EDX;
919        ClrOpcode  = X86::MOV32r0;
920        SExtOpcode = X86::CDQ;
921        break;
922      }
923
924      SDOperand N0 = Node->getOperand(0);
925      SDOperand N1 = Node->getOperand(1);
926
927      bool foldedLoad = false;
928      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
929      foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
930      SDOperand Chain;
931      if (foldedLoad) {
932        Chain = N1.getOperand(0);
933        AddToISelQueue(Chain);
934      } else
935        Chain = CurDAG->getEntryNode();
936
937      SDOperand InFlag(0, 0);
938      AddToISelQueue(N0);
939      Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
940                                    N0, InFlag);
941      InFlag = Chain.getValue(1);
942
943      if (isSigned) {
944        // Sign extend the low part into the high part.
945        InFlag =
946          SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
947      } else {
948        // Zero out the high part, effectively zero extending the input.
949        SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
950        Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(HiReg, NVT),
951                                      ClrNode, InFlag);
952        InFlag = Chain.getValue(1);
953      }
954
955      if (foldedLoad) {
956        AddToISelQueue(Tmp0);
957        AddToISelQueue(Tmp1);
958        AddToISelQueue(Tmp2);
959        AddToISelQueue(Tmp3);
960        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
961        SDNode *CNode =
962          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
963        Chain  = SDOperand(CNode, 0);
964        InFlag = SDOperand(CNode, 1);
965      } else {
966        AddToISelQueue(N1);
967        InFlag =
968          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
969      }
970
971      SDOperand Result = CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg,
972                                                NVT, InFlag);
973      ReplaceUses(N.getValue(0), Result);
974      if (foldedLoad)
975        ReplaceUses(N1.getValue(1), Result.getValue(1));
976
977#ifndef NDEBUG
978      DEBUG(std::cerr << std::string(Indent-2, ' '));
979      DEBUG(std::cerr << "=> ");
980      DEBUG(Result.Val->dump(CurDAG));
981      DEBUG(std::cerr << "\n");
982      Indent -= 2;
983#endif
984
985      return NULL;
986    }
987
988    case ISD::TRUNCATE: {
989      if (NVT == MVT::i8) {
990        unsigned Opc2;
991        MVT::ValueType VT;
992        switch (Node->getOperand(0).getValueType()) {
993        default: assert(0 && "Unknown truncate!");
994        case MVT::i16:
995          Opc = X86::MOV16to16_;
996          VT = MVT::i16;
997          Opc2 = X86::TRUNC_GR16_GR8;
998          break;
999        case MVT::i32:
1000          Opc = X86::MOV32to32_;
1001          VT = MVT::i32;
1002          Opc2 = X86::TRUNC_GR32_GR8;
1003          break;
1004        }
1005
1006        AddToISelQueue(Node->getOperand(0));
1007        SDOperand Tmp =
1008          SDOperand(CurDAG->getTargetNode(Opc, VT, Node->getOperand(0)), 0);
1009        SDNode *ResNode = CurDAG->getTargetNode(Opc2, NVT, Tmp);
1010
1011#ifndef NDEBUG
1012        DEBUG(std::cerr << std::string(Indent-2, ' '));
1013        DEBUG(std::cerr << "=> ");
1014        DEBUG(ResNode->dump(CurDAG));
1015        DEBUG(std::cerr << "\n");
1016        Indent -= 2;
1017#endif
1018        return ResNode;
1019      }
1020
1021      break;
1022    }
1023  }
1024
1025  SDNode *ResNode = SelectCode(N);
1026
1027#ifndef NDEBUG
1028  DEBUG(std::cerr << std::string(Indent-2, ' '));
1029  DEBUG(std::cerr << "=> ");
1030  if (ResNode == NULL || ResNode == N.Val)
1031    DEBUG(N.Val->dump(CurDAG));
1032  else
1033    DEBUG(ResNode->dump(CurDAG));
1034  DEBUG(std::cerr << "\n");
1035  Indent -= 2;
1036#endif
1037
1038  return ResNode;
1039}
1040
1041bool X86DAGToDAGISel::
1042SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
1043                             std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
1044  SDOperand Op0, Op1, Op2, Op3;
1045  switch (ConstraintCode) {
1046  case 'o':   // offsetable        ??
1047  case 'v':   // not offsetable    ??
1048  default: return true;
1049  case 'm':   // memory
1050    if (!SelectAddr(Op, Op0, Op1, Op2, Op3))
1051      return true;
1052    break;
1053  }
1054
1055  OutOps.push_back(Op0);
1056  OutOps.push_back(Op1);
1057  OutOps.push_back(Op2);
1058  OutOps.push_back(Op3);
1059  AddToISelQueue(Op0);
1060  AddToISelQueue(Op1);
1061  AddToISelQueue(Op2);
1062  AddToISelQueue(Op3);
1063  return false;
1064}
1065
1066/// createX86ISelDag - This pass converts a legalized DAG into a
1067/// X86-specific DAG, ready for instruction scheduling.
1068///
1069FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM) {
1070  return new X86DAGToDAGISel(TM);
1071}
1072