X86ISelDAGToDAG.cpp revision eb8730d131be888ba93f7dfc2e5457375190107e
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by the Evan Cheng and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines a DAG pattern matching instruction selector for X86,
11// converting from a legalized dag to a X86 dag.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "x86-isel"
16#include "X86.h"
17#include "X86InstrBuilder.h"
18#include "X86ISelLowering.h"
19#include "X86RegisterInfo.h"
20#include "X86Subtarget.h"
21#include "X86TargetMachine.h"
22#include "llvm/GlobalValue.h"
23#include "llvm/Instructions.h"
24#include "llvm/Intrinsics.h"
25#include "llvm/Support/CFG.h"
26#include "llvm/CodeGen/MachineConstantPool.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineFrameInfo.h"
29#include "llvm/CodeGen/MachineInstrBuilder.h"
30#include "llvm/CodeGen/SSARegMap.h"
31#include "llvm/CodeGen/SelectionDAGISel.h"
32#include "llvm/Target/TargetMachine.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/Visibility.h"
35#include "llvm/ADT/Statistic.h"
36#include <deque>
37#include <iostream>
38#include <queue>
39#include <set>
40using namespace llvm;
41
42//===----------------------------------------------------------------------===//
43//                      Pattern Matcher Implementation
44//===----------------------------------------------------------------------===//
45
46namespace {
47  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
48  /// SDOperand's instead of register numbers for the leaves of the matched
49  /// tree.
50  struct X86ISelAddressMode {
51    enum {
52      RegBase,
53      FrameIndexBase
54    } BaseType;
55
56    struct {            // This is really a union, discriminated by BaseType!
57      SDOperand Reg;
58      int FrameIndex;
59    } Base;
60
61    unsigned Scale;
62    SDOperand IndexReg;
63    unsigned Disp;
64    GlobalValue *GV;
65    Constant *CP;
66    unsigned Align;    // CP alignment.
67
68    X86ISelAddressMode()
69      : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), GV(0),
70        CP(0), Align(0) {
71    }
72  };
73}
74
75namespace {
76  Statistic<>
77  NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
78
79  //===--------------------------------------------------------------------===//
80  /// ISel - X86 specific code to select X86 machine instructions for
81  /// SelectionDAG operations.
82  ///
83  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
84    /// ContainsFPCode - Every instruction we select that uses or defines a FP
85    /// register should set this to true.
86    bool ContainsFPCode;
87
88    /// X86Lowering - This object fully describes how to lower LLVM code to an
89    /// X86-specific SelectionDAG.
90    X86TargetLowering X86Lowering;
91
92    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
93    /// make the right decision when generating code for different targets.
94    const X86Subtarget *Subtarget;
95
96    unsigned GlobalBaseReg;
97
98  public:
99    X86DAGToDAGISel(X86TargetMachine &TM)
100      : SelectionDAGISel(X86Lowering),
101        X86Lowering(*TM.getTargetLowering()),
102        Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
103
104    virtual bool runOnFunction(Function &Fn) {
105      // Make sure we re-emit a set of the global base reg if necessary
106      GlobalBaseReg = 0;
107      return SelectionDAGISel::runOnFunction(Fn);
108    }
109
110    virtual const char *getPassName() const {
111      return "X86 DAG->DAG Instruction Selection";
112    }
113
114    /// InstructionSelectBasicBlock - This callback is invoked by
115    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
116    virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
117
118    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
119
120    virtual bool CanBeFoldedBy(SDNode *N, SDNode *U);
121
122// Include the pieces autogenerated from the target description.
123#include "X86GenDAGISel.inc"
124
125  private:
126    SDNode *Select(SDOperand &Result, SDOperand N);
127
128    bool MatchAddress(SDOperand N, X86ISelAddressMode &AM, bool isRoot = true);
129    bool SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
130                    SDOperand &Index, SDOperand &Disp);
131    bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
132                       SDOperand &Index, SDOperand &Disp);
133    bool TryFoldLoad(SDOperand P, SDOperand N,
134                     SDOperand &Base, SDOperand &Scale,
135                     SDOperand &Index, SDOperand &Disp);
136
137    virtual void SelectRootInit() {
138      DAGSize = CurDAG->AssignTopologicalOrder(TopOrder);
139      unsigned NumBytes = (DAGSize + 7) / 8;
140      UnfoldableSet = new unsigned char[NumBytes];
141      memset(UnfoldableSet, 0, NumBytes);
142    }
143
144    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
145    /// inline asm expressions.
146    virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
147                                              char ConstraintCode,
148                                              std::vector<SDOperand> &OutOps,
149                                              SelectionDAG &DAG);
150
151    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
152
153    inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
154                                   SDOperand &Scale, SDOperand &Index,
155                                   SDOperand &Disp) {
156      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
157        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, MVT::i32) : AM.Base.Reg;
158      Scale = getI8Imm(AM.Scale);
159      Index = AM.IndexReg;
160      Disp  = AM.GV ? CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp)
161        : (AM.CP ?
162           CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp)
163           : getI32Imm(AM.Disp));
164    }
165
166    /// getI8Imm - Return a target constant with the specified value, of type
167    /// i8.
168    inline SDOperand getI8Imm(unsigned Imm) {
169      return CurDAG->getTargetConstant(Imm, MVT::i8);
170    }
171
172    /// getI16Imm - Return a target constant with the specified value, of type
173    /// i16.
174    inline SDOperand getI16Imm(unsigned Imm) {
175      return CurDAG->getTargetConstant(Imm, MVT::i16);
176    }
177
178    /// getI32Imm - Return a target constant with the specified value, of type
179    /// i32.
180    inline SDOperand getI32Imm(unsigned Imm) {
181      return CurDAG->getTargetConstant(Imm, MVT::i32);
182    }
183
184    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
185    /// base register.  Return the virtual register that holds this value.
186    SDOperand getGlobalBaseReg();
187
188    /// UnfoldableSet - An boolean array representing nodes which have been
189    /// folded into addressing modes and therefore should not be folded in
190    /// another operation.
191    unsigned char *UnfoldableSet;
192
193    inline void setUnfoldable(SDNode *N) {
194      unsigned Id = N->getNodeId();
195      UnfoldableSet[Id / 8] |= 1 << (Id % 8);
196    }
197
198    inline bool isUnfoldable(SDNode *N) {
199      unsigned Id = N->getNodeId();
200      return UnfoldableSet[Id / 8] & (1 << (Id % 8));
201    }
202
203#ifndef NDEBUG
204    unsigned Indent;
205#endif
206  };
207}
208
209static void findNonImmUse(SDNode* Use, SDNode* Def, bool &found,
210                          std::set<SDNode *> &Visited) {
211  if (found ||
212      Use->getNodeId() > Def->getNodeId() ||
213      !Visited.insert(Use).second)
214    return;
215
216  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
217    SDNode *N = Use->getOperand(i).Val;
218    if (N != Def) {
219      findNonImmUse(N, Def, found, Visited);
220    } else {
221      found = true;
222      break;
223    }
224  }
225}
226
227static inline bool isNonImmUse(SDNode* Use, SDNode* Def) {
228  std::set<SDNode *> Visited;
229  bool found = false;
230  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
231    SDNode *N = Use->getOperand(i).Val;
232    if (N != Def) {
233      findNonImmUse(N, Def, found, Visited);
234      if (found) break;
235    }
236  }
237  return found;
238}
239
240
241bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U) {
242  // Is it already folded by SelectAddr / SelectLEAAddr?
243  if (isUnfoldable(N))
244    return false;
245
246  // If U use can somehow reach N through another path then U can't fold N or
247  // it will create a cycle. e.g. In the following diagram, U can reach N
248  // through X. If N is folded into into U, then X is both a predecessor and
249  // a successor of U.
250  //
251  //         [ N ]
252  //         ^  ^
253  //         |  |
254  //        /   \---
255  //      /        [X]
256  //      |         ^
257  //     [U]--------|
258  return !isNonImmUse(U, N);
259}
260
261/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
262/// when it has created a SelectionDAG for us to codegen.
263void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
264  DEBUG(BB->dump());
265  MachineFunction::iterator FirstMBB = BB;
266
267  // Codegen the basic block.
268#ifndef NDEBUG
269  DEBUG(std::cerr << "===== Instruction selection begins:\n");
270  Indent = 0;
271#endif
272  DAG.setRoot(SelectRoot(DAG.getRoot()));
273#ifndef NDEBUG
274  DEBUG(std::cerr << "===== Instruction selection ends:\n");
275#endif
276
277  UnfoldableSet = NULL;
278  DAG.RemoveDeadNodes();
279
280  // Emit machine code to BB.
281  ScheduleAndEmitDAG(DAG);
282
283  // If we are emitting FP stack code, scan the basic block to determine if this
284  // block defines any FP values.  If so, put an FP_REG_KILL instruction before
285  // the terminator of the block.
286  if (!Subtarget->hasSSE2()) {
287    // Note that FP stack instructions *are* used in SSE code when returning
288    // values, but these are not live out of the basic block, so we don't need
289    // an FP_REG_KILL in this case either.
290    bool ContainsFPCode = false;
291
292    // Scan all of the machine instructions in these MBBs, checking for FP
293    // stores.
294    MachineFunction::iterator MBBI = FirstMBB;
295    do {
296      for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
297           !ContainsFPCode && I != E; ++I) {
298        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
299          if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
300              MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
301              RegMap->getRegClass(I->getOperand(0).getReg()) ==
302                X86::RFPRegisterClass) {
303            ContainsFPCode = true;
304            break;
305          }
306        }
307      }
308    } while (!ContainsFPCode && &*(MBBI++) != BB);
309
310    // Check PHI nodes in successor blocks.  These PHI's will be lowered to have
311    // a copy of the input value in this block.
312    if (!ContainsFPCode) {
313      // Final check, check LLVM BB's that are successors to the LLVM BB
314      // corresponding to BB for FP PHI nodes.
315      const BasicBlock *LLVMBB = BB->getBasicBlock();
316      const PHINode *PN;
317      for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
318           !ContainsFPCode && SI != E; ++SI) {
319        for (BasicBlock::const_iterator II = SI->begin();
320             (PN = dyn_cast<PHINode>(II)); ++II) {
321          if (PN->getType()->isFloatingPoint()) {
322            ContainsFPCode = true;
323            break;
324          }
325        }
326      }
327    }
328
329    // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
330    if (ContainsFPCode) {
331      BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
332      ++NumFPKill;
333    }
334  }
335}
336
337/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
338/// the main function.
339void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
340                                             MachineFrameInfo *MFI) {
341  if (Subtarget->TargetType == X86Subtarget::isCygwin)
342    BuildMI(BB, X86::CALLpcrel32, 1).addExternalSymbol("__main");
343
344  // Switch the FPU to 64-bit precision mode for better compatibility and speed.
345  int CWFrameIdx = MFI->CreateStackObject(2, 2);
346  addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
347
348  // Set the high part to be 64-bit precision.
349  addFrameReference(BuildMI(BB, X86::MOV8mi, 5),
350                    CWFrameIdx, 1).addImm(2);
351
352  // Reload the modified control word now.
353  addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
354}
355
356void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
357  // If this is main, emit special code for main.
358  MachineBasicBlock *BB = MF.begin();
359  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
360    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
361}
362
363/// MatchAddress - Add the specified node to the specified addressing mode,
364/// returning true if it cannot be done.  This just pattern matches for the
365/// addressing mode
366bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
367                                   bool isRoot) {
368  int id = N.Val->getNodeId();
369  bool Available = isSelected(id);
370
371  switch (N.getOpcode()) {
372  default: break;
373  case ISD::Constant:
374    AM.Disp += cast<ConstantSDNode>(N)->getValue();
375    return false;
376
377  case X86ISD::Wrapper:
378    // If both base and index components have been picked, we can't fit
379    // the result available in the register in the addressing mode. Duplicate
380    // GlobalAddress or ConstantPool as displacement.
381    if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
382      if (ConstantPoolSDNode *CP =
383          dyn_cast<ConstantPoolSDNode>(N.getOperand(0))) {
384        if (AM.CP == 0) {
385          AM.CP = CP->get();
386          AM.Align = CP->getAlignment();
387          AM.Disp += CP->getOffset();
388          return false;
389        }
390      } else if (GlobalAddressSDNode *G =
391                 dyn_cast<GlobalAddressSDNode>(N.getOperand(0))) {
392        if (AM.GV == 0) {
393          AM.GV = G->getGlobal();
394          AM.Disp += G->getOffset();
395          return false;
396        }
397      }
398    }
399    break;
400
401  case ISD::FrameIndex:
402    if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
403      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
404      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
405      return false;
406    }
407    break;
408
409  case ISD::SHL:
410    if (!Available && AM.IndexReg.Val == 0 && AM.Scale == 1)
411      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
412        unsigned Val = CN->getValue();
413        if (Val == 1 || Val == 2 || Val == 3) {
414          AM.Scale = 1 << Val;
415          SDOperand ShVal = N.Val->getOperand(0);
416
417          // Okay, we know that we have a scale by now.  However, if the scaled
418          // value is an add of something and a constant, we can fold the
419          // constant into the disp field here.
420          if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
421              isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
422            AM.IndexReg = ShVal.Val->getOperand(0);
423            ConstantSDNode *AddVal =
424              cast<ConstantSDNode>(ShVal.Val->getOperand(1));
425            AM.Disp += AddVal->getValue() << Val;
426          } else {
427            AM.IndexReg = ShVal;
428          }
429          return false;
430        }
431      }
432    break;
433
434  case ISD::MUL:
435    // X*[3,5,9] -> X+X*[2,4,8]
436    if (!Available &&
437        AM.BaseType == X86ISelAddressMode::RegBase &&
438        AM.Base.Reg.Val == 0 &&
439        AM.IndexReg.Val == 0)
440      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
441        if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
442          AM.Scale = unsigned(CN->getValue())-1;
443
444          SDOperand MulVal = N.Val->getOperand(0);
445          SDOperand Reg;
446
447          // Okay, we know that we have a scale by now.  However, if the scaled
448          // value is an add of something and a constant, we can fold the
449          // constant into the disp field here.
450          if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
451              isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
452            Reg = MulVal.Val->getOperand(0);
453            ConstantSDNode *AddVal =
454              cast<ConstantSDNode>(MulVal.Val->getOperand(1));
455            AM.Disp += AddVal->getValue() * CN->getValue();
456          } else {
457            Reg = N.Val->getOperand(0);
458          }
459
460          AM.IndexReg = AM.Base.Reg = Reg;
461          return false;
462        }
463    break;
464
465  case ISD::ADD: {
466    if (!Available) {
467      X86ISelAddressMode Backup = AM;
468      if (!MatchAddress(N.Val->getOperand(0), AM, false) &&
469          !MatchAddress(N.Val->getOperand(1), AM, false))
470        return false;
471      AM = Backup;
472      if (!MatchAddress(N.Val->getOperand(1), AM, false) &&
473          !MatchAddress(N.Val->getOperand(0), AM, false))
474        return false;
475      AM = Backup;
476    }
477    break;
478  }
479
480  case ISD::OR: {
481    if (!Available) {
482      X86ISelAddressMode Backup = AM;
483      // Look for (x << c1) | c2 where (c2 < c1)
484      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(0));
485      if (CN && !MatchAddress(N.Val->getOperand(1), AM, false)) {
486        if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) {
487          AM.Disp = CN->getValue();
488          return false;
489        }
490      }
491      AM = Backup;
492      CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1));
493      if (CN && !MatchAddress(N.Val->getOperand(0), AM, false)) {
494        if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) {
495          AM.Disp = CN->getValue();
496          return false;
497        }
498      }
499      AM = Backup;
500    }
501    break;
502  }
503  }
504
505  // Is the base register already occupied?
506  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
507    // If so, check to see if the scale index register is set.
508    if (AM.IndexReg.Val == 0) {
509      AM.IndexReg = N;
510      AM.Scale = 1;
511      return false;
512    }
513
514    // Otherwise, we cannot select it.
515    return true;
516  }
517
518  // Default, generate it as a register.
519  AM.BaseType = X86ISelAddressMode::RegBase;
520  AM.Base.Reg = N;
521  return false;
522}
523
524/// SelectAddr - returns true if it is able pattern match an addressing mode.
525/// It returns the operands which make up the maximal addressing mode it can
526/// match by reference.
527bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
528                                 SDOperand &Index, SDOperand &Disp) {
529  X86ISelAddressMode AM;
530  if (MatchAddress(N, AM))
531    return false;
532
533  if (AM.BaseType == X86ISelAddressMode::RegBase) {
534    if (!AM.Base.Reg.Val)
535      AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
536  }
537
538  if (!AM.IndexReg.Val)
539    AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
540
541  getAddressOperands(AM, Base, Scale, Index, Disp);
542
543  int Id = Base.Val ? Base.Val->getNodeId() : -1;
544  if (Id != -1)
545    setUnfoldable(Base.Val);
546  Id = Index.Val ? Index.Val->getNodeId() : -1;
547  if (Id != -1)
548    setUnfoldable(Index.Val);
549
550  return true;
551}
552
553/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
554/// mode it matches can be cost effectively emitted as an LEA instruction.
555bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
556                                    SDOperand &Scale,
557                                    SDOperand &Index, SDOperand &Disp) {
558  X86ISelAddressMode AM;
559  if (MatchAddress(N, AM))
560    return false;
561
562  unsigned Complexity = 0;
563  if (AM.BaseType == X86ISelAddressMode::RegBase)
564    if (AM.Base.Reg.Val)
565      Complexity = 1;
566    else
567      AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
568  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
569    Complexity = 4;
570
571  if (AM.IndexReg.Val)
572    Complexity++;
573  else
574    AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
575
576  if (AM.Scale > 2)
577    Complexity += 2;
578  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg
579  else if (AM.Scale > 1)
580    Complexity++;
581
582  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
583  // to a LEA. This is determined with some expermentation but is by no means
584  // optimal (especially for code size consideration). LEA is nice because of
585  // its three-address nature. Tweak the cost function again when we can run
586  // convertToThreeAddress() at register allocation time.
587  if (AM.GV || AM.CP)
588    Complexity += 2;
589
590  if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
591    Complexity++;
592
593  if (Complexity > 2) {
594    getAddressOperands(AM, Base, Scale, Index, Disp);
595    return true;
596  }
597
598  int Id = Base.Val ? Base.Val->getNodeId() : -1;
599  if (Id != -1)
600    setUnfoldable(Base.Val);
601  Id = Index.Val ? Index.Val->getNodeId() : -1;
602  if (Id != -1)
603    setUnfoldable(Index.Val);
604
605  return false;
606}
607
608bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
609                                  SDOperand &Base, SDOperand &Scale,
610                                  SDOperand &Index, SDOperand &Disp) {
611  if (N.getOpcode() == ISD::LOAD &&
612      N.hasOneUse() &&
613      P.Val->isOnlyUse(N.Val) &&
614      CanBeFoldedBy(N.Val, P.Val))
615    return SelectAddr(N.getOperand(1), Base, Scale, Index, Disp);
616  return false;
617}
618
619static bool isRegister0(SDOperand Op) {
620  if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op))
621    return (R->getReg() == 0);
622  return false;
623}
624
625/// getGlobalBaseReg - Output the instructions required to put the
626/// base address to use for accessing globals into a register.
627///
628SDOperand X86DAGToDAGISel::getGlobalBaseReg() {
629  if (!GlobalBaseReg) {
630    // Insert the set of GlobalBaseReg into the first MBB of the function
631    MachineBasicBlock &FirstMBB = BB->getParent()->front();
632    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
633    SSARegMap *RegMap = BB->getParent()->getSSARegMap();
634    // FIXME: when we get to LP64, we will need to create the appropriate
635    // type of register here.
636    GlobalBaseReg = RegMap->createVirtualRegister(X86::GR32RegisterClass);
637    BuildMI(FirstMBB, MBBI, X86::MovePCtoStack, 0);
638    BuildMI(FirstMBB, MBBI, X86::POP32r, 1, GlobalBaseReg);
639  }
640  return CurDAG->getRegister(GlobalBaseReg, MVT::i32);
641}
642
643static SDNode *FindCallStartFromCall(SDNode *Node) {
644  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
645    assert(Node->getOperand(0).getValueType() == MVT::Other &&
646         "Node doesn't have a token chain argument!");
647  return FindCallStartFromCall(Node->getOperand(0).Val);
648}
649
650SDNode *X86DAGToDAGISel::Select(SDOperand &Result, SDOperand N) {
651  SDNode *Node = N.Val;
652  MVT::ValueType NVT = Node->getValueType(0);
653  unsigned Opc, MOpc;
654  unsigned Opcode = Node->getOpcode();
655
656#ifndef NDEBUG
657  DEBUG(std::cerr << std::string(Indent, ' '));
658  DEBUG(std::cerr << "Selecting: ");
659  DEBUG(Node->dump(CurDAG));
660  DEBUG(std::cerr << "\n");
661  Indent += 2;
662#endif
663
664  if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
665    Result = N;
666#ifndef NDEBUG
667    DEBUG(std::cerr << std::string(Indent-2, ' '));
668    DEBUG(std::cerr << "== ");
669    DEBUG(Node->dump(CurDAG));
670    DEBUG(std::cerr << "\n");
671    Indent -= 2;
672#endif
673    return NULL;   // Already selected.
674  }
675
676  switch (Opcode) {
677    default: break;
678    case X86ISD::GlobalBaseReg:
679      Result = getGlobalBaseReg();
680      return Result.Val;
681
682    case ISD::ADD: {
683      // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
684      // code and is matched first so to prevent it from being turned into
685      // LEA32r X+c.
686      SDOperand N0 = N.getOperand(0);
687      SDOperand N1 = N.getOperand(1);
688      if (N.Val->getValueType(0) == MVT::i32 &&
689          N0.getOpcode() == X86ISD::Wrapper &&
690          N1.getOpcode() == ISD::Constant) {
691        unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
692        SDOperand C(0, 0);
693        // TODO: handle ExternalSymbolSDNode.
694        if (GlobalAddressSDNode *G =
695            dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
696          C = CurDAG->getTargetGlobalAddress(G->getGlobal(), MVT::i32,
697                                             G->getOffset() + Offset);
698        } else if (ConstantPoolSDNode *CP =
699                   dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
700          C = CurDAG->getTargetConstantPool(CP->get(), MVT::i32,
701                                            CP->getAlignment(),
702                                            CP->getOffset()+Offset);
703        }
704
705        if (C.Val) {
706          if (N.Val->hasOneUse()) {
707            return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, MVT::i32, C).Val;
708          } else {
709            SDNode *ResNode = CurDAG->getTargetNode(X86::MOV32ri, MVT::i32, C);
710            Result = SDOperand(ResNode, 0);
711	    return ResNode;
712          }
713        }
714      }
715
716      // Other cases are handled by auto-generated code.
717      break;
718    }
719
720    case ISD::MULHU:
721    case ISD::MULHS: {
722      if (Opcode == ISD::MULHU)
723        switch (NVT) {
724        default: assert(0 && "Unsupported VT!");
725        case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
726        case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
727        case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
728        }
729      else
730        switch (NVT) {
731        default: assert(0 && "Unsupported VT!");
732        case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
733        case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
734        case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
735        }
736
737      unsigned LoReg, HiReg;
738      switch (NVT) {
739      default: assert(0 && "Unsupported VT!");
740      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
741      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
742      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
743      }
744
745      SDOperand N0 = Node->getOperand(0);
746      SDOperand N1 = Node->getOperand(1);
747
748      bool foldedLoad = false;
749      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
750      foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
751      // MULHU and MULHS are commmutative
752      if (!foldedLoad) {
753        foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
754        if (foldedLoad) {
755          N0 = Node->getOperand(1);
756          N1 = Node->getOperand(0);
757        }
758      }
759
760      SDOperand Chain;
761      if (foldedLoad)
762        AddToQueue(Chain, N1.getOperand(0));
763      else
764        Chain = CurDAG->getEntryNode();
765
766      SDOperand InFlag(0, 0);
767      AddToQueue(N0, N0);
768      Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
769                                    N0, InFlag);
770      InFlag = Chain.getValue(1);
771
772      if (foldedLoad) {
773        AddToQueue(Tmp0, Tmp0);
774        AddToQueue(Tmp1, Tmp1);
775        AddToQueue(Tmp2, Tmp2);
776        AddToQueue(Tmp3, Tmp3);
777        SDNode *CNode =
778          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1,
779                                Tmp2, Tmp3, Chain, InFlag);
780        Chain  = SDOperand(CNode, 0);
781        InFlag = SDOperand(CNode, 1);
782      } else {
783        AddToQueue(N1, N1);
784        InFlag =
785          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
786      }
787
788      Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
789      ReplaceUses(N.getValue(0), Result);
790      if (foldedLoad)
791        ReplaceUses(N1.getValue(1), Result.getValue(1));
792
793#ifndef NDEBUG
794      DEBUG(std::cerr << std::string(Indent-2, ' '));
795      DEBUG(std::cerr << "=> ");
796      DEBUG(Result.Val->dump(CurDAG));
797      DEBUG(std::cerr << "\n");
798      Indent -= 2;
799#endif
800      return NULL;
801    }
802
803    case ISD::SDIV:
804    case ISD::UDIV:
805    case ISD::SREM:
806    case ISD::UREM: {
807      bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
808      bool isDiv    = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
809      if (!isSigned)
810        switch (NVT) {
811        default: assert(0 && "Unsupported VT!");
812        case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
813        case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
814        case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
815        }
816      else
817        switch (NVT) {
818        default: assert(0 && "Unsupported VT!");
819        case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
820        case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
821        case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
822        }
823
824      unsigned LoReg, HiReg;
825      unsigned ClrOpcode, SExtOpcode;
826      switch (NVT) {
827      default: assert(0 && "Unsupported VT!");
828      case MVT::i8:
829        LoReg = X86::AL;  HiReg = X86::AH;
830        ClrOpcode  = X86::MOV8r0;
831        SExtOpcode = X86::CBW;
832        break;
833      case MVT::i16:
834        LoReg = X86::AX;  HiReg = X86::DX;
835        ClrOpcode  = X86::MOV16r0;
836        SExtOpcode = X86::CWD;
837        break;
838      case MVT::i32:
839        LoReg = X86::EAX; HiReg = X86::EDX;
840        ClrOpcode  = X86::MOV32r0;
841        SExtOpcode = X86::CDQ;
842        break;
843      }
844
845      SDOperand N0 = Node->getOperand(0);
846      SDOperand N1 = Node->getOperand(1);
847
848      bool foldedLoad = false;
849      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
850      foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
851      SDOperand Chain;
852      if (foldedLoad)
853        AddToQueue(Chain, N1.getOperand(0));
854      else
855        Chain = CurDAG->getEntryNode();
856
857      SDOperand InFlag(0, 0);
858      AddToQueue(N0, N0);
859      Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
860                                    N0, InFlag);
861      InFlag = Chain.getValue(1);
862
863      if (isSigned) {
864        // Sign extend the low part into the high part.
865        InFlag =
866          SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
867      } else {
868        // Zero out the high part, effectively zero extending the input.
869        SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
870        Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(HiReg, NVT),
871                                      ClrNode, InFlag);
872        InFlag = Chain.getValue(1);
873      }
874
875      if (foldedLoad) {
876        AddToQueue(Tmp0, Tmp0);
877        AddToQueue(Tmp1, Tmp1);
878        AddToQueue(Tmp2, Tmp2);
879        AddToQueue(Tmp3, Tmp3);
880        SDNode *CNode =
881          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1,
882                                Tmp2, Tmp3, Chain, InFlag);
883        Chain  = SDOperand(CNode, 0);
884        InFlag = SDOperand(CNode, 1);
885      } else {
886        AddToQueue(N1, N1);
887        InFlag =
888          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
889      }
890
891      Result = CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg,
892                                      NVT, InFlag);
893      ReplaceUses(N.getValue(0), Result);
894      if (foldedLoad)
895        ReplaceUses(N1.getValue(1), Result.getValue(1));
896
897#ifndef NDEBUG
898      DEBUG(std::cerr << std::string(Indent-2, ' '));
899      DEBUG(std::cerr << "=> ");
900      DEBUG(Result.Val->dump(CurDAG));
901      DEBUG(std::cerr << "\n");
902      Indent -= 2;
903#endif
904
905      return NULL;
906    }
907
908    case ISD::TRUNCATE: {
909      if (NVT == MVT::i8) {
910        unsigned Opc2;
911        MVT::ValueType VT;
912        switch (Node->getOperand(0).getValueType()) {
913        default: assert(0 && "Unknown truncate!");
914        case MVT::i16:
915          Opc = X86::MOV16to16_;
916          VT = MVT::i16;
917          Opc2 = X86::TRUNC_GR16_GR8;
918          break;
919        case MVT::i32:
920          Opc = X86::MOV32to32_;
921          VT = MVT::i32;
922          Opc2 = X86::TRUNC_GR32_GR8;
923          break;
924        }
925
926        SDOperand Tmp0, Tmp1;
927        AddToQueue(Tmp0, Node->getOperand(0));
928        Tmp1 = SDOperand(CurDAG->getTargetNode(Opc, VT, Tmp0), 0);
929        Result = SDOperand(CurDAG->getTargetNode(Opc2, NVT, Tmp1), 0);
930
931#ifndef NDEBUG
932        DEBUG(std::cerr << std::string(Indent-2, ' '));
933        DEBUG(std::cerr << "=> ");
934        DEBUG(Result.Val->dump(CurDAG));
935        DEBUG(std::cerr << "\n");
936        Indent -= 2;
937#endif
938        return Result.Val;
939      }
940
941      break;
942    }
943  }
944
945  SDNode *ResNode = SelectCode(Result, N);
946
947#ifndef NDEBUG
948  DEBUG(std::cerr << std::string(Indent-2, ' '));
949  DEBUG(std::cerr << "=> ");
950  DEBUG(Result.Val->dump(CurDAG));
951  DEBUG(std::cerr << "\n");
952  Indent -= 2;
953#endif
954
955  return ResNode;
956}
957
958bool X86DAGToDAGISel::
959SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
960                             std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
961  SDOperand Op0, Op1, Op2, Op3;
962  switch (ConstraintCode) {
963  case 'o':   // offsetable        ??
964  case 'v':   // not offsetable    ??
965  default: return true;
966  case 'm':   // memory
967    if (!SelectAddr(Op, Op0, Op1, Op2, Op3))
968      return true;
969    break;
970  }
971
972  OutOps.resize(4);
973  AddToQueue(OutOps[0], Op0);
974  AddToQueue(OutOps[1], Op1);
975  AddToQueue(OutOps[2], Op2);
976  AddToQueue(OutOps[3], Op3);
977  return false;
978}
979
980/// createX86ISelDag - This pass converts a legalized DAG into a
981/// X86-specific DAG, ready for instruction scheduling.
982///
983FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM) {
984  return new X86DAGToDAGISel(TM);
985}
986