1//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the X86-specific support for the FastISel class. Much
11// of the target-specific code is generated by tablegen in the file
12// X86GenFastISel.inc, which is #included here.
13//
14//===----------------------------------------------------------------------===//
15
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86InstrBuilder.h"
19#include "X86InstrInfo.h"
20#include "X86MachineFunctionInfo.h"
21#include "X86RegisterInfo.h"
22#include "X86Subtarget.h"
23#include "X86TargetMachine.h"
24#include "llvm/Analysis/BranchProbabilityInfo.h"
25#include "llvm/CodeGen/Analysis.h"
26#include "llvm/CodeGen/FastISel.h"
27#include "llvm/CodeGen/FunctionLoweringInfo.h"
28#include "llvm/CodeGen/MachineConstantPool.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/IR/CallSite.h"
32#include "llvm/IR/CallingConv.h"
33#include "llvm/IR/DerivedTypes.h"
34#include "llvm/IR/GetElementPtrTypeIterator.h"
35#include "llvm/IR/GlobalAlias.h"
36#include "llvm/IR/GlobalVariable.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicInst.h"
39#include "llvm/IR/Operator.h"
40#include "llvm/Support/ErrorHandling.h"
41#include "llvm/Target/TargetOptions.h"
42using namespace llvm;
43
44namespace {
45
46class X86FastISel final : public FastISel {
47  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
48  /// make the right decision when generating code for different targets.
49  const X86Subtarget *Subtarget;
50
51  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
52  /// floating point ops.
53  /// When SSE is available, use it for f32 operations.
54  /// When SSE2 is available, use it for f64 operations.
55  bool X86ScalarSSEf64;
56  bool X86ScalarSSEf32;
57
58public:
59  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
60                       const TargetLibraryInfo *libInfo)
61    : FastISel(funcInfo, libInfo) {
62    Subtarget = &TM.getSubtarget<X86Subtarget>();
63    X86ScalarSSEf64 = Subtarget->hasSSE2();
64    X86ScalarSSEf32 = Subtarget->hasSSE1();
65  }
66
67  bool TargetSelectInstruction(const Instruction *I) override;
68
69  /// \brief The specified machine instr operand is a vreg, and that
70  /// vreg is being provided by the specified load instruction.  If possible,
71  /// try to fold the load as an operand to the instruction, returning true if
72  /// possible.
73  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
74                           const LoadInst *LI) override;
75
76  bool FastLowerArguments() override;
77
78#include "X86GenFastISel.inc"
79
80private:
81  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
82
83  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
84                       unsigned &ResultReg);
85
86  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
87                        MachineMemOperand *MMO = nullptr, bool Aligned = false);
88  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
89                        const X86AddressMode &AM,
90                        MachineMemOperand *MMO = nullptr, bool Aligned = false);
91
92  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
93                         unsigned &ResultReg);
94
95  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
96  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
97
98  bool X86SelectLoad(const Instruction *I);
99
100  bool X86SelectStore(const Instruction *I);
101
102  bool X86SelectRet(const Instruction *I);
103
104  bool X86SelectCmp(const Instruction *I);
105
106  bool X86SelectZExt(const Instruction *I);
107
108  bool X86SelectBranch(const Instruction *I);
109
110  bool X86SelectShift(const Instruction *I);
111
112  bool X86SelectDivRem(const Instruction *I);
113
114  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
115
116  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
117
118  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
119
120  bool X86SelectSelect(const Instruction *I);
121
122  bool X86SelectTrunc(const Instruction *I);
123
124  bool X86SelectFPExt(const Instruction *I);
125  bool X86SelectFPTrunc(const Instruction *I);
126
127  bool X86VisitIntrinsicCall(const IntrinsicInst &I);
128  bool X86SelectCall(const Instruction *I);
129
130  bool DoSelectCall(const Instruction *I, const char *MemIntName);
131
132  const X86InstrInfo *getInstrInfo() const {
133    return getTargetMachine()->getInstrInfo();
134  }
135  const X86TargetMachine *getTargetMachine() const {
136    return static_cast<const X86TargetMachine *>(&TM);
137  }
138
139  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
140
141  unsigned TargetMaterializeConstant(const Constant *C) override;
142
143  unsigned TargetMaterializeAlloca(const AllocaInst *C) override;
144
145  unsigned TargetMaterializeFloatZero(const ConstantFP *CF) override;
146
147  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
148  /// computed in an SSE register, not on the X87 floating point stack.
149  bool isScalarFPTypeInSSEReg(EVT VT) const {
150    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
151      (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
152  }
153
154  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
155
156  bool IsMemcpySmall(uint64_t Len);
157
158  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
159                          X86AddressMode SrcAM, uint64_t Len);
160
161  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
162                            const Value *Cond);
163};
164
165} // end anonymous namespace.
166
167static CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) {
168  // If both operands are the same, then try to optimize or fold the cmp.
169  CmpInst::Predicate Predicate = CI->getPredicate();
170  if (CI->getOperand(0) != CI->getOperand(1))
171    return Predicate;
172
173  switch (Predicate) {
174  default: llvm_unreachable("Invalid predicate!");
175  case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
176  case CmpInst::FCMP_OEQ:   Predicate = CmpInst::FCMP_ORD;   break;
177  case CmpInst::FCMP_OGT:   Predicate = CmpInst::FCMP_FALSE; break;
178  case CmpInst::FCMP_OGE:   Predicate = CmpInst::FCMP_ORD;   break;
179  case CmpInst::FCMP_OLT:   Predicate = CmpInst::FCMP_FALSE; break;
180  case CmpInst::FCMP_OLE:   Predicate = CmpInst::FCMP_ORD;   break;
181  case CmpInst::FCMP_ONE:   Predicate = CmpInst::FCMP_FALSE; break;
182  case CmpInst::FCMP_ORD:   Predicate = CmpInst::FCMP_ORD;   break;
183  case CmpInst::FCMP_UNO:   Predicate = CmpInst::FCMP_UNO;   break;
184  case CmpInst::FCMP_UEQ:   Predicate = CmpInst::FCMP_TRUE;  break;
185  case CmpInst::FCMP_UGT:   Predicate = CmpInst::FCMP_UNO;   break;
186  case CmpInst::FCMP_UGE:   Predicate = CmpInst::FCMP_TRUE;  break;
187  case CmpInst::FCMP_ULT:   Predicate = CmpInst::FCMP_UNO;   break;
188  case CmpInst::FCMP_ULE:   Predicate = CmpInst::FCMP_TRUE;  break;
189  case CmpInst::FCMP_UNE:   Predicate = CmpInst::FCMP_UNO;   break;
190  case CmpInst::FCMP_TRUE:  Predicate = CmpInst::FCMP_TRUE;  break;
191
192  case CmpInst::ICMP_EQ:    Predicate = CmpInst::FCMP_TRUE;  break;
193  case CmpInst::ICMP_NE:    Predicate = CmpInst::FCMP_FALSE; break;
194  case CmpInst::ICMP_UGT:   Predicate = CmpInst::FCMP_FALSE; break;
195  case CmpInst::ICMP_UGE:   Predicate = CmpInst::FCMP_TRUE;  break;
196  case CmpInst::ICMP_ULT:   Predicate = CmpInst::FCMP_FALSE; break;
197  case CmpInst::ICMP_ULE:   Predicate = CmpInst::FCMP_TRUE;  break;
198  case CmpInst::ICMP_SGT:   Predicate = CmpInst::FCMP_FALSE; break;
199  case CmpInst::ICMP_SGE:   Predicate = CmpInst::FCMP_TRUE;  break;
200  case CmpInst::ICMP_SLT:   Predicate = CmpInst::FCMP_FALSE; break;
201  case CmpInst::ICMP_SLE:   Predicate = CmpInst::FCMP_TRUE;  break;
202  }
203
204  return Predicate;
205}
206
207static std::pair<X86::CondCode, bool>
208getX86ConditionCode(CmpInst::Predicate Predicate) {
209  X86::CondCode CC = X86::COND_INVALID;
210  bool NeedSwap = false;
211  switch (Predicate) {
212  default: break;
213  // Floating-point Predicates
214  case CmpInst::FCMP_UEQ: CC = X86::COND_E;       break;
215  case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through
216  case CmpInst::FCMP_OGT: CC = X86::COND_A;       break;
217  case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through
218  case CmpInst::FCMP_OGE: CC = X86::COND_AE;      break;
219  case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through
220  case CmpInst::FCMP_ULT: CC = X86::COND_B;       break;
221  case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through
222  case CmpInst::FCMP_ULE: CC = X86::COND_BE;      break;
223  case CmpInst::FCMP_ONE: CC = X86::COND_NE;      break;
224  case CmpInst::FCMP_UNO: CC = X86::COND_P;       break;
225  case CmpInst::FCMP_ORD: CC = X86::COND_NP;      break;
226  case CmpInst::FCMP_OEQ: // fall-through
227  case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
228
229  // Integer Predicates
230  case CmpInst::ICMP_EQ:  CC = X86::COND_E;       break;
231  case CmpInst::ICMP_NE:  CC = X86::COND_NE;      break;
232  case CmpInst::ICMP_UGT: CC = X86::COND_A;       break;
233  case CmpInst::ICMP_UGE: CC = X86::COND_AE;      break;
234  case CmpInst::ICMP_ULT: CC = X86::COND_B;       break;
235  case CmpInst::ICMP_ULE: CC = X86::COND_BE;      break;
236  case CmpInst::ICMP_SGT: CC = X86::COND_G;       break;
237  case CmpInst::ICMP_SGE: CC = X86::COND_GE;      break;
238  case CmpInst::ICMP_SLT: CC = X86::COND_L;       break;
239  case CmpInst::ICMP_SLE: CC = X86::COND_LE;      break;
240  }
241
242  return std::make_pair(CC, NeedSwap);
243}
244
245static std::pair<unsigned, bool>
246getX86SSEConditionCode(CmpInst::Predicate Predicate) {
247  unsigned CC;
248  bool NeedSwap = false;
249
250  // SSE Condition code mapping:
251  //  0 - EQ
252  //  1 - LT
253  //  2 - LE
254  //  3 - UNORD
255  //  4 - NEQ
256  //  5 - NLT
257  //  6 - NLE
258  //  7 - ORD
259  switch (Predicate) {
260  default: llvm_unreachable("Unexpected predicate");
261  case CmpInst::FCMP_OEQ: CC = 0;          break;
262  case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
263  case CmpInst::FCMP_OLT: CC = 1;          break;
264  case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
265  case CmpInst::FCMP_OLE: CC = 2;          break;
266  case CmpInst::FCMP_UNO: CC = 3;          break;
267  case CmpInst::FCMP_UNE: CC = 4;          break;
268  case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
269  case CmpInst::FCMP_UGE: CC = 5;          break;
270  case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
271  case CmpInst::FCMP_UGT: CC = 6;          break;
272  case CmpInst::FCMP_ORD: CC = 7;          break;
273  case CmpInst::FCMP_UEQ:
274  case CmpInst::FCMP_ONE: CC = 8;          break;
275  }
276
277  return std::make_pair(CC, NeedSwap);
278}
279
280/// \brief Check if it is possible to fold the condition from the XALU intrinsic
281/// into the user. The condition code will only be updated on success.
282bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
283                                       const Value *Cond) {
284  if (!isa<ExtractValueInst>(Cond))
285    return false;
286
287  const auto *EV = cast<ExtractValueInst>(Cond);
288  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
289    return false;
290
291  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
292  MVT RetVT;
293  const Function *Callee = II->getCalledFunction();
294  Type *RetTy =
295    cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
296  if (!isTypeLegal(RetTy, RetVT))
297    return false;
298
299  if (RetVT != MVT::i32 && RetVT != MVT::i64)
300    return false;
301
302  X86::CondCode TmpCC;
303  switch (II->getIntrinsicID()) {
304  default: return false;
305  case Intrinsic::sadd_with_overflow:
306  case Intrinsic::ssub_with_overflow:
307  case Intrinsic::smul_with_overflow:
308  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
309  case Intrinsic::uadd_with_overflow:
310  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
311  }
312
313  // Check if both instructions are in the same basic block.
314  if (II->getParent() != I->getParent())
315    return false;
316
317  // Make sure nothing is in the way
318  BasicBlock::const_iterator Start = I;
319  BasicBlock::const_iterator End = II;
320  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
321    // We only expect extractvalue instructions between the intrinsic and the
322    // instruction to be selected.
323    if (!isa<ExtractValueInst>(Itr))
324      return false;
325
326    // Check that the extractvalue operand comes from the intrinsic.
327    const auto *EVI = cast<ExtractValueInst>(Itr);
328    if (EVI->getAggregateOperand() != II)
329      return false;
330  }
331
332  CC = TmpCC;
333  return true;
334}
335
336bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
337  EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
338  if (evt == MVT::Other || !evt.isSimple())
339    // Unhandled type. Halt "fast" selection and bail.
340    return false;
341
342  VT = evt.getSimpleVT();
343  // For now, require SSE/SSE2 for performing floating-point operations,
344  // since x87 requires additional work.
345  if (VT == MVT::f64 && !X86ScalarSSEf64)
346    return false;
347  if (VT == MVT::f32 && !X86ScalarSSEf32)
348    return false;
349  // Similarly, no f80 support yet.
350  if (VT == MVT::f80)
351    return false;
352  // We only handle legal types. For example, on x86-32 the instruction
353  // selector contains all of the 64-bit instructions from x86-64,
354  // under the assumption that i64 won't be used if the target doesn't
355  // support it.
356  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
357}
358
359#include "X86GenCallingConv.inc"
360
361/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
362/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
363/// Return true and the result register by reference if it is possible.
364bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
365                                  MachineMemOperand *MMO, unsigned &ResultReg) {
366  // Get opcode and regclass of the output for the given load instruction.
367  unsigned Opc = 0;
368  const TargetRegisterClass *RC = nullptr;
369  switch (VT.getSimpleVT().SimpleTy) {
370  default: return false;
371  case MVT::i1:
372  case MVT::i8:
373    Opc = X86::MOV8rm;
374    RC  = &X86::GR8RegClass;
375    break;
376  case MVT::i16:
377    Opc = X86::MOV16rm;
378    RC  = &X86::GR16RegClass;
379    break;
380  case MVT::i32:
381    Opc = X86::MOV32rm;
382    RC  = &X86::GR32RegClass;
383    break;
384  case MVT::i64:
385    // Must be in x86-64 mode.
386    Opc = X86::MOV64rm;
387    RC  = &X86::GR64RegClass;
388    break;
389  case MVT::f32:
390    if (X86ScalarSSEf32) {
391      Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
392      RC  = &X86::FR32RegClass;
393    } else {
394      Opc = X86::LD_Fp32m;
395      RC  = &X86::RFP32RegClass;
396    }
397    break;
398  case MVT::f64:
399    if (X86ScalarSSEf64) {
400      Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
401      RC  = &X86::FR64RegClass;
402    } else {
403      Opc = X86::LD_Fp64m;
404      RC  = &X86::RFP64RegClass;
405    }
406    break;
407  case MVT::f80:
408    // No f80 support yet.
409    return false;
410  }
411
412  ResultReg = createResultReg(RC);
413  MachineInstrBuilder MIB =
414    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
415  addFullAddress(MIB, AM);
416  if (MMO)
417    MIB->addMemOperand(*FuncInfo.MF, MMO);
418  return true;
419}
420
421/// X86FastEmitStore - Emit a machine instruction to store a value Val of
422/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
423/// and a displacement offset, or a GlobalAddress,
424/// i.e. V. Return true if it is possible.
425bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
426                                   const X86AddressMode &AM,
427                                   MachineMemOperand *MMO, bool Aligned) {
428  // Get opcode and regclass of the output for the given store instruction.
429  unsigned Opc = 0;
430  switch (VT.getSimpleVT().SimpleTy) {
431  case MVT::f80: // No f80 support yet.
432  default: return false;
433  case MVT::i1: {
434    // Mask out all but lowest bit.
435    unsigned AndResult = createResultReg(&X86::GR8RegClass);
436    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
437            TII.get(X86::AND8ri), AndResult)
438      .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
439    ValReg = AndResult;
440  }
441  // FALLTHROUGH, handling i1 as i8.
442  case MVT::i8:  Opc = X86::MOV8mr;  break;
443  case MVT::i16: Opc = X86::MOV16mr; break;
444  case MVT::i32: Opc = X86::MOV32mr; break;
445  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
446  case MVT::f32:
447    Opc = X86ScalarSSEf32 ?
448          (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
449    break;
450  case MVT::f64:
451    Opc = X86ScalarSSEf64 ?
452          (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
453    break;
454  case MVT::v4f32:
455    if (Aligned)
456      Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
457    else
458      Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
459    break;
460  case MVT::v2f64:
461    if (Aligned)
462      Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
463    else
464      Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
465    break;
466  case MVT::v4i32:
467  case MVT::v2i64:
468  case MVT::v8i16:
469  case MVT::v16i8:
470    if (Aligned)
471      Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
472    else
473      Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
474    break;
475  }
476
477  MachineInstrBuilder MIB =
478    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
479  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
480  if (MMO)
481    MIB->addMemOperand(*FuncInfo.MF, MMO);
482
483  return true;
484}
485
486bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
487                                   const X86AddressMode &AM,
488                                   MachineMemOperand *MMO, bool Aligned) {
489  // Handle 'null' like i32/i64 0.
490  if (isa<ConstantPointerNull>(Val))
491    Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
492
493  // If this is a store of a simple constant, fold the constant into the store.
494  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
495    unsigned Opc = 0;
496    bool Signed = true;
497    switch (VT.getSimpleVT().SimpleTy) {
498    default: break;
499    case MVT::i1:  Signed = false;     // FALLTHROUGH to handle as i8.
500    case MVT::i8:  Opc = X86::MOV8mi;  break;
501    case MVT::i16: Opc = X86::MOV16mi; break;
502    case MVT::i32: Opc = X86::MOV32mi; break;
503    case MVT::i64:
504      // Must be a 32-bit sign extended value.
505      if (isInt<32>(CI->getSExtValue()))
506        Opc = X86::MOV64mi32;
507      break;
508    }
509
510    if (Opc) {
511      MachineInstrBuilder MIB =
512        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
513      addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
514                                            : CI->getZExtValue());
515      if (MMO)
516        MIB->addMemOperand(*FuncInfo.MF, MMO);
517      return true;
518    }
519  }
520
521  unsigned ValReg = getRegForValue(Val);
522  if (ValReg == 0)
523    return false;
524
525  bool ValKill = hasTrivialKill(Val);
526  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
527}
528
529/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
530/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
531/// ISD::SIGN_EXTEND).
532bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
533                                    unsigned Src, EVT SrcVT,
534                                    unsigned &ResultReg) {
535  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
536                           Src, /*TODO: Kill=*/false);
537  if (RR == 0)
538    return false;
539
540  ResultReg = RR;
541  return true;
542}
543
544bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
545  // Handle constant address.
546  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
547    // Can't handle alternate code models yet.
548    if (TM.getCodeModel() != CodeModel::Small)
549      return false;
550
551    // Can't handle TLS yet.
552    if (GV->isThreadLocal())
553      return false;
554
555    // RIP-relative addresses can't have additional register operands, so if
556    // we've already folded stuff into the addressing mode, just force the
557    // global value into its own register, which we can use as the basereg.
558    if (!Subtarget->isPICStyleRIPRel() ||
559        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
560      // Okay, we've committed to selecting this global. Set up the address.
561      AM.GV = GV;
562
563      // Allow the subtarget to classify the global.
564      unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
565
566      // If this reference is relative to the pic base, set it now.
567      if (isGlobalRelativeToPICBase(GVFlags)) {
568        // FIXME: How do we know Base.Reg is free??
569        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
570      }
571
572      // Unless the ABI requires an extra load, return a direct reference to
573      // the global.
574      if (!isGlobalStubReference(GVFlags)) {
575        if (Subtarget->isPICStyleRIPRel()) {
576          // Use rip-relative addressing if we can.  Above we verified that the
577          // base and index registers are unused.
578          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
579          AM.Base.Reg = X86::RIP;
580        }
581        AM.GVOpFlags = GVFlags;
582        return true;
583      }
584
585      // Ok, we need to do a load from a stub.  If we've already loaded from
586      // this stub, reuse the loaded pointer, otherwise emit the load now.
587      DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
588      unsigned LoadReg;
589      if (I != LocalValueMap.end() && I->second != 0) {
590        LoadReg = I->second;
591      } else {
592        // Issue load from stub.
593        unsigned Opc = 0;
594        const TargetRegisterClass *RC = nullptr;
595        X86AddressMode StubAM;
596        StubAM.Base.Reg = AM.Base.Reg;
597        StubAM.GV = GV;
598        StubAM.GVOpFlags = GVFlags;
599
600        // Prepare for inserting code in the local-value area.
601        SavePoint SaveInsertPt = enterLocalValueArea();
602
603        if (TLI.getPointerTy() == MVT::i64) {
604          Opc = X86::MOV64rm;
605          RC  = &X86::GR64RegClass;
606
607          if (Subtarget->isPICStyleRIPRel())
608            StubAM.Base.Reg = X86::RIP;
609        } else {
610          Opc = X86::MOV32rm;
611          RC  = &X86::GR32RegClass;
612        }
613
614        LoadReg = createResultReg(RC);
615        MachineInstrBuilder LoadMI =
616          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
617        addFullAddress(LoadMI, StubAM);
618
619        // Ok, back to normal mode.
620        leaveLocalValueArea(SaveInsertPt);
621
622        // Prevent loading GV stub multiple times in same MBB.
623        LocalValueMap[V] = LoadReg;
624      }
625
626      // Now construct the final address. Note that the Disp, Scale,
627      // and Index values may already be set here.
628      AM.Base.Reg = LoadReg;
629      AM.GV = nullptr;
630      return true;
631    }
632  }
633
634  // If all else fails, try to materialize the value in a register.
635  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
636    if (AM.Base.Reg == 0) {
637      AM.Base.Reg = getRegForValue(V);
638      return AM.Base.Reg != 0;
639    }
640    if (AM.IndexReg == 0) {
641      assert(AM.Scale == 1 && "Scale with no index!");
642      AM.IndexReg = getRegForValue(V);
643      return AM.IndexReg != 0;
644    }
645  }
646
647  return false;
648}
649
650/// X86SelectAddress - Attempt to fill in an address from the given value.
651///
652bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
653  SmallVector<const Value *, 32> GEPs;
654redo_gep:
655  const User *U = nullptr;
656  unsigned Opcode = Instruction::UserOp1;
657  if (const Instruction *I = dyn_cast<Instruction>(V)) {
658    // Don't walk into other basic blocks; it's possible we haven't
659    // visited them yet, so the instructions may not yet be assigned
660    // virtual registers.
661    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
662        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
663      Opcode = I->getOpcode();
664      U = I;
665    }
666  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
667    Opcode = C->getOpcode();
668    U = C;
669  }
670
671  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
672    if (Ty->getAddressSpace() > 255)
673      // Fast instruction selection doesn't support the special
674      // address spaces.
675      return false;
676
677  switch (Opcode) {
678  default: break;
679  case Instruction::BitCast:
680    // Look past bitcasts.
681    return X86SelectAddress(U->getOperand(0), AM);
682
683  case Instruction::IntToPtr:
684    // Look past no-op inttoptrs.
685    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
686      return X86SelectAddress(U->getOperand(0), AM);
687    break;
688
689  case Instruction::PtrToInt:
690    // Look past no-op ptrtoints.
691    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
692      return X86SelectAddress(U->getOperand(0), AM);
693    break;
694
695  case Instruction::Alloca: {
696    // Do static allocas.
697    const AllocaInst *A = cast<AllocaInst>(V);
698    DenseMap<const AllocaInst*, int>::iterator SI =
699      FuncInfo.StaticAllocaMap.find(A);
700    if (SI != FuncInfo.StaticAllocaMap.end()) {
701      AM.BaseType = X86AddressMode::FrameIndexBase;
702      AM.Base.FrameIndex = SI->second;
703      return true;
704    }
705    break;
706  }
707
708  case Instruction::Add: {
709    // Adds of constants are common and easy enough.
710    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
711      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
712      // They have to fit in the 32-bit signed displacement field though.
713      if (isInt<32>(Disp)) {
714        AM.Disp = (uint32_t)Disp;
715        return X86SelectAddress(U->getOperand(0), AM);
716      }
717    }
718    break;
719  }
720
721  case Instruction::GetElementPtr: {
722    X86AddressMode SavedAM = AM;
723
724    // Pattern-match simple GEPs.
725    uint64_t Disp = (int32_t)AM.Disp;
726    unsigned IndexReg = AM.IndexReg;
727    unsigned Scale = AM.Scale;
728    gep_type_iterator GTI = gep_type_begin(U);
729    // Iterate through the indices, folding what we can. Constants can be
730    // folded, and one dynamic index can be handled, if the scale is supported.
731    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
732         i != e; ++i, ++GTI) {
733      const Value *Op = *i;
734      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
735        const StructLayout *SL = DL.getStructLayout(STy);
736        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
737        continue;
738      }
739
740      // A array/variable index is always of the form i*S where S is the
741      // constant scale size.  See if we can push the scale into immediates.
742      uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
743      for (;;) {
744        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
745          // Constant-offset addressing.
746          Disp += CI->getSExtValue() * S;
747          break;
748        }
749        if (canFoldAddIntoGEP(U, Op)) {
750          // A compatible add with a constant operand. Fold the constant.
751          ConstantInt *CI =
752            cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
753          Disp += CI->getSExtValue() * S;
754          // Iterate on the other operand.
755          Op = cast<AddOperator>(Op)->getOperand(0);
756          continue;
757        }
758        if (IndexReg == 0 &&
759            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
760            (S == 1 || S == 2 || S == 4 || S == 8)) {
761          // Scaled-index addressing.
762          Scale = S;
763          IndexReg = getRegForGEPIndex(Op).first;
764          if (IndexReg == 0)
765            return false;
766          break;
767        }
768        // Unsupported.
769        goto unsupported_gep;
770      }
771    }
772
773    // Check for displacement overflow.
774    if (!isInt<32>(Disp))
775      break;
776
777    AM.IndexReg = IndexReg;
778    AM.Scale = Scale;
779    AM.Disp = (uint32_t)Disp;
780    GEPs.push_back(V);
781
782    if (const GetElementPtrInst *GEP =
783          dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
784      // Ok, the GEP indices were covered by constant-offset and scaled-index
785      // addressing. Update the address state and move on to examining the base.
786      V = GEP;
787      goto redo_gep;
788    } else if (X86SelectAddress(U->getOperand(0), AM)) {
789      return true;
790    }
791
792    // If we couldn't merge the gep value into this addr mode, revert back to
793    // our address and just match the value instead of completely failing.
794    AM = SavedAM;
795
796    for (SmallVectorImpl<const Value *>::reverse_iterator
797           I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
798      if (handleConstantAddresses(*I, AM))
799        return true;
800
801    return false;
802  unsupported_gep:
803    // Ok, the GEP indices weren't all covered.
804    break;
805  }
806  }
807
808  return handleConstantAddresses(V, AM);
809}
810
811/// X86SelectCallAddress - Attempt to fill in an address from the given value.
812///
813bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
814  const User *U = nullptr;
815  unsigned Opcode = Instruction::UserOp1;
816  const Instruction *I = dyn_cast<Instruction>(V);
817  // Record if the value is defined in the same basic block.
818  //
819  // This information is crucial to know whether or not folding an
820  // operand is valid.
821  // Indeed, FastISel generates or reuses a virtual register for all
822  // operands of all instructions it selects. Obviously, the definition and
823  // its uses must use the same virtual register otherwise the produced
824  // code is incorrect.
825  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
826  // registers for values that are alive across basic blocks. This ensures
827  // that the values are consistently set between across basic block, even
828  // if different instruction selection mechanisms are used (e.g., a mix of
829  // SDISel and FastISel).
830  // For values local to a basic block, the instruction selection process
831  // generates these virtual registers with whatever method is appropriate
832  // for its needs. In particular, FastISel and SDISel do not share the way
833  // local virtual registers are set.
834  // Therefore, this is impossible (or at least unsafe) to share values
835  // between basic blocks unless they use the same instruction selection
836  // method, which is not guarantee for X86.
837  // Moreover, things like hasOneUse could not be used accurately, if we
838  // allow to reference values across basic blocks whereas they are not
839  // alive across basic blocks initially.
840  bool InMBB = true;
841  if (I) {
842    Opcode = I->getOpcode();
843    U = I;
844    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
845  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
846    Opcode = C->getOpcode();
847    U = C;
848  }
849
850  switch (Opcode) {
851  default: break;
852  case Instruction::BitCast:
853    // Look past bitcasts if its operand is in the same BB.
854    if (InMBB)
855      return X86SelectCallAddress(U->getOperand(0), AM);
856    break;
857
858  case Instruction::IntToPtr:
859    // Look past no-op inttoptrs if its operand is in the same BB.
860    if (InMBB &&
861        TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
862      return X86SelectCallAddress(U->getOperand(0), AM);
863    break;
864
865  case Instruction::PtrToInt:
866    // Look past no-op ptrtoints if its operand is in the same BB.
867    if (InMBB &&
868        TLI.getValueType(U->getType()) == TLI.getPointerTy())
869      return X86SelectCallAddress(U->getOperand(0), AM);
870    break;
871  }
872
873  // Handle constant address.
874  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
875    // Can't handle alternate code models yet.
876    if (TM.getCodeModel() != CodeModel::Small)
877      return false;
878
879    // RIP-relative addresses can't have additional register operands.
880    if (Subtarget->isPICStyleRIPRel() &&
881        (AM.Base.Reg != 0 || AM.IndexReg != 0))
882      return false;
883
884    // Can't handle DLL Import.
885    if (GV->hasDLLImportStorageClass())
886      return false;
887
888    // Can't handle TLS.
889    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
890      if (GVar->isThreadLocal())
891        return false;
892
893    // Okay, we've committed to selecting this global. Set up the basic address.
894    AM.GV = GV;
895
896    // No ABI requires an extra load for anything other than DLLImport, which
897    // we rejected above. Return a direct reference to the global.
898    if (Subtarget->isPICStyleRIPRel()) {
899      // Use rip-relative addressing if we can.  Above we verified that the
900      // base and index registers are unused.
901      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
902      AM.Base.Reg = X86::RIP;
903    } else if (Subtarget->isPICStyleStubPIC()) {
904      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
905    } else if (Subtarget->isPICStyleGOT()) {
906      AM.GVOpFlags = X86II::MO_GOTOFF;
907    }
908
909    return true;
910  }
911
912  // If all else fails, try to materialize the value in a register.
913  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
914    if (AM.Base.Reg == 0) {
915      AM.Base.Reg = getRegForValue(V);
916      return AM.Base.Reg != 0;
917    }
918    if (AM.IndexReg == 0) {
919      assert(AM.Scale == 1 && "Scale with no index!");
920      AM.IndexReg = getRegForValue(V);
921      return AM.IndexReg != 0;
922    }
923  }
924
925  return false;
926}
927
928
929/// X86SelectStore - Select and emit code to implement store instructions.
930bool X86FastISel::X86SelectStore(const Instruction *I) {
931  // Atomic stores need special handling.
932  const StoreInst *S = cast<StoreInst>(I);
933
934  if (S->isAtomic())
935    return false;
936
937  const Value *Val = S->getValueOperand();
938  const Value *Ptr = S->getPointerOperand();
939
940  MVT VT;
941  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
942    return false;
943
944  unsigned Alignment = S->getAlignment();
945  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
946  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
947    Alignment = ABIAlignment;
948  bool Aligned = Alignment >= ABIAlignment;
949
950  X86AddressMode AM;
951  if (!X86SelectAddress(Ptr, AM))
952    return false;
953
954  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
955}
956
957/// X86SelectRet - Select and emit code to implement ret instructions.
958bool X86FastISel::X86SelectRet(const Instruction *I) {
959  const ReturnInst *Ret = cast<ReturnInst>(I);
960  const Function &F = *I->getParent()->getParent();
961  const X86MachineFunctionInfo *X86MFInfo =
962      FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
963
964  if (!FuncInfo.CanLowerReturn)
965    return false;
966
967  CallingConv::ID CC = F.getCallingConv();
968  if (CC != CallingConv::C &&
969      CC != CallingConv::Fast &&
970      CC != CallingConv::X86_FastCall &&
971      CC != CallingConv::X86_64_SysV)
972    return false;
973
974  if (Subtarget->isCallingConvWin64(CC))
975    return false;
976
977  // Don't handle popping bytes on return for now.
978  if (X86MFInfo->getBytesToPopOnReturn() != 0)
979    return false;
980
981  // fastcc with -tailcallopt is intended to provide a guaranteed
982  // tail call optimization. Fastisel doesn't know how to do that.
983  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
984    return false;
985
986  // Let SDISel handle vararg functions.
987  if (F.isVarArg())
988    return false;
989
990  // Build a list of return value registers.
991  SmallVector<unsigned, 4> RetRegs;
992
993  if (Ret->getNumOperands() > 0) {
994    SmallVector<ISD::OutputArg, 4> Outs;
995    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
996
997    // Analyze operands of the call, assigning locations to each operand.
998    SmallVector<CCValAssign, 16> ValLocs;
999    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
1000                   I->getContext());
1001    CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1002
1003    const Value *RV = Ret->getOperand(0);
1004    unsigned Reg = getRegForValue(RV);
1005    if (Reg == 0)
1006      return false;
1007
1008    // Only handle a single return value for now.
1009    if (ValLocs.size() != 1)
1010      return false;
1011
1012    CCValAssign &VA = ValLocs[0];
1013
1014    // Don't bother handling odd stuff for now.
1015    if (VA.getLocInfo() != CCValAssign::Full)
1016      return false;
1017    // Only handle register returns for now.
1018    if (!VA.isRegLoc())
1019      return false;
1020
1021    // The calling-convention tables for x87 returns don't tell
1022    // the whole story.
1023    if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
1024      return false;
1025
1026    unsigned SrcReg = Reg + VA.getValNo();
1027    EVT SrcVT = TLI.getValueType(RV->getType());
1028    EVT DstVT = VA.getValVT();
1029    // Special handling for extended integers.
1030    if (SrcVT != DstVT) {
1031      if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1032        return false;
1033
1034      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1035        return false;
1036
1037      assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1038
1039      if (SrcVT == MVT::i1) {
1040        if (Outs[0].Flags.isSExt())
1041          return false;
1042        SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
1043        SrcVT = MVT::i8;
1044      }
1045      unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1046                                             ISD::SIGN_EXTEND;
1047      SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
1048                          SrcReg, /*TODO: Kill=*/false);
1049    }
1050
1051    // Make the copy.
1052    unsigned DstReg = VA.getLocReg();
1053    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
1054    // Avoid a cross-class copy. This is very unlikely.
1055    if (!SrcRC->contains(DstReg))
1056      return false;
1057    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1058            DstReg).addReg(SrcReg);
1059
1060    // Add register to return instruction.
1061    RetRegs.push_back(VA.getLocReg());
1062  }
1063
1064  // The x86-64 ABI for returning structs by value requires that we copy
1065  // the sret argument into %rax for the return. We saved the argument into
1066  // a virtual register in the entry block, so now we copy the value out
1067  // and into %rax. We also do the same with %eax for Win32.
1068  if (F.hasStructRetAttr() &&
1069      (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
1070    unsigned Reg = X86MFInfo->getSRetReturnReg();
1071    assert(Reg &&
1072           "SRetReturnReg should have been set in LowerFormalArguments()!");
1073    unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
1074    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1075            RetReg).addReg(Reg);
1076    RetRegs.push_back(RetReg);
1077  }
1078
1079  // Now emit the RET.
1080  MachineInstrBuilder MIB =
1081    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
1082  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1083    MIB.addReg(RetRegs[i], RegState::Implicit);
1084  return true;
1085}
1086
1087/// X86SelectLoad - Select and emit code to implement load instructions.
1088///
1089bool X86FastISel::X86SelectLoad(const Instruction *I) {
1090  const LoadInst *LI = cast<LoadInst>(I);
1091
1092  // Atomic loads need special handling.
1093  if (LI->isAtomic())
1094    return false;
1095
1096  MVT VT;
1097  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1098    return false;
1099
1100  const Value *Ptr = LI->getPointerOperand();
1101
1102  X86AddressMode AM;
1103  if (!X86SelectAddress(Ptr, AM))
1104    return false;
1105
1106  unsigned ResultReg = 0;
1107  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
1108    return false;
1109
1110  UpdateValueMap(I, ResultReg);
1111  return true;
1112}
1113
1114static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1115  bool HasAVX = Subtarget->hasAVX();
1116  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
1117  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
1118
1119  switch (VT.getSimpleVT().SimpleTy) {
1120  default:       return 0;
1121  case MVT::i8:  return X86::CMP8rr;
1122  case MVT::i16: return X86::CMP16rr;
1123  case MVT::i32: return X86::CMP32rr;
1124  case MVT::i64: return X86::CMP64rr;
1125  case MVT::f32:
1126    return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
1127  case MVT::f64:
1128    return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
1129  }
1130}
1131
1132/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
1133/// of the comparison, return an opcode that works for the compare (e.g.
1134/// CMP32ri) otherwise return 0.
1135static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1136  switch (VT.getSimpleVT().SimpleTy) {
1137  // Otherwise, we can't fold the immediate into this comparison.
1138  default: return 0;
1139  case MVT::i8: return X86::CMP8ri;
1140  case MVT::i16: return X86::CMP16ri;
1141  case MVT::i32: return X86::CMP32ri;
1142  case MVT::i64:
1143    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1144    // field.
1145    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
1146      return X86::CMP64ri32;
1147    return 0;
1148  }
1149}
1150
1151bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
1152                                     EVT VT) {
1153  unsigned Op0Reg = getRegForValue(Op0);
1154  if (Op0Reg == 0) return false;
1155
1156  // Handle 'null' like i32/i64 0.
1157  if (isa<ConstantPointerNull>(Op1))
1158    Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1159
1160  // We have two options: compare with register or immediate.  If the RHS of
1161  // the compare is an immediate that we can fold into this compare, use
1162  // CMPri, otherwise use CMPrr.
1163  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1164    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1165      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CompareImmOpc))
1166        .addReg(Op0Reg)
1167        .addImm(Op1C->getSExtValue());
1168      return true;
1169    }
1170  }
1171
1172  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1173  if (CompareOpc == 0) return false;
1174
1175  unsigned Op1Reg = getRegForValue(Op1);
1176  if (Op1Reg == 0) return false;
1177  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CompareOpc))
1178    .addReg(Op0Reg)
1179    .addReg(Op1Reg);
1180
1181  return true;
1182}
1183
1184bool X86FastISel::X86SelectCmp(const Instruction *I) {
1185  const CmpInst *CI = cast<CmpInst>(I);
1186
1187  MVT VT;
1188  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1189    return false;
1190
1191  // Try to optimize or fold the cmp.
1192  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1193  unsigned ResultReg = 0;
1194  switch (Predicate) {
1195  default: break;
1196  case CmpInst::FCMP_FALSE: {
1197    ResultReg = createResultReg(&X86::GR32RegClass);
1198    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1199            ResultReg);
1200    ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
1201                                           X86::sub_8bit);
1202    if (!ResultReg)
1203      return false;
1204    break;
1205  }
1206  case CmpInst::FCMP_TRUE: {
1207    ResultReg = createResultReg(&X86::GR8RegClass);
1208    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1209            ResultReg).addImm(1);
1210    break;
1211  }
1212  }
1213
1214  if (ResultReg) {
1215    UpdateValueMap(I, ResultReg);
1216    return true;
1217  }
1218
1219  const Value *LHS = CI->getOperand(0);
1220  const Value *RHS = CI->getOperand(1);
1221
1222  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1223  // We don't have to materialize a zero constant for this case and can just use
1224  // %x again on the RHS.
1225  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1226    const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1227    if (RHSC && RHSC->isNullValue())
1228      RHS = LHS;
1229  }
1230
1231  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1232  static unsigned SETFOpcTable[2][3] = {
1233    { X86::SETEr,  X86::SETNPr, X86::AND8rr },
1234    { X86::SETNEr, X86::SETPr,  X86::OR8rr  }
1235  };
1236  unsigned *SETFOpc = nullptr;
1237  switch (Predicate) {
1238  default: break;
1239  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1240  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1241  }
1242
1243  ResultReg = createResultReg(&X86::GR8RegClass);
1244  if (SETFOpc) {
1245    if (!X86FastEmitCompare(LHS, RHS, VT))
1246      return false;
1247
1248    unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1249    unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1250    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
1251            FlagReg1);
1252    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
1253            FlagReg2);
1254    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1255            ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1256    UpdateValueMap(I, ResultReg);
1257    return true;
1258  }
1259
1260  X86::CondCode CC;
1261  bool SwapArgs;
1262  std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
1263  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1264  unsigned Opc = X86::getSETFromCond(CC);
1265
1266  if (SwapArgs)
1267    std::swap(LHS, RHS);
1268
1269  // Emit a compare of LHS/RHS.
1270  if (!X86FastEmitCompare(LHS, RHS, VT))
1271    return false;
1272
1273  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
1274  UpdateValueMap(I, ResultReg);
1275  return true;
1276}
1277
1278bool X86FastISel::X86SelectZExt(const Instruction *I) {
1279  EVT DstVT = TLI.getValueType(I->getType());
1280  if (!TLI.isTypeLegal(DstVT))
1281    return false;
1282
1283  unsigned ResultReg = getRegForValue(I->getOperand(0));
1284  if (ResultReg == 0)
1285    return false;
1286
1287  // Handle zero-extension from i1 to i8, which is common.
1288  MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
1289  if (SrcVT.SimpleTy == MVT::i1) {
1290    // Set the high bits to zero.
1291    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1292    SrcVT = MVT::i8;
1293
1294    if (ResultReg == 0)
1295      return false;
1296  }
1297
1298  if (DstVT == MVT::i64) {
1299    // Handle extension to 64-bits via sub-register shenanigans.
1300    unsigned MovInst;
1301
1302    switch (SrcVT.SimpleTy) {
1303    case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
1304    case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1305    case MVT::i32: MovInst = X86::MOV32rr;     break;
1306    default: llvm_unreachable("Unexpected zext to i64 source type");
1307    }
1308
1309    unsigned Result32 = createResultReg(&X86::GR32RegClass);
1310    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1311      .addReg(ResultReg);
1312
1313    ResultReg = createResultReg(&X86::GR64RegClass);
1314    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1315            ResultReg)
1316      .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1317  } else if (DstVT != MVT::i8) {
1318    ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1319                           ResultReg, /*Kill=*/true);
1320    if (ResultReg == 0)
1321      return false;
1322  }
1323
1324  UpdateValueMap(I, ResultReg);
1325  return true;
1326}
1327
1328
1329bool X86FastISel::X86SelectBranch(const Instruction *I) {
1330  // Unconditional branches are selected by tablegen-generated code.
1331  // Handle a conditional branch.
1332  const BranchInst *BI = cast<BranchInst>(I);
1333  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1334  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1335
1336  // Fold the common case of a conditional branch with a comparison
1337  // in the same block (values defined on other blocks may not have
1338  // initialized registers).
1339  X86::CondCode CC;
1340  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1341    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1342      EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
1343
1344      // Try to optimize or fold the cmp.
1345      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1346      switch (Predicate) {
1347      default: break;
1348      case CmpInst::FCMP_FALSE: FastEmitBranch(FalseMBB, DbgLoc); return true;
1349      case CmpInst::FCMP_TRUE:  FastEmitBranch(TrueMBB, DbgLoc); return true;
1350      }
1351
1352      const Value *CmpLHS = CI->getOperand(0);
1353      const Value *CmpRHS = CI->getOperand(1);
1354
1355      // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1356      // 0.0.
1357      // We don't have to materialize a zero constant for this case and can just
1358      // use %x again on the RHS.
1359      if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1360        const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1361        if (CmpRHSC && CmpRHSC->isNullValue())
1362          CmpRHS = CmpLHS;
1363      }
1364
1365      // Try to take advantage of fallthrough opportunities.
1366      if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1367        std::swap(TrueMBB, FalseMBB);
1368        Predicate = CmpInst::getInversePredicate(Predicate);
1369      }
1370
1371      // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1372      // code check. Instead two branch instructions are required to check all
1373      // the flags. First we change the predicate to a supported condition code,
1374      // which will be the first branch. Later one we will emit the second
1375      // branch.
1376      bool NeedExtraBranch = false;
1377      switch (Predicate) {
1378      default: break;
1379      case CmpInst::FCMP_OEQ:
1380        std::swap(TrueMBB, FalseMBB); // fall-through
1381      case CmpInst::FCMP_UNE:
1382        NeedExtraBranch = true;
1383        Predicate = CmpInst::FCMP_ONE;
1384        break;
1385      }
1386
1387      bool SwapArgs;
1388      unsigned BranchOpc;
1389      std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
1390      assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1391
1392      BranchOpc = X86::GetCondBranchFromCond(CC);
1393      if (SwapArgs)
1394        std::swap(CmpLHS, CmpRHS);
1395
1396      // Emit a compare of the LHS and RHS, setting the flags.
1397      if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT))
1398        return false;
1399
1400      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1401        .addMBB(TrueMBB);
1402
1403      // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1404      // to UNE above).
1405      if (NeedExtraBranch) {
1406        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_4))
1407          .addMBB(TrueMBB);
1408      }
1409
1410      // Obtain the branch weight and add the TrueBB to the successor list.
1411      uint32_t BranchWeight = 0;
1412      if (FuncInfo.BPI)
1413        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1414                                                   TrueMBB->getBasicBlock());
1415      FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
1416
1417      // Emits an unconditional branch to the FalseBB, obtains the branch
1418      // weight, and adds it to the successor list.
1419      FastEmitBranch(FalseMBB, DbgLoc);
1420
1421      return true;
1422    }
1423  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1424    // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1425    // typically happen for _Bool and C++ bools.
1426    MVT SourceVT;
1427    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1428        isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1429      unsigned TestOpc = 0;
1430      switch (SourceVT.SimpleTy) {
1431      default: break;
1432      case MVT::i8:  TestOpc = X86::TEST8ri; break;
1433      case MVT::i16: TestOpc = X86::TEST16ri; break;
1434      case MVT::i32: TestOpc = X86::TEST32ri; break;
1435      case MVT::i64: TestOpc = X86::TEST64ri32; break;
1436      }
1437      if (TestOpc) {
1438        unsigned OpReg = getRegForValue(TI->getOperand(0));
1439        if (OpReg == 0) return false;
1440        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1441          .addReg(OpReg).addImm(1);
1442
1443        unsigned JmpOpc = X86::JNE_4;
1444        if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1445          std::swap(TrueMBB, FalseMBB);
1446          JmpOpc = X86::JE_4;
1447        }
1448
1449        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
1450          .addMBB(TrueMBB);
1451        FastEmitBranch(FalseMBB, DbgLoc);
1452        uint32_t BranchWeight = 0;
1453        if (FuncInfo.BPI)
1454          BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1455                                                     TrueMBB->getBasicBlock());
1456        FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
1457        return true;
1458      }
1459    }
1460  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1461    // Fake request the condition, otherwise the intrinsic might be completely
1462    // optimized away.
1463    unsigned TmpReg = getRegForValue(BI->getCondition());
1464    if (TmpReg == 0)
1465      return false;
1466
1467    unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
1468
1469    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1470      .addMBB(TrueMBB);
1471    FastEmitBranch(FalseMBB, DbgLoc);
1472    uint32_t BranchWeight = 0;
1473    if (FuncInfo.BPI)
1474      BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1475                                                 TrueMBB->getBasicBlock());
1476    FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
1477    return true;
1478  }
1479
1480  // Otherwise do a clumsy setcc and re-test it.
1481  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1482  // in an explicit cast, so make sure to handle that correctly.
1483  unsigned OpReg = getRegForValue(BI->getCondition());
1484  if (OpReg == 0) return false;
1485
1486  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1487    .addReg(OpReg).addImm(1);
1488  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_4))
1489    .addMBB(TrueMBB);
1490  FastEmitBranch(FalseMBB, DbgLoc);
1491  uint32_t BranchWeight = 0;
1492  if (FuncInfo.BPI)
1493    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1494                                               TrueMBB->getBasicBlock());
1495  FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
1496  return true;
1497}
1498
1499bool X86FastISel::X86SelectShift(const Instruction *I) {
1500  unsigned CReg = 0, OpReg = 0;
1501  const TargetRegisterClass *RC = nullptr;
1502  if (I->getType()->isIntegerTy(8)) {
1503    CReg = X86::CL;
1504    RC = &X86::GR8RegClass;
1505    switch (I->getOpcode()) {
1506    case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1507    case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1508    case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
1509    default: return false;
1510    }
1511  } else if (I->getType()->isIntegerTy(16)) {
1512    CReg = X86::CX;
1513    RC = &X86::GR16RegClass;
1514    switch (I->getOpcode()) {
1515    case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1516    case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1517    case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
1518    default: return false;
1519    }
1520  } else if (I->getType()->isIntegerTy(32)) {
1521    CReg = X86::ECX;
1522    RC = &X86::GR32RegClass;
1523    switch (I->getOpcode()) {
1524    case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1525    case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1526    case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
1527    default: return false;
1528    }
1529  } else if (I->getType()->isIntegerTy(64)) {
1530    CReg = X86::RCX;
1531    RC = &X86::GR64RegClass;
1532    switch (I->getOpcode()) {
1533    case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1534    case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1535    case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
1536    default: return false;
1537    }
1538  } else {
1539    return false;
1540  }
1541
1542  MVT VT;
1543  if (!isTypeLegal(I->getType(), VT))
1544    return false;
1545
1546  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1547  if (Op0Reg == 0) return false;
1548
1549  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1550  if (Op1Reg == 0) return false;
1551  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1552          CReg).addReg(Op1Reg);
1553
1554  // The shift instruction uses X86::CL. If we defined a super-register
1555  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1556  if (CReg != X86::CL)
1557    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1558            TII.get(TargetOpcode::KILL), X86::CL)
1559      .addReg(CReg, RegState::Kill);
1560
1561  unsigned ResultReg = createResultReg(RC);
1562  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1563    .addReg(Op0Reg);
1564  UpdateValueMap(I, ResultReg);
1565  return true;
1566}
1567
1568bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1569  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1570  const static unsigned NumOps   = 4; // SDiv, SRem, UDiv, URem
1571  const static bool S = true;  // IsSigned
1572  const static bool U = false; // !IsSigned
1573  const static unsigned Copy = TargetOpcode::COPY;
1574  // For the X86 DIV/IDIV instruction, in most cases the dividend
1575  // (numerator) must be in a specific register pair highreg:lowreg,
1576  // producing the quotient in lowreg and the remainder in highreg.
1577  // For most data types, to set up the instruction, the dividend is
1578  // copied into lowreg, and lowreg is sign-extended or zero-extended
1579  // into highreg.  The exception is i8, where the dividend is defined
1580  // as a single register rather than a register pair, and we
1581  // therefore directly sign-extend or zero-extend the dividend into
1582  // lowreg, instead of copying, and ignore the highreg.
1583  const static struct DivRemEntry {
1584    // The following portion depends only on the data type.
1585    const TargetRegisterClass *RC;
1586    unsigned LowInReg;  // low part of the register pair
1587    unsigned HighInReg; // high part of the register pair
1588    // The following portion depends on both the data type and the operation.
1589    struct DivRemResult {
1590    unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
1591    unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
1592                              // highreg, or copying a zero into highreg.
1593    unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
1594                              // zero/sign-extending into lowreg for i8.
1595    unsigned DivRemResultReg; // Register containing the desired result.
1596    bool IsOpSigned;          // Whether to use signed or unsigned form.
1597    } ResultTable[NumOps];
1598  } OpTable[NumTypes] = {
1599    { &X86::GR8RegClass,  X86::AX,  0, {
1600        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AL,  S }, // SDiv
1601        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AH,  S }, // SRem
1602        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AL,  U }, // UDiv
1603        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AH,  U }, // URem
1604      }
1605    }, // i8
1606    { &X86::GR16RegClass, X86::AX,  X86::DX, {
1607        { X86::IDIV16r, X86::CWD,     Copy,            X86::AX,  S }, // SDiv
1608        { X86::IDIV16r, X86::CWD,     Copy,            X86::DX,  S }, // SRem
1609        { X86::DIV16r,  X86::MOV32r0, Copy,            X86::AX,  U }, // UDiv
1610        { X86::DIV16r,  X86::MOV32r0, Copy,            X86::DX,  U }, // URem
1611      }
1612    }, // i16
1613    { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1614        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EAX, S }, // SDiv
1615        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EDX, S }, // SRem
1616        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EAX, U }, // UDiv
1617        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EDX, U }, // URem
1618      }
1619    }, // i32
1620    { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1621        { X86::IDIV64r, X86::CQO,     Copy,            X86::RAX, S }, // SDiv
1622        { X86::IDIV64r, X86::CQO,     Copy,            X86::RDX, S }, // SRem
1623        { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RAX, U }, // UDiv
1624        { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RDX, U }, // URem
1625      }
1626    }, // i64
1627  };
1628
1629  MVT VT;
1630  if (!isTypeLegal(I->getType(), VT))
1631    return false;
1632
1633  unsigned TypeIndex, OpIndex;
1634  switch (VT.SimpleTy) {
1635  default: return false;
1636  case MVT::i8:  TypeIndex = 0; break;
1637  case MVT::i16: TypeIndex = 1; break;
1638  case MVT::i32: TypeIndex = 2; break;
1639  case MVT::i64: TypeIndex = 3;
1640    if (!Subtarget->is64Bit())
1641      return false;
1642    break;
1643  }
1644
1645  switch (I->getOpcode()) {
1646  default: llvm_unreachable("Unexpected div/rem opcode");
1647  case Instruction::SDiv: OpIndex = 0; break;
1648  case Instruction::SRem: OpIndex = 1; break;
1649  case Instruction::UDiv: OpIndex = 2; break;
1650  case Instruction::URem: OpIndex = 3; break;
1651  }
1652
1653  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1654  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1655  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1656  if (Op0Reg == 0)
1657    return false;
1658  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1659  if (Op1Reg == 0)
1660    return false;
1661
1662  // Move op0 into low-order input register.
1663  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1664          TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1665  // Zero-extend or sign-extend into high-order input register.
1666  if (OpEntry.OpSignExtend) {
1667    if (OpEntry.IsOpSigned)
1668      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1669              TII.get(OpEntry.OpSignExtend));
1670    else {
1671      unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1672      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1673              TII.get(X86::MOV32r0), Zero32);
1674
1675      // Copy the zero into the appropriate sub/super/identical physical
1676      // register. Unfortunately the operations needed are not uniform enough to
1677      // fit neatly into the table above.
1678      if (VT.SimpleTy == MVT::i16) {
1679        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1680                TII.get(Copy), TypeEntry.HighInReg)
1681          .addReg(Zero32, 0, X86::sub_16bit);
1682      } else if (VT.SimpleTy == MVT::i32) {
1683        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1684                TII.get(Copy), TypeEntry.HighInReg)
1685            .addReg(Zero32);
1686      } else if (VT.SimpleTy == MVT::i64) {
1687        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1688                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1689            .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1690      }
1691    }
1692  }
1693  // Generate the DIV/IDIV instruction.
1694  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1695          TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1696  // For i8 remainder, we can't reference AH directly, as we'll end
1697  // up with bogus copies like %R9B = COPY %AH. Reference AX
1698  // instead to prevent AH references in a REX instruction.
1699  //
1700  // The current assumption of the fast register allocator is that isel
1701  // won't generate explicit references to the GPR8_NOREX registers. If
1702  // the allocator and/or the backend get enhanced to be more robust in
1703  // that regard, this can be, and should be, removed.
1704  unsigned ResultReg = 0;
1705  if ((I->getOpcode() == Instruction::SRem ||
1706       I->getOpcode() == Instruction::URem) &&
1707      OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
1708    unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
1709    unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
1710    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1711            TII.get(Copy), SourceSuperReg).addReg(X86::AX);
1712
1713    // Shift AX right by 8 bits instead of using AH.
1714    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
1715            ResultSuperReg).addReg(SourceSuperReg).addImm(8);
1716
1717    // Now reference the 8-bit subreg of the result.
1718    ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
1719                                           /*Kill=*/true, X86::sub_8bit);
1720  }
1721  // Copy the result out of the physreg if we haven't already.
1722  if (!ResultReg) {
1723    ResultReg = createResultReg(TypeEntry.RC);
1724    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
1725        .addReg(OpEntry.DivRemResultReg);
1726  }
1727  UpdateValueMap(I, ResultReg);
1728
1729  return true;
1730}
1731
1732/// \brief Emit a conditional move instruction (if the are supported) to lower
1733/// the select.
1734bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
1735  // Check if the subtarget supports these instructions.
1736  if (!Subtarget->hasCMov())
1737    return false;
1738
1739  // FIXME: Add support for i8.
1740  if (RetVT < MVT::i16 || RetVT > MVT::i64)
1741    return false;
1742
1743  const Value *Cond = I->getOperand(0);
1744  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
1745  bool NeedTest = true;
1746  X86::CondCode CC = X86::COND_NE;
1747
1748  // Optimize conditions coming from a compare if both instructions are in the
1749  // same basic block (values defined in other basic blocks may not have
1750  // initialized registers).
1751  const auto *CI = dyn_cast<CmpInst>(Cond);
1752  if (CI && (CI->getParent() == I->getParent())) {
1753    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1754
1755    // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1756    static unsigned SETFOpcTable[2][3] = {
1757      { X86::SETNPr, X86::SETEr , X86::TEST8rr },
1758      { X86::SETPr,  X86::SETNEr, X86::OR8rr   }
1759    };
1760    unsigned *SETFOpc = nullptr;
1761    switch (Predicate) {
1762    default: break;
1763    case CmpInst::FCMP_OEQ:
1764      SETFOpc = &SETFOpcTable[0][0];
1765      Predicate = CmpInst::ICMP_NE;
1766      break;
1767    case CmpInst::FCMP_UNE:
1768      SETFOpc = &SETFOpcTable[1][0];
1769      Predicate = CmpInst::ICMP_NE;
1770      break;
1771    }
1772
1773    bool NeedSwap;
1774    std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
1775    assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1776
1777    const Value *CmpLHS = CI->getOperand(0);
1778    const Value *CmpRHS = CI->getOperand(1);
1779    if (NeedSwap)
1780      std::swap(CmpLHS, CmpRHS);
1781
1782    EVT CmpVT = TLI.getValueType(CmpLHS->getType());
1783    // Emit a compare of the LHS and RHS, setting the flags.
1784    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
1785     return false;
1786
1787    if (SETFOpc) {
1788      unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1789      unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1790      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
1791              FlagReg1);
1792      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
1793              FlagReg2);
1794      auto const &II = TII.get(SETFOpc[2]);
1795      if (II.getNumDefs()) {
1796        unsigned TmpReg = createResultReg(&X86::GR8RegClass);
1797        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
1798          .addReg(FlagReg2).addReg(FlagReg1);
1799      } else {
1800        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1801          .addReg(FlagReg2).addReg(FlagReg1);
1802      }
1803    }
1804    NeedTest = false;
1805  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
1806    // Fake request the condition, otherwise the intrinsic might be completely
1807    // optimized away.
1808    unsigned TmpReg = getRegForValue(Cond);
1809    if (TmpReg == 0)
1810      return false;
1811
1812    NeedTest = false;
1813  }
1814
1815  if (NeedTest) {
1816    // Selects operate on i1, however, CondReg is 8 bits width and may contain
1817    // garbage. Indeed, only the less significant bit is supposed to be
1818    // accurate. If we read more than the lsb, we may see non-zero values
1819    // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for
1820    // the select. This is achieved by performing TEST against 1.
1821    unsigned CondReg = getRegForValue(Cond);
1822    if (CondReg == 0)
1823      return false;
1824    bool CondIsKill = hasTrivialKill(Cond);
1825
1826    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1827      .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
1828  }
1829
1830  const Value *LHS = I->getOperand(1);
1831  const Value *RHS = I->getOperand(2);
1832
1833  unsigned RHSReg = getRegForValue(RHS);
1834  bool RHSIsKill = hasTrivialKill(RHS);
1835
1836  unsigned LHSReg = getRegForValue(LHS);
1837  bool LHSIsKill = hasTrivialKill(LHS);
1838
1839  if (!LHSReg || !RHSReg)
1840    return false;
1841
1842  unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
1843  unsigned ResultReg = FastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
1844                                       LHSReg, LHSIsKill);
1845  UpdateValueMap(I, ResultReg);
1846  return true;
1847}
1848
1849/// \brief Emit SSE instructions to lower the select.
1850///
1851/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
1852/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
1853/// SSE instructions are available.
1854bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
1855  // Optimize conditions coming from a compare if both instructions are in the
1856  // same basic block (values defined in other basic blocks may not have
1857  // initialized registers).
1858  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
1859  if (!CI || (CI->getParent() != I->getParent()))
1860    return false;
1861
1862  if (I->getType() != CI->getOperand(0)->getType() ||
1863      !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
1864        (Subtarget->hasSSE2() && RetVT == MVT::f64)    ))
1865    return false;
1866
1867  const Value *CmpLHS = CI->getOperand(0);
1868  const Value *CmpRHS = CI->getOperand(1);
1869  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1870
1871  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1872  // We don't have to materialize a zero constant for this case and can just use
1873  // %x again on the RHS.
1874  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1875    const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1876    if (CmpRHSC && CmpRHSC->isNullValue())
1877      CmpRHS = CmpLHS;
1878  }
1879
1880  unsigned CC;
1881  bool NeedSwap;
1882  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
1883  if (CC > 7)
1884    return false;
1885
1886  if (NeedSwap)
1887    std::swap(CmpLHS, CmpRHS);
1888
1889  static unsigned OpcTable[2][2][4] = {
1890    { { X86::CMPSSrr,  X86::FsANDPSrr,  X86::FsANDNPSrr,  X86::FsORPSrr  },
1891      { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr }  },
1892    { { X86::CMPSDrr,  X86::FsANDPDrr,  X86::FsANDNPDrr,  X86::FsORPDrr  },
1893      { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr }  }
1894  };
1895
1896  bool HasAVX = Subtarget->hasAVX();
1897  unsigned *Opc = nullptr;
1898  switch (RetVT.SimpleTy) {
1899  default: return false;
1900  case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
1901  case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
1902  }
1903
1904  const Value *LHS = I->getOperand(1);
1905  const Value *RHS = I->getOperand(2);
1906
1907  unsigned LHSReg = getRegForValue(LHS);
1908  bool LHSIsKill = hasTrivialKill(LHS);
1909
1910  unsigned RHSReg = getRegForValue(RHS);
1911  bool RHSIsKill = hasTrivialKill(RHS);
1912
1913  unsigned CmpLHSReg = getRegForValue(CmpLHS);
1914  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
1915
1916  unsigned CmpRHSReg = getRegForValue(CmpRHS);
1917  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
1918
1919  if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS)
1920    return false;
1921
1922  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
1923  unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
1924                                     CmpRHSReg, CmpRHSIsKill, CC);
1925  unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
1926                                    LHSReg, LHSIsKill);
1927  unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
1928                                     RHSReg, RHSIsKill);
1929  unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
1930                                       AndReg, /*IsKill=*/true);
1931  UpdateValueMap(I, ResultReg);
1932  return true;
1933}
1934
1935bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
1936  // These are pseudo CMOV instructions and will be later expanded into control-
1937  // flow.
1938  unsigned Opc;
1939  switch (RetVT.SimpleTy) {
1940  default: return false;
1941  case MVT::i8:  Opc = X86::CMOV_GR8;  break;
1942  case MVT::i16: Opc = X86::CMOV_GR16; break;
1943  case MVT::i32: Opc = X86::CMOV_GR32; break;
1944  case MVT::f32: Opc = X86::CMOV_FR32; break;
1945  case MVT::f64: Opc = X86::CMOV_FR64; break;
1946  }
1947
1948  const Value *Cond = I->getOperand(0);
1949  X86::CondCode CC = X86::COND_NE;
1950
1951  // Optimize conditions coming from a compare if both instructions are in the
1952  // same basic block (values defined in other basic blocks may not have
1953  // initialized registers).
1954  const auto *CI = dyn_cast<CmpInst>(Cond);
1955  if (CI && (CI->getParent() == I->getParent())) {
1956    bool NeedSwap;
1957    std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate());
1958    if (CC > X86::LAST_VALID_COND)
1959      return false;
1960
1961    const Value *CmpLHS = CI->getOperand(0);
1962    const Value *CmpRHS = CI->getOperand(1);
1963
1964    if (NeedSwap)
1965      std::swap(CmpLHS, CmpRHS);
1966
1967    EVT CmpVT = TLI.getValueType(CmpLHS->getType());
1968    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
1969      return false;
1970  } else {
1971    unsigned CondReg = getRegForValue(Cond);
1972    if (CondReg == 0)
1973      return false;
1974    bool CondIsKill = hasTrivialKill(Cond);
1975    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1976      .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
1977  }
1978
1979  const Value *LHS = I->getOperand(1);
1980  const Value *RHS = I->getOperand(2);
1981
1982  unsigned LHSReg = getRegForValue(LHS);
1983  bool LHSIsKill = hasTrivialKill(LHS);
1984
1985  unsigned RHSReg = getRegForValue(RHS);
1986  bool RHSIsKill = hasTrivialKill(RHS);
1987
1988  if (!LHSReg || !RHSReg)
1989    return false;
1990
1991  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
1992
1993  unsigned ResultReg =
1994    FastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
1995  UpdateValueMap(I, ResultReg);
1996  return true;
1997}
1998
1999bool X86FastISel::X86SelectSelect(const Instruction *I) {
2000  MVT RetVT;
2001  if (!isTypeLegal(I->getType(), RetVT))
2002    return false;
2003
2004  // Check if we can fold the select.
2005  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2006    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2007    const Value *Opnd = nullptr;
2008    switch (Predicate) {
2009    default:                              break;
2010    case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2011    case CmpInst::FCMP_TRUE:  Opnd = I->getOperand(1); break;
2012    }
2013    // No need for a select anymore - this is an unconditional move.
2014    if (Opnd) {
2015      unsigned OpReg = getRegForValue(Opnd);
2016      if (OpReg == 0)
2017        return false;
2018      bool OpIsKill = hasTrivialKill(Opnd);
2019      const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2020      unsigned ResultReg = createResultReg(RC);
2021      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2022              TII.get(TargetOpcode::COPY), ResultReg)
2023        .addReg(OpReg, getKillRegState(OpIsKill));
2024      UpdateValueMap(I, ResultReg);
2025      return true;
2026    }
2027  }
2028
2029  // First try to use real conditional move instructions.
2030  if (X86FastEmitCMoveSelect(RetVT, I))
2031    return true;
2032
2033  // Try to use a sequence of SSE instructions to simulate a conditional move.
2034  if (X86FastEmitSSESelect(RetVT, I))
2035    return true;
2036
2037  // Fall-back to pseudo conditional move instructions, which will be later
2038  // converted to control-flow.
2039  if (X86FastEmitPseudoSelect(RetVT, I))
2040    return true;
2041
2042  return false;
2043}
2044
2045bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2046  // fpext from float to double.
2047  if (X86ScalarSSEf64 &&
2048      I->getType()->isDoubleTy()) {
2049    const Value *V = I->getOperand(0);
2050    if (V->getType()->isFloatTy()) {
2051      unsigned OpReg = getRegForValue(V);
2052      if (OpReg == 0) return false;
2053      unsigned ResultReg = createResultReg(&X86::FR64RegClass);
2054      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2055              TII.get(X86::CVTSS2SDrr), ResultReg)
2056        .addReg(OpReg);
2057      UpdateValueMap(I, ResultReg);
2058      return true;
2059    }
2060  }
2061
2062  return false;
2063}
2064
2065bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2066  if (X86ScalarSSEf64) {
2067    if (I->getType()->isFloatTy()) {
2068      const Value *V = I->getOperand(0);
2069      if (V->getType()->isDoubleTy()) {
2070        unsigned OpReg = getRegForValue(V);
2071        if (OpReg == 0) return false;
2072        unsigned ResultReg = createResultReg(&X86::FR32RegClass);
2073        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2074                TII.get(X86::CVTSD2SSrr), ResultReg)
2075          .addReg(OpReg);
2076        UpdateValueMap(I, ResultReg);
2077        return true;
2078      }
2079    }
2080  }
2081
2082  return false;
2083}
2084
2085bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2086  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
2087  EVT DstVT = TLI.getValueType(I->getType());
2088
2089  // This code only handles truncation to byte.
2090  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2091    return false;
2092  if (!TLI.isTypeLegal(SrcVT))
2093    return false;
2094
2095  unsigned InputReg = getRegForValue(I->getOperand(0));
2096  if (!InputReg)
2097    // Unhandled operand.  Halt "fast" selection and bail.
2098    return false;
2099
2100  if (SrcVT == MVT::i8) {
2101    // Truncate from i8 to i1; no code needed.
2102    UpdateValueMap(I, InputReg);
2103    return true;
2104  }
2105
2106  if (!Subtarget->is64Bit()) {
2107    // If we're on x86-32; we can't extract an i8 from a general register.
2108    // First issue a copy to GR16_ABCD or GR32_ABCD.
2109    const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ?
2110      (const TargetRegisterClass*)&X86::GR16_ABCDRegClass :
2111      (const TargetRegisterClass*)&X86::GR32_ABCDRegClass;
2112    unsigned CopyReg = createResultReg(CopyRC);
2113    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
2114            CopyReg).addReg(InputReg);
2115    InputReg = CopyReg;
2116  }
2117
2118  // Issue an extract_subreg.
2119  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
2120                                                  InputReg, /*Kill=*/true,
2121                                                  X86::sub_8bit);
2122  if (!ResultReg)
2123    return false;
2124
2125  UpdateValueMap(I, ResultReg);
2126  return true;
2127}
2128
2129bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2130  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2131}
2132
2133bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2134                                     X86AddressMode SrcAM, uint64_t Len) {
2135
2136  // Make sure we don't bloat code by inlining very large memcpy's.
2137  if (!IsMemcpySmall(Len))
2138    return false;
2139
2140  bool i64Legal = Subtarget->is64Bit();
2141
2142  // We don't care about alignment here since we just emit integer accesses.
2143  while (Len) {
2144    MVT VT;
2145    if (Len >= 8 && i64Legal)
2146      VT = MVT::i64;
2147    else if (Len >= 4)
2148      VT = MVT::i32;
2149    else if (Len >= 2)
2150      VT = MVT::i16;
2151    else {
2152      VT = MVT::i8;
2153    }
2154
2155    unsigned Reg;
2156    bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2157    RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2158    assert(RV && "Failed to emit load or store??");
2159
2160    unsigned Size = VT.getSizeInBits()/8;
2161    Len -= Size;
2162    DestAM.Disp += Size;
2163    SrcAM.Disp += Size;
2164  }
2165
2166  return true;
2167}
2168
2169static bool isCommutativeIntrinsic(IntrinsicInst const &I) {
2170  switch (I.getIntrinsicID()) {
2171  case Intrinsic::sadd_with_overflow:
2172  case Intrinsic::uadd_with_overflow:
2173  case Intrinsic::smul_with_overflow:
2174  case Intrinsic::umul_with_overflow:
2175    return true;
2176  default:
2177    return false;
2178  }
2179}
2180
2181bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
2182  // FIXME: Handle more intrinsics.
2183  switch (I.getIntrinsicID()) {
2184  default: return false;
2185  case Intrinsic::frameaddress: {
2186    Type *RetTy = I.getCalledFunction()->getReturnType();
2187
2188    MVT VT;
2189    if (!isTypeLegal(RetTy, VT))
2190      return false;
2191
2192    unsigned Opc;
2193    const TargetRegisterClass *RC = nullptr;
2194
2195    switch (VT.SimpleTy) {
2196    default: llvm_unreachable("Invalid result type for frameaddress.");
2197    case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2198    case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2199    }
2200
2201    // This needs to be set before we call getFrameRegister, otherwise we get
2202    // the wrong frame register.
2203    MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
2204    MFI->setFrameAddressIsTaken(true);
2205
2206    const X86RegisterInfo *RegInfo =
2207      static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
2208    unsigned FrameReg = RegInfo->getFrameRegister(*(FuncInfo.MF));
2209    assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2210            (FrameReg == X86::EBP && VT == MVT::i32)) &&
2211           "Invalid Frame Register!");
2212
2213    // Always make a copy of the frame register to to a vreg first, so that we
2214    // never directly reference the frame register (the TwoAddressInstruction-
2215    // Pass doesn't like that).
2216    unsigned SrcReg = createResultReg(RC);
2217    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2218            TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2219
2220    // Now recursively load from the frame address.
2221    // movq (%rbp), %rax
2222    // movq (%rax), %rax
2223    // movq (%rax), %rax
2224    // ...
2225    unsigned DestReg;
2226    unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
2227    while (Depth--) {
2228      DestReg = createResultReg(RC);
2229      addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2230                           TII.get(Opc), DestReg), SrcReg);
2231      SrcReg = DestReg;
2232    }
2233
2234    UpdateValueMap(&I, SrcReg);
2235    return true;
2236  }
2237  case Intrinsic::memcpy: {
2238    const MemCpyInst &MCI = cast<MemCpyInst>(I);
2239    // Don't handle volatile or variable length memcpys.
2240    if (MCI.isVolatile())
2241      return false;
2242
2243    if (isa<ConstantInt>(MCI.getLength())) {
2244      // Small memcpy's are common enough that we want to do them
2245      // without a call if possible.
2246      uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
2247      if (IsMemcpySmall(Len)) {
2248        X86AddressMode DestAM, SrcAM;
2249        if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
2250            !X86SelectAddress(MCI.getRawSource(), SrcAM))
2251          return false;
2252        TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2253        return true;
2254      }
2255    }
2256
2257    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2258    if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
2259      return false;
2260
2261    if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
2262      return false;
2263
2264    return DoSelectCall(&I, "memcpy");
2265  }
2266  case Intrinsic::memset: {
2267    const MemSetInst &MSI = cast<MemSetInst>(I);
2268
2269    if (MSI.isVolatile())
2270      return false;
2271
2272    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2273    if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
2274      return false;
2275
2276    if (MSI.getDestAddressSpace() > 255)
2277      return false;
2278
2279    return DoSelectCall(&I, "memset");
2280  }
2281  case Intrinsic::stackprotector: {
2282    // Emit code to store the stack guard onto the stack.
2283    EVT PtrTy = TLI.getPointerTy();
2284
2285    const Value *Op1 = I.getArgOperand(0); // The guard's value.
2286    const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
2287
2288    MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2289
2290    // Grab the frame index.
2291    X86AddressMode AM;
2292    if (!X86SelectAddress(Slot, AM)) return false;
2293    if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2294    return true;
2295  }
2296  case Intrinsic::dbg_declare: {
2297    const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
2298    X86AddressMode AM;
2299    assert(DI->getAddress() && "Null address should be checked earlier!");
2300    if (!X86SelectAddress(DI->getAddress(), AM))
2301      return false;
2302    const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2303    // FIXME may need to add RegState::Debug to any registers produced,
2304    // although ESP/EBP should be the only ones at the moment.
2305    addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM).
2306      addImm(0).addMetadata(DI->getVariable());
2307    return true;
2308  }
2309  case Intrinsic::trap: {
2310    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2311    return true;
2312  }
2313  case Intrinsic::sqrt: {
2314    if (!Subtarget->hasSSE1())
2315      return false;
2316
2317    Type *RetTy = I.getCalledFunction()->getReturnType();
2318
2319    MVT VT;
2320    if (!isTypeLegal(RetTy, VT))
2321      return false;
2322
2323    // Unfortunately we can't use FastEmit_r, because the AVX version of FSQRT
2324    // is not generated by FastISel yet.
2325    // FIXME: Update this code once tablegen can handle it.
2326    static const unsigned SqrtOpc[2][2] = {
2327      {X86::SQRTSSr, X86::VSQRTSSr},
2328      {X86::SQRTSDr, X86::VSQRTSDr}
2329    };
2330    bool HasAVX = Subtarget->hasAVX();
2331    unsigned Opc;
2332    const TargetRegisterClass *RC;
2333    switch (VT.SimpleTy) {
2334    default: return false;
2335    case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
2336    case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
2337    }
2338
2339    const Value *SrcVal = I.getArgOperand(0);
2340    unsigned SrcReg = getRegForValue(SrcVal);
2341
2342    if (SrcReg == 0)
2343      return false;
2344
2345    unsigned ImplicitDefReg = 0;
2346    if (HasAVX) {
2347      ImplicitDefReg = createResultReg(RC);
2348      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2349              TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2350    }
2351
2352    unsigned ResultReg = createResultReg(RC);
2353    MachineInstrBuilder MIB;
2354    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2355                  ResultReg);
2356
2357    if (ImplicitDefReg)
2358      MIB.addReg(ImplicitDefReg);
2359
2360    MIB.addReg(SrcReg);
2361
2362    UpdateValueMap(&I, ResultReg);
2363    return true;
2364  }
2365  case Intrinsic::sadd_with_overflow:
2366  case Intrinsic::uadd_with_overflow:
2367  case Intrinsic::ssub_with_overflow:
2368  case Intrinsic::usub_with_overflow:
2369  case Intrinsic::smul_with_overflow:
2370  case Intrinsic::umul_with_overflow: {
2371    // This implements the basic lowering of the xalu with overflow intrinsics
2372    // into add/sub/mul followed by either seto or setb.
2373    const Function *Callee = I.getCalledFunction();
2374    auto *Ty = cast<StructType>(Callee->getReturnType());
2375    Type *RetTy = Ty->getTypeAtIndex(0U);
2376    Type *CondTy = Ty->getTypeAtIndex(1);
2377
2378    MVT VT;
2379    if (!isTypeLegal(RetTy, VT))
2380      return false;
2381
2382    if (VT < MVT::i8 || VT > MVT::i64)
2383      return false;
2384
2385    const Value *LHS = I.getArgOperand(0);
2386    const Value *RHS = I.getArgOperand(1);
2387
2388    // Canonicalize immediate to the RHS.
2389    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2390        isCommutativeIntrinsic(I))
2391      std::swap(LHS, RHS);
2392
2393    unsigned BaseOpc, CondOpc;
2394    switch (I.getIntrinsicID()) {
2395    default: llvm_unreachable("Unexpected intrinsic!");
2396    case Intrinsic::sadd_with_overflow:
2397      BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
2398    case Intrinsic::uadd_with_overflow:
2399      BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
2400    case Intrinsic::ssub_with_overflow:
2401      BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
2402    case Intrinsic::usub_with_overflow:
2403      BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
2404    case Intrinsic::smul_with_overflow:
2405      BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
2406    case Intrinsic::umul_with_overflow:
2407      BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
2408    }
2409
2410    unsigned LHSReg = getRegForValue(LHS);
2411    if (LHSReg == 0)
2412      return false;
2413    bool LHSIsKill = hasTrivialKill(LHS);
2414
2415    unsigned ResultReg = 0;
2416    // Check if we have an immediate version.
2417    if (auto const *C = dyn_cast<ConstantInt>(RHS)) {
2418      ResultReg = FastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2419                              C->getZExtValue());
2420    }
2421
2422    unsigned RHSReg;
2423    bool RHSIsKill;
2424    if (!ResultReg) {
2425      RHSReg = getRegForValue(RHS);
2426      if (RHSReg == 0)
2427        return false;
2428      RHSIsKill = hasTrivialKill(RHS);
2429      ResultReg = FastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2430                              RHSIsKill);
2431    }
2432
2433    // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2434    // it manually.
2435    if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2436      static const unsigned MULOpc[] =
2437        { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2438      static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2439      // First copy the first operand into RAX, which is an implicit input to
2440      // the X86::MUL*r instruction.
2441      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2442              TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2443        .addReg(LHSReg, getKillRegState(LHSIsKill));
2444      ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2445                                 TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2446    } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2447      static const unsigned MULOpc[] =
2448        { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2449      if (VT == MVT::i8) {
2450        // Copy the first operand into AL, which is an implicit input to the
2451        // X86::IMUL8r instruction.
2452        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2453               TII.get(TargetOpcode::COPY), X86::AL)
2454          .addReg(LHSReg, getKillRegState(LHSIsKill));
2455        ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2456                                   RHSIsKill);
2457      } else
2458        ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2459                                    TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2460                                    RHSReg, RHSIsKill);
2461    }
2462
2463    if (!ResultReg)
2464      return false;
2465
2466    unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
2467    assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2468    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
2469            ResultReg2);
2470
2471    UpdateValueMap(&I, ResultReg, 2);
2472    return true;
2473  }
2474  case Intrinsic::x86_sse_cvttss2si:
2475  case Intrinsic::x86_sse_cvttss2si64:
2476  case Intrinsic::x86_sse2_cvttsd2si:
2477  case Intrinsic::x86_sse2_cvttsd2si64: {
2478    bool IsInputDouble;
2479    switch (I.getIntrinsicID()) {
2480    default: llvm_unreachable("Unexpected intrinsic.");
2481    case Intrinsic::x86_sse_cvttss2si:
2482    case Intrinsic::x86_sse_cvttss2si64:
2483      if (!Subtarget->hasSSE1())
2484        return false;
2485      IsInputDouble = false;
2486      break;
2487    case Intrinsic::x86_sse2_cvttsd2si:
2488    case Intrinsic::x86_sse2_cvttsd2si64:
2489      if (!Subtarget->hasSSE2())
2490        return false;
2491      IsInputDouble = true;
2492      break;
2493    }
2494
2495    Type *RetTy = I.getCalledFunction()->getReturnType();
2496    MVT VT;
2497    if (!isTypeLegal(RetTy, VT))
2498      return false;
2499
2500    static const unsigned CvtOpc[2][2][2] = {
2501      { { X86::CVTTSS2SIrr,   X86::VCVTTSS2SIrr   },
2502        { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr }  },
2503      { { X86::CVTTSD2SIrr,   X86::VCVTTSD2SIrr   },
2504        { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr }  }
2505    };
2506    bool HasAVX = Subtarget->hasAVX();
2507    unsigned Opc;
2508    switch (VT.SimpleTy) {
2509    default: llvm_unreachable("Unexpected result type.");
2510    case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
2511    case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
2512    }
2513
2514    // Check if we can fold insertelement instructions into the convert.
2515    const Value *Op = I.getArgOperand(0);
2516    while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
2517      const Value *Index = IE->getOperand(2);
2518      if (!isa<ConstantInt>(Index))
2519        break;
2520      unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
2521
2522      if (Idx == 0) {
2523        Op = IE->getOperand(1);
2524        break;
2525      }
2526      Op = IE->getOperand(0);
2527    }
2528
2529    unsigned Reg = getRegForValue(Op);
2530    if (Reg == 0)
2531      return false;
2532
2533    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
2534    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2535      .addReg(Reg);
2536
2537    UpdateValueMap(&I, ResultReg);
2538    return true;
2539  }
2540  }
2541}
2542
2543bool X86FastISel::FastLowerArguments() {
2544  if (!FuncInfo.CanLowerReturn)
2545    return false;
2546
2547  const Function *F = FuncInfo.Fn;
2548  if (F->isVarArg())
2549    return false;
2550
2551  CallingConv::ID CC = F->getCallingConv();
2552  if (CC != CallingConv::C)
2553    return false;
2554
2555  if (Subtarget->isCallingConvWin64(CC))
2556    return false;
2557
2558  if (!Subtarget->is64Bit())
2559    return false;
2560
2561  // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
2562  unsigned GPRCnt = 0;
2563  unsigned FPRCnt = 0;
2564  unsigned Idx = 0;
2565  for (auto const &Arg : F->args()) {
2566    // The first argument is at index 1.
2567    ++Idx;
2568    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2569        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2570        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2571        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2572      return false;
2573
2574    Type *ArgTy = Arg.getType();
2575    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
2576      return false;
2577
2578    EVT ArgVT = TLI.getValueType(ArgTy);
2579    if (!ArgVT.isSimple()) return false;
2580    switch (ArgVT.getSimpleVT().SimpleTy) {
2581    default: return false;
2582    case MVT::i32:
2583    case MVT::i64:
2584      ++GPRCnt;
2585      break;
2586    case MVT::f32:
2587    case MVT::f64:
2588      if (!Subtarget->hasSSE1())
2589        return false;
2590      ++FPRCnt;
2591      break;
2592    }
2593
2594    if (GPRCnt > 6)
2595      return false;
2596
2597    if (FPRCnt > 8)
2598      return false;
2599  }
2600
2601  static const MCPhysReg GPR32ArgRegs[] = {
2602    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
2603  };
2604  static const MCPhysReg GPR64ArgRegs[] = {
2605    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
2606  };
2607  static const MCPhysReg XMMArgRegs[] = {
2608    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2609    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2610  };
2611
2612  unsigned GPRIdx = 0;
2613  unsigned FPRIdx = 0;
2614  for (auto const &Arg : F->args()) {
2615    MVT VT = TLI.getSimpleValueType(Arg.getType());
2616    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2617    unsigned SrcReg;
2618    switch (VT.SimpleTy) {
2619    default: llvm_unreachable("Unexpected value type.");
2620    case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
2621    case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
2622    case MVT::f32: // fall-through
2623    case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
2624    }
2625    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2626    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2627    // Without this, EmitLiveInCopies may eliminate the livein if its only
2628    // use is a bitcast (which isn't turned into an instruction).
2629    unsigned ResultReg = createResultReg(RC);
2630    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2631            TII.get(TargetOpcode::COPY), ResultReg)
2632      .addReg(DstReg, getKillRegState(true));
2633    UpdateValueMap(&Arg, ResultReg);
2634  }
2635  return true;
2636}
2637
2638bool X86FastISel::X86SelectCall(const Instruction *I) {
2639  const CallInst *CI = cast<CallInst>(I);
2640  const Value *Callee = CI->getCalledValue();
2641
2642  // Can't handle inline asm yet.
2643  if (isa<InlineAsm>(Callee))
2644    return false;
2645
2646  // Handle intrinsic calls.
2647  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
2648    return X86VisitIntrinsicCall(*II);
2649
2650  // Allow SelectionDAG isel to handle tail calls.
2651  if (cast<CallInst>(I)->isTailCall())
2652    return false;
2653
2654  return DoSelectCall(I, nullptr);
2655}
2656
2657static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
2658                                           const ImmutableCallSite &CS) {
2659  if (Subtarget.is64Bit())
2660    return 0;
2661  if (Subtarget.getTargetTriple().isOSMSVCRT())
2662    return 0;
2663  CallingConv::ID CC = CS.getCallingConv();
2664  if (CC == CallingConv::Fast || CC == CallingConv::GHC)
2665    return 0;
2666  if (!CS.paramHasAttr(1, Attribute::StructRet))
2667    return 0;
2668  if (CS.paramHasAttr(1, Attribute::InReg))
2669    return 0;
2670  return 4;
2671}
2672
2673// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
2674bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
2675  const CallInst *CI = cast<CallInst>(I);
2676  const Value *Callee = CI->getCalledValue();
2677
2678  // Handle only C and fastcc calling conventions for now.
2679  ImmutableCallSite CS(CI);
2680  CallingConv::ID CC = CS.getCallingConv();
2681  bool isWin64 = Subtarget->isCallingConvWin64(CC);
2682  if (CC != CallingConv::C && CC != CallingConv::Fast &&
2683      CC != CallingConv::X86_FastCall && CC != CallingConv::X86_64_Win64 &&
2684      CC != CallingConv::X86_64_SysV)
2685    return false;
2686
2687  // fastcc with -tailcallopt is intended to provide a guaranteed
2688  // tail call optimization. Fastisel doesn't know how to do that.
2689  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
2690    return false;
2691
2692  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
2693  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
2694  bool isVarArg = FTy->isVarArg();
2695
2696  // Don't know how to handle Win64 varargs yet.  Nothing special needed for
2697  // x86-32.  Special handling for x86-64 is implemented.
2698  if (isVarArg && isWin64)
2699    return false;
2700
2701  // Don't know about inalloca yet.
2702  if (CS.hasInAllocaArgument())
2703    return false;
2704
2705  // Fast-isel doesn't know about callee-pop yet.
2706  if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
2707                       TM.Options.GuaranteedTailCallOpt))
2708    return false;
2709
2710  // Check whether the function can return without sret-demotion.
2711  SmallVector<ISD::OutputArg, 4> Outs;
2712  GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI);
2713  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
2714                                           *FuncInfo.MF, FTy->isVarArg(),
2715                                           Outs, FTy->getContext());
2716  if (!CanLowerReturn)
2717    return false;
2718
2719  // Materialize callee address in a register. FIXME: GV address can be
2720  // handled with a CALLpcrel32 instead.
2721  X86AddressMode CalleeAM;
2722  if (!X86SelectCallAddress(Callee, CalleeAM))
2723    return false;
2724  unsigned CalleeOp = 0;
2725  const GlobalValue *GV = nullptr;
2726  if (CalleeAM.GV != nullptr) {
2727    GV = CalleeAM.GV;
2728  } else if (CalleeAM.Base.Reg != 0) {
2729    CalleeOp = CalleeAM.Base.Reg;
2730  } else
2731    return false;
2732
2733  // Deal with call operands first.
2734  SmallVector<const Value *, 8> ArgVals;
2735  SmallVector<unsigned, 8> Args;
2736  SmallVector<MVT, 8> ArgVTs;
2737  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2738  unsigned arg_size = CS.arg_size();
2739  Args.reserve(arg_size);
2740  ArgVals.reserve(arg_size);
2741  ArgVTs.reserve(arg_size);
2742  ArgFlags.reserve(arg_size);
2743  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
2744       i != e; ++i) {
2745    // If we're lowering a mem intrinsic instead of a regular call, skip the
2746    // last two arguments, which should not passed to the underlying functions.
2747    if (MemIntName && e-i <= 2)
2748      break;
2749    Value *ArgVal = *i;
2750    ISD::ArgFlagsTy Flags;
2751    unsigned AttrInd = i - CS.arg_begin() + 1;
2752    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
2753      Flags.setSExt();
2754    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
2755      Flags.setZExt();
2756
2757    if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
2758      PointerType *Ty = cast<PointerType>(ArgVal->getType());
2759      Type *ElementTy = Ty->getElementType();
2760      unsigned FrameSize = DL.getTypeAllocSize(ElementTy);
2761      unsigned FrameAlign = CS.getParamAlignment(AttrInd);
2762      if (!FrameAlign)
2763        FrameAlign = TLI.getByValTypeAlignment(ElementTy);
2764      Flags.setByVal();
2765      Flags.setByValSize(FrameSize);
2766      Flags.setByValAlign(FrameAlign);
2767      if (!IsMemcpySmall(FrameSize))
2768        return false;
2769    }
2770
2771    if (CS.paramHasAttr(AttrInd, Attribute::InReg))
2772      Flags.setInReg();
2773    if (CS.paramHasAttr(AttrInd, Attribute::Nest))
2774      Flags.setNest();
2775
2776    // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
2777    // instruction.  This is safe because it is common to all fastisel supported
2778    // calling conventions on x86.
2779    if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
2780      if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
2781          CI->getBitWidth() == 16) {
2782        if (Flags.isSExt())
2783          ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
2784        else
2785          ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
2786      }
2787    }
2788
2789    unsigned ArgReg;
2790
2791    // Passing bools around ends up doing a trunc to i1 and passing it.
2792    // Codegen this as an argument + "and 1".
2793    if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
2794        cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
2795        ArgVal->hasOneUse()) {
2796      ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
2797      ArgReg = getRegForValue(ArgVal);
2798      if (ArgReg == 0) return false;
2799
2800      MVT ArgVT;
2801      if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
2802
2803      ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
2804                           ArgVal->hasOneUse(), 1);
2805    } else {
2806      ArgReg = getRegForValue(ArgVal);
2807    }
2808
2809    if (ArgReg == 0) return false;
2810
2811    Type *ArgTy = ArgVal->getType();
2812    MVT ArgVT;
2813    if (!isTypeLegal(ArgTy, ArgVT))
2814      return false;
2815    if (ArgVT == MVT::x86mmx)
2816      return false;
2817    unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
2818    Flags.setOrigAlign(OriginalAlignment);
2819
2820    Args.push_back(ArgReg);
2821    ArgVals.push_back(ArgVal);
2822    ArgVTs.push_back(ArgVT);
2823    ArgFlags.push_back(Flags);
2824  }
2825
2826  // Analyze operands of the call, assigning locations to each operand.
2827  SmallVector<CCValAssign, 16> ArgLocs;
2828  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
2829                 I->getParent()->getContext());
2830
2831  // Allocate shadow area for Win64
2832  if (isWin64)
2833    CCInfo.AllocateStack(32, 8);
2834
2835  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
2836
2837  // Get a count of how many bytes are to be pushed on the stack.
2838  unsigned NumBytes = CCInfo.getNextStackOffset();
2839
2840  // Issue CALLSEQ_START
2841  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2842  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2843    .addImm(NumBytes);
2844
2845  // Process argument: walk the register/memloc assignments, inserting
2846  // copies / loads.
2847  SmallVector<unsigned, 4> RegArgs;
2848  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2849    CCValAssign &VA = ArgLocs[i];
2850    unsigned Arg = Args[VA.getValNo()];
2851    EVT ArgVT = ArgVTs[VA.getValNo()];
2852
2853    // Promote the value if needed.
2854    switch (VA.getLocInfo()) {
2855    case CCValAssign::Full: break;
2856    case CCValAssign::SExt: {
2857      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
2858             "Unexpected extend");
2859      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
2860                                       Arg, ArgVT, Arg);
2861      assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
2862      ArgVT = VA.getLocVT();
2863      break;
2864    }
2865    case CCValAssign::ZExt: {
2866      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
2867             "Unexpected extend");
2868      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
2869                                       Arg, ArgVT, Arg);
2870      assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
2871      ArgVT = VA.getLocVT();
2872      break;
2873    }
2874    case CCValAssign::AExt: {
2875      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
2876             "Unexpected extend");
2877      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
2878                                       Arg, ArgVT, Arg);
2879      if (!Emitted)
2880        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
2881                                    Arg, ArgVT, Arg);
2882      if (!Emitted)
2883        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
2884                                    Arg, ArgVT, Arg);
2885
2886      assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
2887      ArgVT = VA.getLocVT();
2888      break;
2889    }
2890    case CCValAssign::BCvt: {
2891      unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
2892                               ISD::BITCAST, Arg, /*TODO: Kill=*/false);
2893      assert(BC != 0 && "Failed to emit a bitcast!");
2894      Arg = BC;
2895      ArgVT = VA.getLocVT();
2896      break;
2897    }
2898    case CCValAssign::VExt:
2899      // VExt has not been implemented, so this should be impossible to reach
2900      // for now.  However, fallback to Selection DAG isel once implemented.
2901      return false;
2902    case CCValAssign::Indirect:
2903      // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
2904      // support this.
2905      return false;
2906    case CCValAssign::FPExt:
2907      llvm_unreachable("Unexpected loc info!");
2908    }
2909
2910    if (VA.isRegLoc()) {
2911      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2912              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
2913      RegArgs.push_back(VA.getLocReg());
2914    } else {
2915      unsigned LocMemOffset = VA.getLocMemOffset();
2916      X86AddressMode AM;
2917      const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo*>(
2918          getTargetMachine()->getRegisterInfo());
2919      AM.Base.Reg = RegInfo->getStackRegister();
2920      AM.Disp = LocMemOffset;
2921      const Value *ArgVal = ArgVals[VA.getValNo()];
2922      ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
2923
2924      if (Flags.isByVal()) {
2925        X86AddressMode SrcAM;
2926        SrcAM.Base.Reg = Arg;
2927        bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
2928        assert(Res && "memcpy length already checked!"); (void)Res;
2929      } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
2930        // If this is a really simple value, emit this with the Value* version
2931        // of X86FastEmitStore.  If it isn't simple, we don't want to do this,
2932        // as it can cause us to reevaluate the argument.
2933        if (!X86FastEmitStore(ArgVT, ArgVal, AM))
2934          return false;
2935      } else {
2936        if (!X86FastEmitStore(ArgVT, Arg, /*ValIsKill=*/false, AM))
2937          return false;
2938      }
2939    }
2940  }
2941
2942  // ELF / PIC requires GOT in the EBX register before function calls via PLT
2943  // GOT pointer.
2944  if (Subtarget->isPICStyleGOT()) {
2945    unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
2946    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2947            TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
2948  }
2949
2950  if (Subtarget->is64Bit() && isVarArg && !isWin64) {
2951    // Count the number of XMM registers allocated.
2952    static const MCPhysReg XMMArgRegs[] = {
2953      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2954      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2955    };
2956    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
2957    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
2958            X86::AL).addImm(NumXMMRegs);
2959  }
2960
2961  // Issue the call.
2962  MachineInstrBuilder MIB;
2963  if (CalleeOp) {
2964    // Register-indirect call.
2965    unsigned CallOpc;
2966    if (Subtarget->is64Bit())
2967      CallOpc = X86::CALL64r;
2968    else
2969      CallOpc = X86::CALL32r;
2970    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
2971      .addReg(CalleeOp);
2972
2973  } else {
2974    // Direct call.
2975    assert(GV && "Not a direct call");
2976    unsigned CallOpc;
2977    if (Subtarget->is64Bit())
2978      CallOpc = X86::CALL64pcrel32;
2979    else
2980      CallOpc = X86::CALLpcrel32;
2981
2982    // See if we need any target-specific flags on the GV operand.
2983    unsigned char OpFlags = 0;
2984
2985    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
2986    // external symbols most go through the PLT in PIC mode.  If the symbol
2987    // has hidden or protected visibility, or if it is static or local, then
2988    // we don't need to use the PLT - we can directly call it.
2989    if (Subtarget->isTargetELF() &&
2990        TM.getRelocationModel() == Reloc::PIC_ &&
2991        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
2992      OpFlags = X86II::MO_PLT;
2993    } else if (Subtarget->isPICStyleStubAny() &&
2994               (GV->isDeclaration() || GV->isWeakForLinker()) &&
2995               (!Subtarget->getTargetTriple().isMacOSX() ||
2996                Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
2997      // PC-relative references to external symbols should go through $stub,
2998      // unless we're building with the leopard linker or later, which
2999      // automatically synthesizes these stubs.
3000      OpFlags = X86II::MO_DARWIN_STUB;
3001    }
3002
3003
3004    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3005    if (MemIntName)
3006      MIB.addExternalSymbol(MemIntName, OpFlags);
3007    else
3008      MIB.addGlobalAddress(GV, 0, OpFlags);
3009  }
3010
3011  // Add a register mask with the call-preserved registers.
3012  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3013  MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
3014
3015  // Add an implicit use GOT pointer in EBX.
3016  if (Subtarget->isPICStyleGOT())
3017    MIB.addReg(X86::EBX, RegState::Implicit);
3018
3019  if (Subtarget->is64Bit() && isVarArg && !isWin64)
3020    MIB.addReg(X86::AL, RegState::Implicit);
3021
3022  // Add implicit physical register uses to the call.
3023  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
3024    MIB.addReg(RegArgs[i], RegState::Implicit);
3025
3026  // Issue CALLSEQ_END
3027  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3028  const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS);
3029  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3030    .addImm(NumBytes).addImm(NumBytesCallee);
3031
3032  // Build info for return calling conv lowering code.
3033  // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
3034  SmallVector<ISD::InputArg, 32> Ins;
3035  SmallVector<EVT, 4> RetTys;
3036  ComputeValueVTs(TLI, I->getType(), RetTys);
3037  for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
3038    EVT VT = RetTys[i];
3039    MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
3040    unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
3041    for (unsigned j = 0; j != NumRegs; ++j) {
3042      ISD::InputArg MyFlags;
3043      MyFlags.VT = RegisterVT;
3044      MyFlags.Used = !CS.getInstruction()->use_empty();
3045      if (CS.paramHasAttr(0, Attribute::SExt))
3046        MyFlags.Flags.setSExt();
3047      if (CS.paramHasAttr(0, Attribute::ZExt))
3048        MyFlags.Flags.setZExt();
3049      if (CS.paramHasAttr(0, Attribute::InReg))
3050        MyFlags.Flags.setInReg();
3051      Ins.push_back(MyFlags);
3052    }
3053  }
3054
3055  // Now handle call return values.
3056  SmallVector<unsigned, 4> UsedRegs;
3057  SmallVector<CCValAssign, 16> RVLocs;
3058  CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
3059                    I->getParent()->getContext());
3060  unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
3061  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3062  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3063    EVT CopyVT = RVLocs[i].getValVT();
3064    unsigned CopyReg = ResultReg + i;
3065
3066    // If this is a call to a function that returns an fp value on the x87 fp
3067    // stack, but where we prefer to use the value in xmm registers, copy it
3068    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
3069    if ((RVLocs[i].getLocReg() == X86::ST0 ||
3070         RVLocs[i].getLocReg() == X86::ST1)) {
3071      if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
3072        CopyVT = MVT::f80;
3073        CopyReg = createResultReg(&X86::RFP80RegClass);
3074      }
3075      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3076              TII.get(X86::FpPOP_RETVAL), CopyReg);
3077    } else {
3078      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3079              TII.get(TargetOpcode::COPY),
3080              CopyReg).addReg(RVLocs[i].getLocReg());
3081      UsedRegs.push_back(RVLocs[i].getLocReg());
3082    }
3083
3084    if (CopyVT != RVLocs[i].getValVT()) {
3085      // Round the F80 the right size, which also moves to the appropriate xmm
3086      // register. This is accomplished by storing the F80 value in memory and
3087      // then loading it back. Ewww...
3088      EVT ResVT = RVLocs[i].getValVT();
3089      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3090      unsigned MemSize = ResVT.getSizeInBits()/8;
3091      int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3092      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3093                                TII.get(Opc)), FI)
3094        .addReg(CopyReg);
3095      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
3096      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3097                                TII.get(Opc), ResultReg + i), FI);
3098    }
3099  }
3100
3101  if (RVLocs.size())
3102    UpdateValueMap(I, ResultReg, RVLocs.size());
3103
3104  // Set all unused physreg defs as dead.
3105  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
3106
3107  return true;
3108}
3109
3110
3111bool
3112X86FastISel::TargetSelectInstruction(const Instruction *I)  {
3113  switch (I->getOpcode()) {
3114  default: break;
3115  case Instruction::Load:
3116    return X86SelectLoad(I);
3117  case Instruction::Store:
3118    return X86SelectStore(I);
3119  case Instruction::Ret:
3120    return X86SelectRet(I);
3121  case Instruction::ICmp:
3122  case Instruction::FCmp:
3123    return X86SelectCmp(I);
3124  case Instruction::ZExt:
3125    return X86SelectZExt(I);
3126  case Instruction::Br:
3127    return X86SelectBranch(I);
3128  case Instruction::Call:
3129    return X86SelectCall(I);
3130  case Instruction::LShr:
3131  case Instruction::AShr:
3132  case Instruction::Shl:
3133    return X86SelectShift(I);
3134  case Instruction::SDiv:
3135  case Instruction::UDiv:
3136  case Instruction::SRem:
3137  case Instruction::URem:
3138    return X86SelectDivRem(I);
3139  case Instruction::Select:
3140    return X86SelectSelect(I);
3141  case Instruction::Trunc:
3142    return X86SelectTrunc(I);
3143  case Instruction::FPExt:
3144    return X86SelectFPExt(I);
3145  case Instruction::FPTrunc:
3146    return X86SelectFPTrunc(I);
3147  case Instruction::IntToPtr: // Deliberate fall-through.
3148  case Instruction::PtrToInt: {
3149    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
3150    EVT DstVT = TLI.getValueType(I->getType());
3151    if (DstVT.bitsGT(SrcVT))
3152      return X86SelectZExt(I);
3153    if (DstVT.bitsLT(SrcVT))
3154      return X86SelectTrunc(I);
3155    unsigned Reg = getRegForValue(I->getOperand(0));
3156    if (Reg == 0) return false;
3157    UpdateValueMap(I, Reg);
3158    return true;
3159  }
3160  }
3161
3162  return false;
3163}
3164
3165unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
3166  MVT VT;
3167  if (!isTypeLegal(C->getType(), VT))
3168    return 0;
3169
3170  // Can't handle alternate code models yet.
3171  if (TM.getCodeModel() != CodeModel::Small)
3172    return 0;
3173
3174  // Get opcode and regclass of the output for the given load instruction.
3175  unsigned Opc = 0;
3176  const TargetRegisterClass *RC = nullptr;
3177  switch (VT.SimpleTy) {
3178  default: return 0;
3179  case MVT::i8:
3180    Opc = X86::MOV8rm;
3181    RC  = &X86::GR8RegClass;
3182    break;
3183  case MVT::i16:
3184    Opc = X86::MOV16rm;
3185    RC  = &X86::GR16RegClass;
3186    break;
3187  case MVT::i32:
3188    Opc = X86::MOV32rm;
3189    RC  = &X86::GR32RegClass;
3190    break;
3191  case MVT::i64:
3192    // Must be in x86-64 mode.
3193    Opc = X86::MOV64rm;
3194    RC  = &X86::GR64RegClass;
3195    break;
3196  case MVT::f32:
3197    if (X86ScalarSSEf32) {
3198      Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
3199      RC  = &X86::FR32RegClass;
3200    } else {
3201      Opc = X86::LD_Fp32m;
3202      RC  = &X86::RFP32RegClass;
3203    }
3204    break;
3205  case MVT::f64:
3206    if (X86ScalarSSEf64) {
3207      Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
3208      RC  = &X86::FR64RegClass;
3209    } else {
3210      Opc = X86::LD_Fp64m;
3211      RC  = &X86::RFP64RegClass;
3212    }
3213    break;
3214  case MVT::f80:
3215    // No f80 support yet.
3216    return 0;
3217  }
3218
3219  // Materialize addresses with LEA/MOV instructions.
3220  if (isa<GlobalValue>(C)) {
3221    X86AddressMode AM;
3222    if (X86SelectAddress(C, AM)) {
3223      // If the expression is just a basereg, then we're done, otherwise we need
3224      // to emit an LEA.
3225      if (AM.BaseType == X86AddressMode::RegBase &&
3226          AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3227        return AM.Base.Reg;
3228
3229      unsigned ResultReg = createResultReg(RC);
3230      if (TM.getRelocationModel() == Reloc::Static &&
3231          TLI.getPointerTy() == MVT::i64) {
3232        // The displacement code be more than 32 bits away so we need to use
3233        // an instruction with a 64 bit immediate
3234        Opc = X86::MOV64ri;
3235        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3236              TII.get(Opc), ResultReg).addGlobalAddress(cast<GlobalValue>(C));
3237      } else {
3238        Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
3239        addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3240                             TII.get(Opc), ResultReg), AM);
3241      }
3242      return ResultReg;
3243    }
3244    return 0;
3245  }
3246
3247  // MachineConstantPool wants an explicit alignment.
3248  unsigned Align = DL.getPrefTypeAlignment(C->getType());
3249  if (Align == 0) {
3250    // Alignment of vector types.  FIXME!
3251    Align = DL.getTypeAllocSize(C->getType());
3252  }
3253
3254  // x86-32 PIC requires a PIC base register for constant pools.
3255  unsigned PICBase = 0;
3256  unsigned char OpFlag = 0;
3257  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
3258    OpFlag = X86II::MO_PIC_BASE_OFFSET;
3259    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3260  } else if (Subtarget->isPICStyleGOT()) {
3261    OpFlag = X86II::MO_GOTOFF;
3262    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3263  } else if (Subtarget->isPICStyleRIPRel() &&
3264             TM.getCodeModel() == CodeModel::Small) {
3265    PICBase = X86::RIP;
3266  }
3267
3268  // Create the load from the constant pool.
3269  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
3270  unsigned ResultReg = createResultReg(RC);
3271  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3272                                   TII.get(Opc), ResultReg),
3273                           MCPOffset, PICBase, OpFlag);
3274
3275  return ResultReg;
3276}
3277
3278unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
3279  // Fail on dynamic allocas. At this point, getRegForValue has already
3280  // checked its CSE maps, so if we're here trying to handle a dynamic
3281  // alloca, we're not going to succeed. X86SelectAddress has a
3282  // check for dynamic allocas, because it's called directly from
3283  // various places, but TargetMaterializeAlloca also needs a check
3284  // in order to avoid recursion between getRegForValue,
3285  // X86SelectAddrss, and TargetMaterializeAlloca.
3286  if (!FuncInfo.StaticAllocaMap.count(C))
3287    return 0;
3288  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3289
3290  X86AddressMode AM;
3291  if (!X86SelectAddress(C, AM))
3292    return 0;
3293  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
3294  const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
3295  unsigned ResultReg = createResultReg(RC);
3296  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3297                         TII.get(Opc), ResultReg), AM);
3298  return ResultReg;
3299}
3300
3301unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
3302  MVT VT;
3303  if (!isTypeLegal(CF->getType(), VT))
3304    return 0;
3305
3306  // Get opcode and regclass for the given zero.
3307  unsigned Opc = 0;
3308  const TargetRegisterClass *RC = nullptr;
3309  switch (VT.SimpleTy) {
3310  default: return 0;
3311  case MVT::f32:
3312    if (X86ScalarSSEf32) {
3313      Opc = X86::FsFLD0SS;
3314      RC  = &X86::FR32RegClass;
3315    } else {
3316      Opc = X86::LD_Fp032;
3317      RC  = &X86::RFP32RegClass;
3318    }
3319    break;
3320  case MVT::f64:
3321    if (X86ScalarSSEf64) {
3322      Opc = X86::FsFLD0SD;
3323      RC  = &X86::FR64RegClass;
3324    } else {
3325      Opc = X86::LD_Fp064;
3326      RC  = &X86::RFP64RegClass;
3327    }
3328    break;
3329  case MVT::f80:
3330    // No f80 support yet.
3331    return 0;
3332  }
3333
3334  unsigned ResultReg = createResultReg(RC);
3335  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3336  return ResultReg;
3337}
3338
3339
3340bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3341                                      const LoadInst *LI) {
3342  const Value *Ptr = LI->getPointerOperand();
3343  X86AddressMode AM;
3344  if (!X86SelectAddress(Ptr, AM))
3345    return false;
3346
3347  const X86InstrInfo &XII = (const X86InstrInfo&)TII;
3348
3349  unsigned Size = DL.getTypeAllocSize(LI->getType());
3350  unsigned Alignment = LI->getAlignment();
3351
3352  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
3353    Alignment = DL.getABITypeAlignment(LI->getType());
3354
3355  SmallVector<MachineOperand, 8> AddrOps;
3356  AM.getFullAddress(AddrOps);
3357
3358  MachineInstr *Result =
3359    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
3360  if (!Result)
3361    return false;
3362
3363  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3364  FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
3365  MI->eraseFromParent();
3366  return true;
3367}
3368
3369
3370namespace llvm {
3371  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
3372                                const TargetLibraryInfo *libInfo) {
3373    return new X86FastISel(funcInfo, libInfo);
3374  }
3375}
3376