//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

class X86FastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo) {
    Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  bool fastSelectInstruction(const Instruction *I) override;

  /// \brief The specified machine instr operand is a vreg, and that vreg is
  /// being provided by the specified load instruction. If possible, try to
  /// fold the load as an operand to the instruction, returning true on
  /// success.
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
                          DebugLoc DL);

  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
                       unsigned &ResultReg, unsigned Alignment = 1);

  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);
  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                        const X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectDivRem(const Instruction *I);

  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
                               const TargetRegisterClass *RC);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);
  bool X86SelectSIToFP(const Instruction *I);

  const X86InstrInfo *getInstrInfo() const {
    return Subtarget->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);

  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned fastMaterializeConstant(const Constant *C) override;

  unsigned fastMaterializeAlloca(const AllocaInst *C) override;

  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
  }

  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);

  bool IsMemcpySmall(uint64_t Len);

  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
                          X86AddressMode SrcAM, uint64_t Len);

  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                            const Value *Cond);
};

} // end anonymous namespace.

static std::pair<X86::CondCode, bool>
getX86ConditionCode(CmpInst::Predicate Predicate) {
  X86::CondCode CC = X86::COND_INVALID;
  bool NeedSwap = false;
  switch (Predicate) {
  default: break;
  // Floating-point Predicates
  case CmpInst::FCMP_UEQ: CC = X86::COND_E;       break;
  case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OGT: CC = X86::COND_A;       break;
  case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OGE: CC = X86::COND_AE;      break;
  case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_ULT: CC = X86::COND_B;       break;
  case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_ULE: CC = X86::COND_BE;      break;
  case CmpInst::FCMP_ONE: CC = X86::COND_NE;      break;
  case CmpInst::FCMP_UNO: CC = X86::COND_P;       break;
  case CmpInst::FCMP_ORD: CC = X86::COND_NP;      break;
  case CmpInst::FCMP_OEQ: // fall-through
  case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;

  // Integer Predicates
  case CmpInst::ICMP_EQ:  CC = X86::COND_E;       break;
  case CmpInst::ICMP_NE:  CC = X86::COND_NE;      break;
  case CmpInst::ICMP_UGT: CC = X86::COND_A;       break;
  case CmpInst::ICMP_UGE: CC = X86::COND_AE;      break;
  case CmpInst::ICMP_ULT: CC = X86::COND_B;       break;
  case CmpInst::ICMP_ULE: CC = X86::COND_BE;      break;
  case CmpInst::ICMP_SGT: CC = X86::COND_G;       break;
  case CmpInst::ICMP_SGE: CC = X86::COND_GE;      break;
  case CmpInst::ICMP_SLT: CC = X86::COND_L;       break;
  case CmpInst::ICMP_SLE: CC = X86::COND_LE;      break;
  }

  return std::make_pair(CC, NeedSwap);
}
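
// For example (illustrative): FCMP_OLT has no direct condition code after a
// UCOMISS/UCOMISD, since those compares set the flags like an unsigned
// comparison. The table above therefore reports NeedSwap for FCMP_OLT and maps
// it to COND_A, i.e. "olt x, y" is checked as "ogt y, x" once the caller swaps
// the operands.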

static std::pair<unsigned, bool>
getX86SSEConditionCode(CmpInst::Predicate Predicate) {
  unsigned CC;
  bool NeedSwap = false;

  // SSE Condition code mapping:
  //  0 - EQ
  //  1 - LT
  //  2 - LE
  //  3 - UNORD
  //  4 - NEQ
  //  5 - NLT
  //  6 - NLE
  //  7 - ORD
  switch (Predicate) {
  default: llvm_unreachable("Unexpected predicate");
  case CmpInst::FCMP_OEQ: CC = 0;          break;
  case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OLT: CC = 1;          break;
  case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OLE: CC = 2;          break;
  case CmpInst::FCMP_UNO: CC = 3;          break;
  case CmpInst::FCMP_UNE: CC = 4;          break;
  case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_UGE: CC = 5;          break;
  case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_UGT: CC = 6;          break;
  case CmpInst::FCMP_ORD: CC = 7;          break;
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_ONE: CC = 8;          break;
  }

  return std::make_pair(CC, NeedSwap);
}
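
// Illustrative example: the value returned here is used as the immediate of a
// CMPSS/CMPSD-style compare. FCMP_OGT returns {1 /*LT*/, true}, so the caller
// swaps the operands and "ogt x, y" is computed as "olt y, x". The
// out-of-range value 8 (for FCMP_UEQ/FCMP_ONE) signals that no single compare
// immediate suffices and the caller must expand it specially.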

/// \brief Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
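///
/// A typical pattern this matches looks like (illustrative IR):
///   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %sum = extractvalue { i32, i1 } %res, 0
///   %ovf = extractvalue { i32, i1 } %res, 1
///   br i1 %ovf, label %overflow, label %cont
/// Here the branch can test the CPU's overflow flag (COND_O) directly instead
/// of materializing %ovf in a register first.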
bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                                       const Value *Cond) {
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  Type *RetTy =
    cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  X86::CondCode TmpCC;
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
  }

  // Check if both instructions are in the same basic block.
  if (II->getParent() != I->getParent())
    return false;

  // Make sure nothing is in the way
  BasicBlock::const_iterator Start = I;
  BasicBlock::const_iterator End = II;
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  CC = TmpCC;
  return true;
}

bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
  if (evt == MVT::Other || !evt.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  VT = evt.getSimpleVT();
  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

#include "X86GenCallingConv.inc"

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is given by the pre-computed address mode AM, which may refer
/// to a register base, a frame index, or a GlobalAddress.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
                                  MachineMemOperand *MMO, unsigned &ResultReg,
                                  unsigned Alignment) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = &X86::GR8RegClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = &X86::GR16RegClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = &X86::GR32RegClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = &X86::GR64RegClass;
    break;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
      RC  = &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
      RC  = &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  case MVT::v4f32:
    if (Alignment >= 16)
      Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
    else
      Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
    RC  = &X86::VR128RegClass;
    break;
  case MVT::v2f64:
    if (Alignment >= 16)
      Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
    else
      Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
    RC  = &X86::VR128RegClass;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (Alignment >= 16)
      Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
    else
      Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
    RC  = &X86::VR128RegClass;
    break;
  }

  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  addFullAddress(MIB, AM);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base pointer
/// and a displacement offset, or a GlobalAddress.
/// Return true if it is possible.
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                                   const X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    unsigned AndResult = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(X86::AND8ri), AndResult)
      .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
    ValReg = AndResult;
  }
  // FALLTHROUGH, handling i1 as i8.
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32: Opc = X86::MOV32mr; break;
  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
  case MVT::f32:
    Opc = X86ScalarSSEf32 ?
          (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
    break;
  case MVT::f64:
    Opc = X86ScalarSSEf64 ?
          (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
    break;
  case MVT::v4f32:
    if (Aligned)
      Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
    else
      Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    break;
  case MVT::v2f64:
    if (Aligned)
      Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
    else
      Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (Aligned)
      Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
    else
      Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
    break;
  }

  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);

  return true;
}

bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   const X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
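  // For example (illustrative), "store i32 42, i32* %p" can then be emitted as
  // a single MOV32mi instead of first materializing 42 in a register.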
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1:  Signed = false;     // FALLTHROUGH to handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if (isInt<32>(CI->getSExtValue()))
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      MachineInstrBuilder MIB =
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
      addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
                                            : CI->getZExtValue());
      if (MMO)
        MIB->addMemOperand(*FuncInfo.MF, MMO);
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  bool ValKill = hasTrivialKill(Val);
  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);
  if (RR == 0)
    return false;

  ResultReg = RR;
  return true;
}

bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // Can't handle TLS yet.
    if (GV->isThreadLocal())
      return false;

    // RIP-relative addresses can't have additional register operands, so if
    // we've already folded stuff into the addressing mode, just force the
    // global value into its own register, which we can use as the basereg.
    if (!Subtarget->isPICStyleRIPRel() ||
        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
      // Okay, we've committed to selecting this global. Set up the address.
      AM.GV = GV;

      // Allow the subtarget to classify the global.
      unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);

      // If this reference is relative to the pic base, set it now.
      if (isGlobalRelativeToPICBase(GVFlags)) {
        // FIXME: How do we know Base.Reg is free??
        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
      }

      // Unless the ABI requires an extra load, return a direct reference to
      // the global.
      if (!isGlobalStubReference(GVFlags)) {
        if (Subtarget->isPICStyleRIPRel()) {
          // Use rip-relative addressing if we can.  Above we verified that the
          // base and index registers are unused.
          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
          AM.Base.Reg = X86::RIP;
        }
        AM.GVOpFlags = GVFlags;
        return true;
      }

      // Ok, we need to do a load from a stub.  If we've already loaded from
      // this stub, reuse the loaded pointer, otherwise emit the load now.
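      // (On x86-64 PIC, for instance, such a stub load is typically something
      // like "movq foo@GOTPCREL(%rip), %reg"; the exact form depends on how
      // the subtarget classified the reference.)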
      DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
      unsigned LoadReg;
      if (I != LocalValueMap.end() && I->second != 0) {
        LoadReg = I->second;
      } else {
        // Issue load from stub.
        unsigned Opc = 0;
        const TargetRegisterClass *RC = nullptr;
        X86AddressMode StubAM;
        StubAM.Base.Reg = AM.Base.Reg;
        StubAM.GV = GV;
        StubAM.GVOpFlags = GVFlags;

        // Prepare for inserting code in the local-value area.
        SavePoint SaveInsertPt = enterLocalValueArea();

        if (TLI.getPointerTy() == MVT::i64) {
          Opc = X86::MOV64rm;
          RC  = &X86::GR64RegClass;

          if (Subtarget->isPICStyleRIPRel())
            StubAM.Base.Reg = X86::RIP;
        } else {
          Opc = X86::MOV32rm;
          RC  = &X86::GR32RegClass;
        }

        LoadReg = createResultReg(RC);
        MachineInstrBuilder LoadMI =
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
        addFullAddress(LoadMI, StubAM);

        // Ok, back to normal mode.
        leaveLocalValueArea(SaveInsertPt);

        // Prevent loading GV stub multiple times in same MBB.
        LocalValueMap[V] = LoadReg;
      }

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = LoadReg;
      AM.GV = nullptr;
      return true;
    }
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectAddress - Attempt to fill in an address from the given value.
///
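/// For example (illustrative), a GEP that adds a constant offset and one
/// scaled index, such as "p + i*4 + 8", can be folded entirely into the
/// [Base + Index*Scale + Disp] form of X86AddressMode, so the load or store
/// that uses it needs no separate address computation.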
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  SmallVector<const Value *, 32> GEPs;
redo_gep:
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = DL.getStructLayout(STy);
        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
        continue;
      }

      // An array/variable index is always of the form i*S where S is the
      // constant scale size. See if we can push the scale into immediates.
      uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
      for (;;) {
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
          break;
        }
        if (canFoldAddIntoGEP(U, Op)) {
          // A compatible add with a constant operand. Fold the constant.
          ConstantInt *CI =
            cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
          Disp += CI->getSExtValue() * S;
          // Iterate on the other operand.
          Op = cast<AddOperator>(Op)->getOperand(0);
          continue;
        }
        if (IndexReg == 0 &&
            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
            (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(Op).first;
          if (IndexReg == 0)
            return false;
          break;
        }
        // Unsupported.
        goto unsupported_gep;
      }
    }

    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;

    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    GEPs.push_back(V);

    if (const GetElementPtrInst *GEP =
          dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
      // Ok, the GEP indices were covered by constant-offset and scaled-index
      // addressing. Update the address state and move on to examining the base.
      V = GEP;
      goto redo_gep;
    } else if (X86SelectAddress(U->getOperand(0), AM)) {
      return true;
    }

    // If we couldn't merge the gep value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;

    for (SmallVectorImpl<const Value *>::reverse_iterator
           I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
      if (handleConstantAddresses(*I, AM))
        return true;

    return false;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  return handleConstantAddresses(V, AM);
}

/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  const Instruction *I = dyn_cast<Instruction>(V);
  // Record if the value is defined in the same basic block.
  //
  // This information is crucial to know whether or not folding an
  // operand is valid.
  // Indeed, FastISel generates or reuses a virtual register for all
  // operands of all instructions it selects. Obviously, the definition and
  // its uses must use the same virtual register, otherwise the produced
  // code is incorrect.
  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
  // registers for values that are live across basic blocks. This ensures
  // that the values are set consistently across basic blocks, even if
  // different instruction selection mechanisms are used (e.g., a mix of
  // SDISel and FastISel).
  // For values local to a basic block, the instruction selection process
  // generates these virtual registers with whatever method is appropriate
  // for its needs. In particular, FastISel and SDISel do not share the way
  // local virtual registers are set.
  // Therefore, it is impossible (or at least unsafe) to share values
  // between basic blocks unless they use the same instruction selection
  // method, which is not guaranteed for X86.
  // Moreover, things like hasOneUse cannot be used accurately if we allow
  // references to values across basic blocks when they were not originally
  // live across basic blocks.
  bool InMBB = true;
  if (I) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle DLL Import.
    if (GV->hasDLLImportStorageClass())
      return false;

    // Can't handle TLS.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // No ABI requires an extra load for anything other than DLLImport, which
    // we rejected above. Return a direct reference to the global.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can.  Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else if (Subtarget->isPICStyleStubPIC()) {
      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
    } else if (Subtarget->isPICStyleGOT()) {
      AM.GVOpFlags = X86II::MO_GOTOFF;
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}


/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
  // Atomic stores need special handling.
  const StoreInst *S = cast<StoreInst>(I);

  if (S->isAtomic())
    return false;

  const Value *Val = S->getValueOperand();
  const Value *Ptr = S->getPointerOperand();

  MVT VT;
  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
    return false;

  unsigned Alignment = S->getAlignment();
  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
  if (Alignment == 0) // Ensure that codegen never sees alignment 0
    Alignment = ABIAlignment;
  bool Aligned = Alignment >= ABIAlignment;

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
}

/// X86SelectRet - Select and emit code to implement ret instructions.
bool X86FastISel::X86SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();
  const X86MachineFunctionInfo *X86MFInfo =
      FuncInfo.MF->getInfo<X86MachineFunctionInfo>();

  if (!FuncInfo.CanLowerReturn)
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall &&
      CC != CallingConv::X86_64_SysV)
    return false;

  if (Subtarget->isCallingConvWin64(CC))
    return false;

  // Don't handle popping bytes on return for now.
  if (X86MFInfo->getBytesToPopOnReturn() != 0)
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  if (F.isVarArg())
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_X86);

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    // The calling-convention tables for x87 returns don't tell
    // the whole story.
    if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT SrcVT = TLI.getValueType(RV->getType());
    EVT DstVT = VA.getValVT();
    // Special handling for extended integers.
    if (SrcVT != DstVT) {
      if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      assert(DstVT == MVT::i32 && "X86 should always ext to i32");

      if (SrcVT == MVT::i1) {
        if (Outs[0].Flags.isSExt())
          return false;
        SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
        SrcVT = MVT::i8;
      }
      unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
                                             ISD::SIGN_EXTEND;
      SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
                          SrcReg, /*TODO: Kill=*/false);
    }

    // Make the copy.
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. We saved the argument into
  // a virtual register in the entry block, so now we copy the value out
  // and into %rax. We also do the same with %eax for Win32.
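  // (Illustrative: for IR like "define void @f(%struct.S* sret %out)", the
  // hidden %out pointer must come back to the caller in %rax / %eax even
  // though the IR-level return is void.)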
  if (F.hasStructRetAttr() &&
      (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
    unsigned Reg = X86MFInfo->getSRetReturnReg();
    assert(Reg &&
           "SRetReturnReg should have been set in LowerFormalArguments()!");
    unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
    RetRegs.push_back(RetReg);
  }

  // Now emit the RET.
  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);
  return true;
}

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
  const LoadInst *LI = cast<LoadInst>(I);

  // Atomic loads need special handling.
  if (LI->isAtomic())
    return false;

  MVT VT;
  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
    return false;

  const Value *Ptr = LI->getPointerOperand();

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  unsigned Alignment = LI->getAlignment();
  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
  if (Alignment == 0) // Ensure that codegen never sees alignment 0
    Alignment = ABIAlignment;

  unsigned ResultReg = 0;
  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
                       Alignment))
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
  bool HasAVX = Subtarget->hasAVX();
  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
  bool X86ScalarSSEf64 = Subtarget->hasSSE2();

  switch (VT.getSimpleVT().SimpleTy) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32:
    return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
  case MVT::f64:
    return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
  }
}

/// If the RHS of the comparison is a constant that can be folded into the
/// compare, return an opcode that does so (e.g. CMP32ri); otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
  int64_t Val = RHSC->getSExtValue();
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default:
    return 0;
  case MVT::i8:
    return X86::CMP8ri;
  case MVT::i16:
    if (isInt<8>(Val))
      return X86::CMP16ri8;
    return X86::CMP16ri;
  case MVT::i32:
    if (isInt<8>(Val))
      return X86::CMP32ri8;
    return X86::CMP32ri;
  case MVT::i64:
    if (isInt<8>(Val))
      return X86::CMP64ri8;
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if (isInt<32>(Val))
      return X86::CMP64ri32;
    return 0;
  }
}
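
// For example (illustrative): comparing an i32 value against 42 can use
// CMP32ri8, since 42 fits in a sign-extended 8-bit immediate, while comparing
// against 1000 needs the full CMP32ri form.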

bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
                                     EVT VT, DebugLoc CurDbgLoc) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));

  // We have two options: compare with register or immediate.  If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
        .addReg(Op0Reg)
        .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
    .addReg(Op0Reg)
    .addReg(Op1Reg);

  return true;
}

bool X86FastISel::X86SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Try to optimize or fold the cmp.
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  unsigned ResultReg = 0;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_FALSE: {
    ResultReg = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
            ResultReg);
    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
                                           X86::sub_8bit);
    if (!ResultReg)
      return false;
    break;
  }
  case CmpInst::FCMP_TRUE: {
    ResultReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
            ResultReg).addImm(1);
    break;
  }
  }

  if (ResultReg) {
    updateValueMap(I, ResultReg);
    return true;
  }

  const Value *LHS = CI->getOperand(0);
  const Value *RHS = CI->getOperand(1);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just use
  // %x again on the RHS.
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *RHSC = dyn_cast<ConstantFP>(RHS);
    if (RHSC && RHSC->isNullValue())
      RHS = LHS;
  }

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
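  // For example (illustrative), FCMP_OEQ is true only if the operands compare
  // equal AND are not unordered, so it is emitted roughly as:
  //   ucomiss %xmm1, %xmm0
  //   sete    %al
  //   setnp   %cl
  //   andb    %cl, %al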
  static unsigned SETFOpcTable[2][3] = {
    { X86::SETEr,  X86::SETNPr, X86::AND8rr },
    { X86::SETNEr, X86::SETPr,  X86::OR8rr  }
  };
  unsigned *SETFOpc = nullptr;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
  }

  ResultReg = createResultReg(&X86::GR8RegClass);
  if (SETFOpc) {
    if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
      return false;

    unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
    unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
            FlagReg1);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
            FlagReg2);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
            ResultReg).addReg(FlagReg1).addReg(FlagReg2);
    updateValueMap(I, ResultReg);
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
  unsigned Opc = X86::getSETFromCond(CC);

  if (SwapArgs)
    std::swap(LHS, RHS);

  // Emit a compare of LHS/RHS.
  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
    return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectZExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  unsigned ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Handle zero-extension from i1 to i8, which is common.
  MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
  if (SrcVT.SimpleTy == MVT::i1) {
    // Set the high bits to zero.
    ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
    SrcVT = MVT::i8;

    if (ResultReg == 0)
      return false;
  }

  if (DstVT == MVT::i64) {
    // Handle extension to 64-bits via sub-register shenanigans.
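    // On x86-64, writing a 32-bit register implicitly zeroes bits 63:32, so a
    // 32-bit zero-extending move followed by SUBREG_TO_REG is enough to
    // produce the 64-bit zero-extended value (illustrative rationale).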
    unsigned MovInst;

    switch (SrcVT.SimpleTy) {
    case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
    case MVT::i16: MovInst = X86::MOVZX32rr16; break;
    case MVT::i32: MovInst = X86::MOV32rr;     break;
    default: llvm_unreachable("Unexpected zext to i64 source type");
    }

    unsigned Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
      .addReg(ResultReg);

    ResultReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
            ResultReg)
      .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
  } else if (DstVT != MVT::i8) {
    ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
                           ResultReg, /*Kill=*/true);
    if (ResultReg == 0)
      return false;
  }

  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectBranch(const Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison
  // in the same block (values defined on other blocks may not have
  // initialized registers).
  X86::CondCode CC;
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
      EVT VT = TLI.getValueType(CI->getOperand(0)->getType());

      // Try to optimize or fold the cmp.
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
      switch (Predicate) {
      default: break;
      case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
      case CmpInst::FCMP_TRUE:  fastEmitBranch(TrueMBB, DbgLoc); return true;
      }

      const Value *CmpLHS = CI->getOperand(0);
      const Value *CmpRHS = CI->getOperand(1);

      // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
      // 0.0. We don't have to materialize a zero constant for this case and
      // can just use %x again on the RHS.
      if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
        const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
        if (CmpRHSC && CmpRHSC->isNullValue())
          CmpRHS = CmpLHS;
      }

      // Try to take advantage of fallthrough opportunities.
      if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
      // code check. Instead two branch instructions are required to check all
      // the flags. First we change the predicate to a supported condition
      // code, which will be the first branch. Later on we will emit the
      // second branch.
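      // For example (illustrative), "br (fcmp une x, y), T, F" ends up as:
      //   ucomiss %xmm1, %xmm0
      //   jne T          ; not equal
      //   jp  T          ; unordered
      //   jmp F
      // (for FCMP_OEQ the true/false successors are swapped first).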
      bool NeedExtraBranch = false;
      switch (Predicate) {
      default: break;
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB); // fall-through
      case CmpInst::FCMP_UNE:
        NeedExtraBranch = true;
        Predicate = CmpInst::FCMP_ONE;
        break;
      }

      bool SwapArgs;
      unsigned BranchOpc;
      std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
      assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

      BranchOpc = X86::GetCondBranchFromCond(CC);
      if (SwapArgs)
        std::swap(CmpLHS, CmpRHS);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
        return false;

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
        .addMBB(TrueMBB);

      // X86 requires a second branch to handle UNE (and OEQ, which is mapped
      // to UNE above).
      if (NeedExtraBranch) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
          .addMBB(TrueMBB);
      }

      // Obtain the branch weight and add the TrueBB to the successor list.
      uint32_t BranchWeight = 0;
      if (FuncInfo.BPI)
        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                   TrueMBB->getBasicBlock());
      FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);

      // Emit an unconditional branch to the FalseBB; this also obtains the
      // branch weight and adds FalseBB to the successor list.
      fastEmitBranch(FalseMBB, DbgLoc);

      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
    // typically happen for _Bool and C++ bools.
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
      unsigned TestOpc = 0;
      switch (SourceVT.SimpleTy) {
      default: break;
      case MVT::i8:  TestOpc = X86::TEST8ri; break;
      case MVT::i16: TestOpc = X86::TEST16ri; break;
      case MVT::i32: TestOpc = X86::TEST32ri; break;
      case MVT::i64: TestOpc = X86::TEST64ri32; break;
      }
      if (TestOpc) {
        unsigned OpReg = getRegForValue(TI->getOperand(0));
        if (OpReg == 0) return false;
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
          .addReg(OpReg).addImm(1);

        unsigned JmpOpc = X86::JNE_1;
        if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
          std::swap(TrueMBB, FalseMBB);
          JmpOpc = X86::JE_1;
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
          .addMBB(TrueMBB);
        fastEmitBranch(FalseMBB, DbgLoc);
        uint32_t BranchWeight = 0;
        if (FuncInfo.BPI)
          BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                     TrueMBB->getBasicBlock());
        FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
        return true;
      }
    }
  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
    // Fake-request a register for the condition; otherwise the intrinsic
    // might be completely optimized away.
1468    unsigned TmpReg = getRegForValue(BI->getCondition());
1469    if (TmpReg == 0)
1470      return false;
1471
1472    unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
1473
1474    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1475      .addMBB(TrueMBB);
1476    fastEmitBranch(FalseMBB, DbgLoc);
1477    uint32_t BranchWeight = 0;
1478    if (FuncInfo.BPI)
1479      BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1480                                                 TrueMBB->getBasicBlock());
1481    FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
1482    return true;
1483  }
1484
1485  // Otherwise do a clumsy setcc and re-test it.
1486  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1487  // in an explicit cast, so make sure to handle that correctly.
1488  unsigned OpReg = getRegForValue(BI->getCondition());
1489  if (OpReg == 0) return false;
1490
1491  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1492    .addReg(OpReg).addImm(1);
1493  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
1494    .addMBB(TrueMBB);
1495  fastEmitBranch(FalseMBB, DbgLoc);
1496  uint32_t BranchWeight = 0;
1497  if (FuncInfo.BPI)
1498    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1499                                               TrueMBB->getBasicBlock());
1500  FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
1501  return true;
1502}
1503
1504bool X86FastISel::X86SelectShift(const Instruction *I) {
1505  unsigned CReg = 0, OpReg = 0;
1506  const TargetRegisterClass *RC = nullptr;
1507  if (I->getType()->isIntegerTy(8)) {
1508    CReg = X86::CL;
1509    RC = &X86::GR8RegClass;
1510    switch (I->getOpcode()) {
1511    case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1512    case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1513    case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
1514    default: return false;
1515    }
1516  } else if (I->getType()->isIntegerTy(16)) {
1517    CReg = X86::CX;
1518    RC = &X86::GR16RegClass;
1519    switch (I->getOpcode()) {
1520    case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1521    case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1522    case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
1523    default: return false;
1524    }
1525  } else if (I->getType()->isIntegerTy(32)) {
1526    CReg = X86::ECX;
1527    RC = &X86::GR32RegClass;
1528    switch (I->getOpcode()) {
1529    case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1530    case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1531    case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
1532    default: return false;
1533    }
1534  } else if (I->getType()->isIntegerTy(64)) {
1535    CReg = X86::RCX;
1536    RC = &X86::GR64RegClass;
1537    switch (I->getOpcode()) {
1538    case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1539    case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1540    case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
1541    default: return false;
1542    }
1543  } else {
1544    return false;
1545  }
1546
1547  MVT VT;
1548  if (!isTypeLegal(I->getType(), VT))
1549    return false;
1550
1551  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1552  if (Op0Reg == 0) return false;
1553
1554  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1555  if (Op1Reg == 0) return false;
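  // Copy the shift amount into CL (or its super-register CX/ECX/RCX).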
1556  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1557          CReg).addReg(Op1Reg);
1558
1559  // The shift instruction uses X86::CL. If we defined a super-register
1560  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1561  if (CReg != X86::CL)
1562    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1563            TII.get(TargetOpcode::KILL), X86::CL)
1564      .addReg(CReg, RegState::Kill);
1565
1566  unsigned ResultReg = createResultReg(RC);
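  // Emit the shift; the shift amount is taken implicitly from CL.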
1567  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1568    .addReg(Op0Reg);
1569  updateValueMap(I, ResultReg);
1570  return true;
1571}
1572
1573bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1574  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1575  const static unsigned NumOps   = 4; // SDiv, SRem, UDiv, URem
1576  const static bool S = true;  // IsSigned
1577  const static bool U = false; // !IsSigned
1578  const static unsigned Copy = TargetOpcode::COPY;
1579  // For the X86 DIV/IDIV instruction, in most cases the dividend
1580  // (numerator) must be in a specific register pair highreg:lowreg,
1581  // producing the quotient in lowreg and the remainder in highreg.
1582  // For most data types, to set up the instruction, the dividend is
1583  // copied into lowreg, and lowreg is sign-extended or zero-extended
1584  // into highreg.  The exception is i8, where the dividend is defined
1585  // as a single register rather than a register pair, and we
1586  // therefore directly sign-extend or zero-extend the dividend into
1587  // lowreg, instead of copying, and ignore the highreg.
1588  const static struct DivRemEntry {
1589    // The following portion depends only on the data type.
1590    const TargetRegisterClass *RC;
1591    unsigned LowInReg;  // low part of the register pair
1592    unsigned HighInReg; // high part of the register pair
1593    // The following portion depends on both the data type and the operation.
1594    struct DivRemResult {
1595    unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
1596    unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
1597                              // highreg, or copying a zero into highreg.
1598    unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
1599                              // zero/sign-extending into lowreg for i8.
1600    unsigned DivRemResultReg; // Register containing the desired result.
1601    bool IsOpSigned;          // Whether to use signed or unsigned form.
1602    } ResultTable[NumOps];
1603  } OpTable[NumTypes] = {
1604    { &X86::GR8RegClass,  X86::AX,  0, {
1605        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AL,  S }, // SDiv
1606        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AH,  S }, // SRem
1607        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AL,  U }, // UDiv
1608        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AH,  U }, // URem
1609      }
1610    }, // i8
1611    { &X86::GR16RegClass, X86::AX,  X86::DX, {
1612        { X86::IDIV16r, X86::CWD,     Copy,            X86::AX,  S }, // SDiv
1613        { X86::IDIV16r, X86::CWD,     Copy,            X86::DX,  S }, // SRem
1614        { X86::DIV16r,  X86::MOV32r0, Copy,            X86::AX,  U }, // UDiv
1615        { X86::DIV16r,  X86::MOV32r0, Copy,            X86::DX,  U }, // URem
1616      }
1617    }, // i16
1618    { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1619        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EAX, S }, // SDiv
1620        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EDX, S }, // SRem
1621        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EAX, U }, // UDiv
1622        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EDX, U }, // URem
1623      }
1624    }, // i32
1625    { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1626        { X86::IDIV64r, X86::CQO,     Copy,            X86::RAX, S }, // SDiv
1627        { X86::IDIV64r, X86::CQO,     Copy,            X86::RDX, S }, // SRem
1628        { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RAX, U }, // UDiv
1629        { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RDX, U }, // URem
1630      }
1631    }, // i64
1632  };
1633
1634  MVT VT;
1635  if (!isTypeLegal(I->getType(), VT))
1636    return false;
1637
1638  unsigned TypeIndex, OpIndex;
1639  switch (VT.SimpleTy) {
1640  default: return false;
1641  case MVT::i8:  TypeIndex = 0; break;
1642  case MVT::i16: TypeIndex = 1; break;
1643  case MVT::i32: TypeIndex = 2; break;
1644  case MVT::i64: TypeIndex = 3;
1645    if (!Subtarget->is64Bit())
1646      return false;
1647    break;
1648  }
1649
1650  switch (I->getOpcode()) {
1651  default: llvm_unreachable("Unexpected div/rem opcode");
1652  case Instruction::SDiv: OpIndex = 0; break;
1653  case Instruction::SRem: OpIndex = 1; break;
1654  case Instruction::UDiv: OpIndex = 2; break;
1655  case Instruction::URem: OpIndex = 3; break;
1656  }
1657
1658  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1659  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1660  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1661  if (Op0Reg == 0)
1662    return false;
1663  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1664  if (Op1Reg == 0)
1665    return false;
1666
1667  // Move op0 into low-order input register.
1668  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1669          TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1670  // Zero-extend or sign-extend into high-order input register.
1671  if (OpEntry.OpSignExtend) {
1672    if (OpEntry.IsOpSigned)
1673      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1674              TII.get(OpEntry.OpSignExtend));
1675    else {
1676      unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1677      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1678              TII.get(X86::MOV32r0), Zero32);
1679
1680      // Copy the zero into the appropriate sub/super/identical physical
1681      // register. Unfortunately the operations needed are not uniform enough
1682      // to fit neatly into the table above.
1683      if (VT.SimpleTy == MVT::i16) {
1684        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1685                TII.get(Copy), TypeEntry.HighInReg)
1686          .addReg(Zero32, 0, X86::sub_16bit);
1687      } else if (VT.SimpleTy == MVT::i32) {
1688        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1689                TII.get(Copy), TypeEntry.HighInReg)
1690            .addReg(Zero32);
1691      } else if (VT.SimpleTy == MVT::i64) {
1692        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1693                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1694            .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1695      }
1696    }
1697  }
1698  // Generate the DIV/IDIV instruction.
1699  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1700          TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
  // For the i8 remainder, we can't reference AH directly, as we'll end
  // up with bogus copies like %R9B = COPY %AH. Reference AX
  // instead to avoid referencing AH in a REX-prefixed instruction.
1704  //
1705  // The current assumption of the fast register allocator is that isel
1706  // won't generate explicit references to the GPR8_NOREX registers. If
1707  // the allocator and/or the backend get enhanced to be more robust in
1708  // that regard, this can be, and should be, removed.
1709  unsigned ResultReg = 0;
1710  if ((I->getOpcode() == Instruction::SRem ||
1711       I->getOpcode() == Instruction::URem) &&
1712      OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
1713    unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
1714    unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
1715    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1716            TII.get(Copy), SourceSuperReg).addReg(X86::AX);
1717
1718    // Shift AX right by 8 bits instead of using AH.
1719    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
1720            ResultSuperReg).addReg(SourceSuperReg).addImm(8);
1721
1722    // Now reference the 8-bit subreg of the result.
1723    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
1724                                           /*Kill=*/true, X86::sub_8bit);
1725  }
1726  // Copy the result out of the physreg if we haven't already.
1727  if (!ResultReg) {
1728    ResultReg = createResultReg(TypeEntry.RC);
1729    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
1730        .addReg(OpEntry.DivRemResultReg);
1731  }
1732  updateValueMap(I, ResultReg);
1733
1734  return true;
1735}
1736
/// \brief Emit a conditional move instruction (if they are supported) to lower
/// the select.
1739bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
1740  // Check if the subtarget supports these instructions.
1741  if (!Subtarget->hasCMov())
1742    return false;
1743
1744  // FIXME: Add support for i8.
1745  if (RetVT < MVT::i16 || RetVT > MVT::i64)
1746    return false;
1747
1748  const Value *Cond = I->getOperand(0);
1749  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
1750  bool NeedTest = true;
1751  X86::CondCode CC = X86::COND_NE;
1752
1753  // Optimize conditions coming from a compare if both instructions are in the
1754  // same basic block (values defined in other basic blocks may not have
1755  // initialized registers).
1756  const auto *CI = dyn_cast<CmpInst>(Cond);
1757  if (CI && (CI->getParent() == I->getParent())) {
1758    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1759
1760    // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1761    static unsigned SETFOpcTable[2][3] = {
1762      { X86::SETNPr, X86::SETEr , X86::TEST8rr },
1763      { X86::SETPr,  X86::SETNEr, X86::OR8rr   }
1764    };
1765    unsigned *SETFOpc = nullptr;
1766    switch (Predicate) {
1767    default: break;
1768    case CmpInst::FCMP_OEQ:
1769      SETFOpc = &SETFOpcTable[0][0];
1770      Predicate = CmpInst::ICMP_NE;
1771      break;
1772    case CmpInst::FCMP_UNE:
1773      SETFOpc = &SETFOpcTable[1][0];
1774      Predicate = CmpInst::ICMP_NE;
1775      break;
1776    }
1777
1778    bool NeedSwap;
1779    std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
1780    assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1781
1782    const Value *CmpLHS = CI->getOperand(0);
1783    const Value *CmpRHS = CI->getOperand(1);
1784    if (NeedSwap)
1785      std::swap(CmpLHS, CmpRHS);
1786
1787    EVT CmpVT = TLI.getValueType(CmpLHS->getType());
1788    // Emit a compare of the LHS and RHS, setting the flags.
1789    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
1790      return false;
1791
1792    if (SETFOpc) {
1793      unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1794      unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1795      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
1796              FlagReg1);
1797      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
1798              FlagReg2);
1799      auto const &II = TII.get(SETFOpc[2]);
1800      if (II.getNumDefs()) {
1801        unsigned TmpReg = createResultReg(&X86::GR8RegClass);
1802        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
1803          .addReg(FlagReg2).addReg(FlagReg1);
1804      } else {
1805        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1806          .addReg(FlagReg2).addReg(FlagReg1);
1807      }
1808    }
1809    NeedTest = false;
1810  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
    // Request a register for the condition even though it isn't used directly;
    // otherwise the intrinsic might be optimized away completely.
1813    unsigned TmpReg = getRegForValue(Cond);
1814    if (TmpReg == 0)
1815      return false;
1816
1817    NeedTest = false;
1818  }
1819
1820  if (NeedTest) {
    // Selects operate on i1; however, CondReg is 8 bits wide and may contain
    // garbage. Only the least significant bit is guaranteed to be accurate, so
    // reading more than the lsb may yield non-zero values even when the lsb is
    // zero. Therefore, truncate CondReg to i1 for the select by performing a
    // TEST against 1.
1826    unsigned CondReg = getRegForValue(Cond);
1827    if (CondReg == 0)
1828      return false;
1829    bool CondIsKill = hasTrivialKill(Cond);
1830
1831    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1832      .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
1833  }
1834
1835  const Value *LHS = I->getOperand(1);
1836  const Value *RHS = I->getOperand(2);
1837
1838  unsigned RHSReg = getRegForValue(RHS);
1839  bool RHSIsKill = hasTrivialKill(RHS);
1840
1841  unsigned LHSReg = getRegForValue(LHS);
1842  bool LHSIsKill = hasTrivialKill(LHS);
1843
1844  if (!LHSReg || !RHSReg)
1845    return false;
1846
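  // Emit CMOVcc: the result starts out as the RHS (false) value and is
  // overwritten with the LHS (true) value when the condition CC holds.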
1847  unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
1848  unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
1849                                       LHSReg, LHSIsKill);
1850  updateValueMap(I, ResultReg);
1851  return true;
1852}
1853
1854/// \brief Emit SSE or AVX instructions to lower the select.
1855///
1856/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
1857/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
1858/// SSE instructions are available. If AVX is available, try to use a VBLENDV.
1859bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
1860  // Optimize conditions coming from a compare if both instructions are in the
1861  // same basic block (values defined in other basic blocks may not have
1862  // initialized registers).
1863  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
1864  if (!CI || (CI->getParent() != I->getParent()))
1865    return false;
1866
1867  if (I->getType() != CI->getOperand(0)->getType() ||
1868      !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
1869        (Subtarget->hasSSE2() && RetVT == MVT::f64)))
1870    return false;
1871
1872  const Value *CmpLHS = CI->getOperand(0);
1873  const Value *CmpRHS = CI->getOperand(1);
1874  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1875
1876  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1877  // We don't have to materialize a zero constant for this case and can just use
1878  // %x again on the RHS.
1879  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1880    const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1881    if (CmpRHSC && CmpRHSC->isNullValue())
1882      CmpRHS = CmpLHS;
1883  }
1884
1885  unsigned CC;
1886  bool NeedSwap;
1887  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
1888  if (CC > 7)
1889    return false;
1890
1891  if (NeedSwap)
1892    std::swap(CmpLHS, CmpRHS);
1893
1894  // Choose the SSE instruction sequence based on data type (float or double).
1895  static unsigned OpcTable[2][4] = {
1896    { X86::CMPSSrr,  X86::FsANDPSrr,  X86::FsANDNPSrr,  X86::FsORPSrr  },
1897    { X86::CMPSDrr,  X86::FsANDPDrr,  X86::FsANDNPDrr,  X86::FsORPDrr  }
1898  };
1899
1900  unsigned *Opc = nullptr;
1901  switch (RetVT.SimpleTy) {
1902  default: return false;
1903  case MVT::f32: Opc = &OpcTable[0][0]; break;
1904  case MVT::f64: Opc = &OpcTable[1][0]; break;
1905  }
1906
1907  const Value *LHS = I->getOperand(1);
1908  const Value *RHS = I->getOperand(2);
1909
1910  unsigned LHSReg = getRegForValue(LHS);
1911  bool LHSIsKill = hasTrivialKill(LHS);
1912
1913  unsigned RHSReg = getRegForValue(RHS);
1914  bool RHSIsKill = hasTrivialKill(RHS);
1915
1916  unsigned CmpLHSReg = getRegForValue(CmpLHS);
1917  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
1918
1919  unsigned CmpRHSReg = getRegForValue(CmpRHS);
1920  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
1921
  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
1923    return false;
1924
1925  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
1926  unsigned ResultReg;
1927
1928  if (Subtarget->hasAVX()) {
1929    // If we have AVX, create 1 blendv instead of 3 logic instructions.
1930    // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
1931    // uses XMM0 as the selection register. That may need just as many
1932    // instructions as the AND/ANDN/OR sequence due to register moves, so
1933    // don't bother.
1934    unsigned CmpOpcode =
1935      (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
1936    unsigned BlendOpcode =
1937      (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
1938
1939    unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
1940                                       CmpRHSReg, CmpRHSIsKill, CC);
1941    ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill,
1942                                 LHSReg, LHSIsKill, CmpReg, true);
1943  } else {
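    // Emit a CMP/AND/ANDN/OR sequence: the compare produces an all-ones or
    // all-zeros mask, and the result is (Mask & LHS) | (~Mask & RHS).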
1944    unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
1945                                       CmpRHSReg, CmpRHSIsKill, CC);
1946    unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
1947                                      LHSReg, LHSIsKill);
1948    unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
1949                                       RHSReg, RHSIsKill);
1950    ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
1951                                         AndReg, /*IsKill=*/true);
1952  }
1953  updateValueMap(I, ResultReg);
1954  return true;
1955}
1956
1957bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
  // These are pseudo CMOV instructions that will later be expanded into
  // control flow.
1960  unsigned Opc;
1961  switch (RetVT.SimpleTy) {
1962  default: return false;
1963  case MVT::i8:  Opc = X86::CMOV_GR8;  break;
1964  case MVT::i16: Opc = X86::CMOV_GR16; break;
1965  case MVT::i32: Opc = X86::CMOV_GR32; break;
1966  case MVT::f32: Opc = X86::CMOV_FR32; break;
1967  case MVT::f64: Opc = X86::CMOV_FR64; break;
1968  }
1969
1970  const Value *Cond = I->getOperand(0);
1971  X86::CondCode CC = X86::COND_NE;
1972
1973  // Optimize conditions coming from a compare if both instructions are in the
1974  // same basic block (values defined in other basic blocks may not have
1975  // initialized registers).
1976  const auto *CI = dyn_cast<CmpInst>(Cond);
1977  if (CI && (CI->getParent() == I->getParent())) {
1978    bool NeedSwap;
1979    std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate());
1980    if (CC > X86::LAST_VALID_COND)
1981      return false;
1982
1983    const Value *CmpLHS = CI->getOperand(0);
1984    const Value *CmpRHS = CI->getOperand(1);
1985
1986    if (NeedSwap)
1987      std::swap(CmpLHS, CmpRHS);
1988
1989    EVT CmpVT = TLI.getValueType(CmpLHS->getType());
1990    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
1991      return false;
1992  } else {
1993    unsigned CondReg = getRegForValue(Cond);
1994    if (CondReg == 0)
1995      return false;
1996    bool CondIsKill = hasTrivialKill(Cond);
1997    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1998      .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
1999  }
2000
2001  const Value *LHS = I->getOperand(1);
2002  const Value *RHS = I->getOperand(2);
2003
2004  unsigned LHSReg = getRegForValue(LHS);
2005  bool LHSIsKill = hasTrivialKill(LHS);
2006
2007  unsigned RHSReg = getRegForValue(RHS);
2008  bool RHSIsKill = hasTrivialKill(RHS);
2009
2010  if (!LHSReg || !RHSReg)
2011    return false;
2012
2013  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2014
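  // Emit the pseudo CMOV with the same operand order as the real CMOV above;
  // it will later be expanded into explicit control flow.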
2015  unsigned ResultReg =
2016    fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
2017  updateValueMap(I, ResultReg);
2018  return true;
2019}
2020
2021bool X86FastISel::X86SelectSelect(const Instruction *I) {
2022  MVT RetVT;
2023  if (!isTypeLegal(I->getType(), RetVT))
2024    return false;
2025
2026  // Check if we can fold the select.
2027  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2028    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2029    const Value *Opnd = nullptr;
2030    switch (Predicate) {
2031    default:                              break;
2032    case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2033    case CmpInst::FCMP_TRUE:  Opnd = I->getOperand(1); break;
2034    }
2035    // No need for a select anymore - this is an unconditional move.
2036    if (Opnd) {
2037      unsigned OpReg = getRegForValue(Opnd);
2038      if (OpReg == 0)
2039        return false;
2040      bool OpIsKill = hasTrivialKill(Opnd);
2041      const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2042      unsigned ResultReg = createResultReg(RC);
2043      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2044              TII.get(TargetOpcode::COPY), ResultReg)
2045        .addReg(OpReg, getKillRegState(OpIsKill));
2046      updateValueMap(I, ResultReg);
2047      return true;
2048    }
2049  }
2050
2051  // First try to use real conditional move instructions.
2052  if (X86FastEmitCMoveSelect(RetVT, I))
2053    return true;
2054
2055  // Try to use a sequence of SSE instructions to simulate a conditional move.
2056  if (X86FastEmitSSESelect(RetVT, I))
2057    return true;
2058
  // Fall back to pseudo conditional move instructions, which will later be
  // converted to control flow.
2061  if (X86FastEmitPseudoSelect(RetVT, I))
2062    return true;
2063
2064  return false;
2065}
2066
2067bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2068  if (!I->getOperand(0)->getType()->isIntegerTy(32))
2069    return false;
2070
2071  // Select integer to float/double conversion.
2072  unsigned OpReg = getRegForValue(I->getOperand(0));
2073  if (OpReg == 0)
2074    return false;
2075
2076  const TargetRegisterClass *RC = nullptr;
2077  unsigned Opcode;
2078
2079  if (I->getType()->isDoubleTy()) {
2080    // sitofp int -> double
2081    Opcode = X86::VCVTSI2SDrr;
2082    RC = &X86::FR64RegClass;
2083  } else if (I->getType()->isFloatTy()) {
2084    // sitofp int -> float
2085    Opcode = X86::VCVTSI2SSrr;
2086    RC = &X86::FR32RegClass;
2087  } else
2088    return false;
2089
2090  // The target-independent selection algorithm in FastISel already knows how
2091  // to select a SINT_TO_FP if the target is SSE but not AVX. This code is only
2092  // reachable if the subtarget has AVX.
2093  assert(Subtarget->hasAVX() && "Expected a subtarget with AVX!");
2094
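  // The AVX scalar convert takes an extra source operand whose upper bits are
  // passed through to the result; feed it an IMPLICIT_DEF since only the
  // scalar element matters here.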
2095  unsigned ImplicitDefReg = createResultReg(RC);
2096  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2097          TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2098  unsigned ResultReg =
2099      fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
2100  updateValueMap(I, ResultReg);
2101  return true;
2102}
2103
2104// Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2105bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2106                                          unsigned TargetOpc,
2107                                          const TargetRegisterClass *RC) {
2108  assert((I->getOpcode() == Instruction::FPExt ||
2109          I->getOpcode() == Instruction::FPTrunc) &&
2110         "Instruction must be an FPExt or FPTrunc!");
2111
2112  unsigned OpReg = getRegForValue(I->getOperand(0));
2113  if (OpReg == 0)
2114    return false;
2115
2116  unsigned ResultReg = createResultReg(RC);
2117  MachineInstrBuilder MIB;
2118  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2119                ResultReg);
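  // The AVX forms take an extra source register for the pass-through upper
  // bits; reuse OpReg for it.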
2120  if (Subtarget->hasAVX())
2121    MIB.addReg(OpReg);
2122  MIB.addReg(OpReg);
2123  updateValueMap(I, ResultReg);
2124  return true;
2125}
2126
2127bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2128  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2129      I->getOperand(0)->getType()->isFloatTy()) {
2130    // fpext from float to double.
2131    unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2132    return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR64RegClass);
2133  }
2134
2135  return false;
2136}
2137
2138bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2139  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2140      I->getOperand(0)->getType()->isDoubleTy()) {
2141    // fptrunc from double to float.
2142    unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2143    return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR32RegClass);
2144  }
2145
2146  return false;
2147}
2148
2149bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2150  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
2151  EVT DstVT = TLI.getValueType(I->getType());
2152
2153  // This code only handles truncation to byte.
2154  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2155    return false;
2156  if (!TLI.isTypeLegal(SrcVT))
2157    return false;
2158
2159  unsigned InputReg = getRegForValue(I->getOperand(0));
2160  if (!InputReg)
2161    // Unhandled operand.  Halt "fast" selection and bail.
2162    return false;
2163
2164  if (SrcVT == MVT::i8) {
2165    // Truncate from i8 to i1; no code needed.
2166    updateValueMap(I, InputReg);
2167    return true;
2168  }
2169
2170  if (!Subtarget->is64Bit()) {
    // If we're on x86-32, we can't extract an i8 from a general register.
2172    // First issue a copy to GR16_ABCD or GR32_ABCD.
2173    const TargetRegisterClass *CopyRC =
2174      (SrcVT == MVT::i16) ? &X86::GR16_ABCDRegClass : &X86::GR32_ABCDRegClass;
2175    unsigned CopyReg = createResultReg(CopyRC);
2176    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2177            TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg);
2178    InputReg = CopyReg;
2179  }
2180
2181  // Issue an extract_subreg.
2182  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2183                                                  InputReg, /*Kill=*/true,
2184                                                  X86::sub_8bit);
2185  if (!ResultReg)
2186    return false;
2187
2188  updateValueMap(I, ResultReg);
2189  return true;
2190}
2191
2192bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2193  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2194}
2195
2196bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2197                                     X86AddressMode SrcAM, uint64_t Len) {
2198
2199  // Make sure we don't bloat code by inlining very large memcpy's.
2200  if (!IsMemcpySmall(Len))
2201    return false;
2202
2203  bool i64Legal = Subtarget->is64Bit();
2204
2205  // We don't care about alignment here since we just emit integer accesses.
2206  while (Len) {
2207    MVT VT;
2208    if (Len >= 8 && i64Legal)
2209      VT = MVT::i64;
2210    else if (Len >= 4)
2211      VT = MVT::i32;
2212    else if (Len >= 2)
2213      VT = MVT::i16;
2214    else
2215      VT = MVT::i8;
2216
2217    unsigned Reg;
2218    bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2219    RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2220    assert(RV && "Failed to emit load or store??");
2221
2222    unsigned Size = VT.getSizeInBits()/8;
2223    Len -= Size;
2224    DestAM.Disp += Size;
2225    SrcAM.Disp += Size;
2226  }
2227
2228  return true;
2229}
2230
2231bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2232  // FIXME: Handle more intrinsics.
2233  switch (II->getIntrinsicID()) {
2234  default: return false;
2235  case Intrinsic::convert_from_fp16:
2236  case Intrinsic::convert_to_fp16: {
2237    if (TM.Options.UseSoftFloat || !Subtarget->hasF16C())
2238      return false;
2239
2240    const Value *Op = II->getArgOperand(0);
2241    unsigned InputReg = getRegForValue(Op);
2242    if (InputReg == 0)
2243      return false;
2244
2245    // F16C only allows converting from float to half and from half to float.
2246    bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2247    if (IsFloatToHalf) {
2248      if (!Op->getType()->isFloatTy())
2249        return false;
2250    } else {
2251      if (!II->getType()->isFloatTy())
2252        return false;
2253    }
2254
2255    unsigned ResultReg = 0;
2256    const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2257    if (IsFloatToHalf) {
2258      // 'InputReg' is implicitly promoted from register class FR32 to
2259      // register class VR128 by method 'constrainOperandRegClass' which is
2260      // directly called by 'fastEmitInst_ri'.
2261      // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2262      // used to provide rounding control.
2263      InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 0);
2264
      // Move the lower 32 bits of InputReg to another register of class GR32.
2266      ResultReg = createResultReg(&X86::GR32RegClass);
2267      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2268              TII.get(X86::VMOVPDI2DIrr), ResultReg)
2269          .addReg(InputReg, RegState::Kill);
2270
2271      // The result value is in the lower 16-bits of ResultReg.
2272      unsigned RegIdx = X86::sub_16bit;
2273      ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
2274    } else {
2275      assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2276      // Explicitly sign-extend the input to 32-bit.
2277      InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
2278                            /*Kill=*/false);
2279
2280      // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2281      InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2282                            InputReg, /*Kill=*/true);
2283
2284      InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
2285
      // The result value is in the lower 32 bits of InputReg.
      // Emit an explicit copy from register class VR128 to register class FR32.
2288      ResultReg = createResultReg(&X86::FR32RegClass);
2289      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2290              TII.get(TargetOpcode::COPY), ResultReg)
2291          .addReg(InputReg, RegState::Kill);
2292    }
2293
2294    updateValueMap(II, ResultReg);
2295    return true;
2296  }
2297  case Intrinsic::frameaddress: {
2298    MachineFunction *MF = FuncInfo.MF;
2299    if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2300      return false;
2301
2302    Type *RetTy = II->getCalledFunction()->getReturnType();
2303
2304    MVT VT;
2305    if (!isTypeLegal(RetTy, VT))
2306      return false;
2307
2308    unsigned Opc;
2309    const TargetRegisterClass *RC = nullptr;
2310
2311    switch (VT.SimpleTy) {
2312    default: llvm_unreachable("Invalid result type for frameaddress.");
2313    case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2314    case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2315    }
2316
2317    // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2318    // we get the wrong frame register.
2319    MachineFrameInfo *MFI = MF->getFrameInfo();
2320    MFI->setFrameAddressIsTaken(true);
2321
2322    const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2323    unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2324    assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2325            (FrameReg == X86::EBP && VT == MVT::i32)) &&
2326           "Invalid Frame Register!");
2327
    // Always make a copy of the frame register to a vreg first, so that we
2329    // never directly reference the frame register (the TwoAddressInstruction-
2330    // Pass doesn't like that).
2331    unsigned SrcReg = createResultReg(RC);
2332    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2333            TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2334
2335    // Now recursively load from the frame address.
2336    // movq (%rbp), %rax
2337    // movq (%rax), %rax
2338    // movq (%rax), %rax
2339    // ...
2340    unsigned DestReg;
2341    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2342    while (Depth--) {
2343      DestReg = createResultReg(RC);
2344      addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2345                           TII.get(Opc), DestReg), SrcReg);
2346      SrcReg = DestReg;
2347    }
2348
2349    updateValueMap(II, SrcReg);
2350    return true;
2351  }
2352  case Intrinsic::memcpy: {
2353    const MemCpyInst *MCI = cast<MemCpyInst>(II);
2354    // Don't handle volatile or variable length memcpys.
2355    if (MCI->isVolatile())
2356      return false;
2357
2358    if (isa<ConstantInt>(MCI->getLength())) {
2359      // Small memcpy's are common enough that we want to do them
2360      // without a call if possible.
2361      uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2362      if (IsMemcpySmall(Len)) {
2363        X86AddressMode DestAM, SrcAM;
2364        if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2365            !X86SelectAddress(MCI->getRawSource(), SrcAM))
2366          return false;
2367        TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2368        return true;
2369      }
2370    }
2371
2372    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2373    if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2374      return false;
2375
2376    if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2377      return false;
2378
2379    return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2);
2380  }
2381  case Intrinsic::memset: {
2382    const MemSetInst *MSI = cast<MemSetInst>(II);
2383
2384    if (MSI->isVolatile())
2385      return false;
2386
2387    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2388    if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2389      return false;
2390
2391    if (MSI->getDestAddressSpace() > 255)
2392      return false;
2393
2394    return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
2395  }
2396  case Intrinsic::stackprotector: {
2397    // Emit code to store the stack guard onto the stack.
2398    EVT PtrTy = TLI.getPointerTy();
2399
2400    const Value *Op1 = II->getArgOperand(0); // The guard's value.
2401    const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2402
2403    MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2404
2405    // Grab the frame index.
2406    X86AddressMode AM;
2407    if (!X86SelectAddress(Slot, AM)) return false;
2408    if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2409    return true;
2410  }
2411  case Intrinsic::dbg_declare: {
2412    const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2413    X86AddressMode AM;
2414    assert(DI->getAddress() && "Null address should be checked earlier!");
2415    if (!X86SelectAddress(DI->getAddress(), AM))
2416      return false;
2417    const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
    // FIXME: may need to add RegState::Debug to any registers produced,
2419    // although ESP/EBP should be the only ones at the moment.
2420    assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
2421           "Expected inlined-at fields to agree");
2422    addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2423        .addImm(0)
2424        .addMetadata(DI->getVariable())
2425        .addMetadata(DI->getExpression());
2426    return true;
2427  }
2428  case Intrinsic::trap: {
2429    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2430    return true;
2431  }
2432  case Intrinsic::sqrt: {
2433    if (!Subtarget->hasSSE1())
2434      return false;
2435
2436    Type *RetTy = II->getCalledFunction()->getReturnType();
2437
2438    MVT VT;
2439    if (!isTypeLegal(RetTy, VT))
2440      return false;
2441
2442    // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2443    // is not generated by FastISel yet.
2444    // FIXME: Update this code once tablegen can handle it.
2445    static const unsigned SqrtOpc[2][2] = {
2446      {X86::SQRTSSr, X86::VSQRTSSr},
2447      {X86::SQRTSDr, X86::VSQRTSDr}
2448    };
2449    bool HasAVX = Subtarget->hasAVX();
2450    unsigned Opc;
2451    const TargetRegisterClass *RC;
2452    switch (VT.SimpleTy) {
2453    default: return false;
2454    case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
2455    case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
2456    }
2457
2458    const Value *SrcVal = II->getArgOperand(0);
2459    unsigned SrcReg = getRegForValue(SrcVal);
2460
2461    if (SrcReg == 0)
2462      return false;
2463
2464    unsigned ImplicitDefReg = 0;
2465    if (HasAVX) {
2466      ImplicitDefReg = createResultReg(RC);
2467      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2468              TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2469    }
2470
2471    unsigned ResultReg = createResultReg(RC);
2472    MachineInstrBuilder MIB;
2473    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2474                  ResultReg);
2475
2476    if (ImplicitDefReg)
2477      MIB.addReg(ImplicitDefReg);
2478
2479    MIB.addReg(SrcReg);
2480
2481    updateValueMap(II, ResultReg);
2482    return true;
2483  }
2484  case Intrinsic::sadd_with_overflow:
2485  case Intrinsic::uadd_with_overflow:
2486  case Intrinsic::ssub_with_overflow:
2487  case Intrinsic::usub_with_overflow:
2488  case Intrinsic::smul_with_overflow:
2489  case Intrinsic::umul_with_overflow: {
2490    // This implements the basic lowering of the xalu with overflow intrinsics
2491    // into add/sub/mul followed by either seto or setb.
2492    const Function *Callee = II->getCalledFunction();
2493    auto *Ty = cast<StructType>(Callee->getReturnType());
2494    Type *RetTy = Ty->getTypeAtIndex(0U);
2495    Type *CondTy = Ty->getTypeAtIndex(1);
2496
2497    MVT VT;
2498    if (!isTypeLegal(RetTy, VT))
2499      return false;
2500
2501    if (VT < MVT::i8 || VT > MVT::i64)
2502      return false;
2503
2504    const Value *LHS = II->getArgOperand(0);
2505    const Value *RHS = II->getArgOperand(1);
2506
2507    // Canonicalize immediate to the RHS.
2508    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2509        isCommutativeIntrinsic(II))
2510      std::swap(LHS, RHS);
2511
2512    bool UseIncDec = false;
2513    if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
2514      UseIncDec = true;
2515
2516    unsigned BaseOpc, CondOpc;
2517    switch (II->getIntrinsicID()) {
2518    default: llvm_unreachable("Unexpected intrinsic!");
2519    case Intrinsic::sadd_with_overflow:
2520      BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
2521      CondOpc = X86::SETOr;
2522      break;
2523    case Intrinsic::uadd_with_overflow:
2524      BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
2525    case Intrinsic::ssub_with_overflow:
2526      BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
2527      CondOpc = X86::SETOr;
2528      break;
2529    case Intrinsic::usub_with_overflow:
2530      BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
2531    case Intrinsic::smul_with_overflow:
2532      BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
2533    case Intrinsic::umul_with_overflow:
2534      BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
2535    }
2536
2537    unsigned LHSReg = getRegForValue(LHS);
2538    if (LHSReg == 0)
2539      return false;
2540    bool LHSIsKill = hasTrivialKill(LHS);
2541
2542    unsigned ResultReg = 0;
2543    // Check if we have an immediate version.
2544    if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2545      static const unsigned Opc[2][4] = {
2546        { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2547        { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2548      };
2549
2550      if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
2551        ResultReg = createResultReg(TLI.getRegClassFor(VT));
2552        bool IsDec = BaseOpc == X86ISD::DEC;
2553        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2554                TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2555          .addReg(LHSReg, getKillRegState(LHSIsKill));
2556      } else
2557        ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2558                                CI->getZExtValue());
2559    }
2560
2561    unsigned RHSReg;
2562    bool RHSIsKill;
2563    if (!ResultReg) {
2564      RHSReg = getRegForValue(RHS);
2565      if (RHSReg == 0)
2566        return false;
2567      RHSIsKill = hasTrivialKill(RHS);
2568      ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2569                              RHSIsKill);
2570    }
2571
2572    // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2573    // it manually.
2574    if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2575      static const unsigned MULOpc[] =
2576        { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2577      static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
      // First copy the first operand into the accumulator (AL/AX/EAX/RAX,
      // depending on the type), which is an implicit input to the X86::MUL*r
      // instruction.
2580      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2581              TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2582        .addReg(LHSReg, getKillRegState(LHSIsKill));
2583      ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2584                                 TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2585    } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2586      static const unsigned MULOpc[] =
2587        { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2588      if (VT == MVT::i8) {
2589        // Copy the first operand into AL, which is an implicit input to the
2590        // X86::IMUL8r instruction.
2591        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2592               TII.get(TargetOpcode::COPY), X86::AL)
2593          .addReg(LHSReg, getKillRegState(LHSIsKill));
2594        ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2595                                   RHSIsKill);
2596      } else
2597        ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2598                                    TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2599                                    RHSReg, RHSIsKill);
2600    }
2601
2602    if (!ResultReg)
2603      return false;
2604
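    // Materialize the overflow bit with SETO/SETB into the register that
    // immediately follows the arithmetic result; both registers are mapped to
    // the intrinsic's two return values below.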
2605    unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
2606    assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2607    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
2608            ResultReg2);
2609
2610    updateValueMap(II, ResultReg, 2);
2611    return true;
2612  }
2613  case Intrinsic::x86_sse_cvttss2si:
2614  case Intrinsic::x86_sse_cvttss2si64:
2615  case Intrinsic::x86_sse2_cvttsd2si:
2616  case Intrinsic::x86_sse2_cvttsd2si64: {
2617    bool IsInputDouble;
2618    switch (II->getIntrinsicID()) {
2619    default: llvm_unreachable("Unexpected intrinsic.");
2620    case Intrinsic::x86_sse_cvttss2si:
2621    case Intrinsic::x86_sse_cvttss2si64:
2622      if (!Subtarget->hasSSE1())
2623        return false;
2624      IsInputDouble = false;
2625      break;
2626    case Intrinsic::x86_sse2_cvttsd2si:
2627    case Intrinsic::x86_sse2_cvttsd2si64:
2628      if (!Subtarget->hasSSE2())
2629        return false;
2630      IsInputDouble = true;
2631      break;
2632    }
2633
2634    Type *RetTy = II->getCalledFunction()->getReturnType();
2635    MVT VT;
2636    if (!isTypeLegal(RetTy, VT))
2637      return false;
2638
2639    static const unsigned CvtOpc[2][2][2] = {
2640      { { X86::CVTTSS2SIrr,   X86::VCVTTSS2SIrr   },
2641        { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr }  },
2642      { { X86::CVTTSD2SIrr,   X86::VCVTTSD2SIrr   },
2643        { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr }  }
2644    };
2645    bool HasAVX = Subtarget->hasAVX();
2646    unsigned Opc;
2647    switch (VT.SimpleTy) {
2648    default: llvm_unreachable("Unexpected result type.");
2649    case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
2650    case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
2651    }
2652
2653    // Check if we can fold insertelement instructions into the convert.
2654    const Value *Op = II->getArgOperand(0);
2655    while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
2656      const Value *Index = IE->getOperand(2);
2657      if (!isa<ConstantInt>(Index))
2658        break;
2659      unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
2660
2661      if (Idx == 0) {
2662        Op = IE->getOperand(1);
2663        break;
2664      }
2665      Op = IE->getOperand(0);
2666    }
2667
2668    unsigned Reg = getRegForValue(Op);
2669    if (Reg == 0)
2670      return false;
2671
2672    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
2673    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2674      .addReg(Reg);
2675
2676    updateValueMap(II, ResultReg);
2677    return true;
2678  }
2679  }
2680}
2681
2682bool X86FastISel::fastLowerArguments() {
2683  if (!FuncInfo.CanLowerReturn)
2684    return false;
2685
2686  const Function *F = FuncInfo.Fn;
2687  if (F->isVarArg())
2688    return false;
2689
2690  CallingConv::ID CC = F->getCallingConv();
2691  if (CC != CallingConv::C)
2692    return false;
2693
2694  if (Subtarget->isCallingConvWin64(CC))
2695    return false;
2696
2697  if (!Subtarget->is64Bit())
2698    return false;
2699
  // Only handle simple cases, i.e. up to 6 i32/i64 GPR arguments and up to 8
  // f32/f64 FPR arguments.
2701  unsigned GPRCnt = 0;
2702  unsigned FPRCnt = 0;
2703  unsigned Idx = 0;
2704  for (auto const &Arg : F->args()) {
2705    // The first argument is at index 1.
2706    ++Idx;
2707    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2708        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2709        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2710        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2711      return false;
2712
2713    Type *ArgTy = Arg.getType();
2714    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
2715      return false;
2716
2717    EVT ArgVT = TLI.getValueType(ArgTy);
2718    if (!ArgVT.isSimple()) return false;
2719    switch (ArgVT.getSimpleVT().SimpleTy) {
2720    default: return false;
2721    case MVT::i32:
2722    case MVT::i64:
2723      ++GPRCnt;
2724      break;
2725    case MVT::f32:
2726    case MVT::f64:
2727      if (!Subtarget->hasSSE1())
2728        return false;
2729      ++FPRCnt;
2730      break;
2731    }
2732
2733    if (GPRCnt > 6)
2734      return false;
2735
2736    if (FPRCnt > 8)
2737      return false;
2738  }
2739
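  // Argument registers for the x86-64 SysV calling convention.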
2740  static const MCPhysReg GPR32ArgRegs[] = {
2741    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
2742  };
2743  static const MCPhysReg GPR64ArgRegs[] = {
2744    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
2745  };
2746  static const MCPhysReg XMMArgRegs[] = {
2747    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2748    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2749  };
2750
2751  unsigned GPRIdx = 0;
2752  unsigned FPRIdx = 0;
2753  for (auto const &Arg : F->args()) {
2754    MVT VT = TLI.getSimpleValueType(Arg.getType());
2755    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2756    unsigned SrcReg;
2757    switch (VT.SimpleTy) {
2758    default: llvm_unreachable("Unexpected value type.");
2759    case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
2760    case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
2761    case MVT::f32: // fall-through
2762    case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
2763    }
2764    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2765    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2766    // Without this, EmitLiveInCopies may eliminate the livein if its only
2767    // use is a bitcast (which isn't turned into an instruction).
2768    unsigned ResultReg = createResultReg(RC);
2769    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2770            TII.get(TargetOpcode::COPY), ResultReg)
2771      .addReg(DstReg, getKillRegState(true));
2772    updateValueMap(&Arg, ResultReg);
2773  }
2774  return true;
2775}
2776
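/// Compute how many bytes the callee pops off the stack on return: on 32-bit
/// non-MSVCRT targets with a normal calling convention, a callee that takes an
/// sret argument (not passed in a register) pops the hidden 4-byte sret
/// pointer; otherwise nothing is popped here.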
2777static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
2778                                           CallingConv::ID CC,
2779                                           ImmutableCallSite *CS) {
2780  if (Subtarget->is64Bit())
2781    return 0;
2782  if (Subtarget->getTargetTriple().isOSMSVCRT())
2783    return 0;
2784  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2785      CC == CallingConv::HiPE)
2786    return 0;
2787  if (CS && !CS->paramHasAttr(1, Attribute::StructRet))
2788    return 0;
2789  if (CS && CS->paramHasAttr(1, Attribute::InReg))
2790    return 0;
2791  return 4;
2792}
2793
2794bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
2795  auto &OutVals       = CLI.OutVals;
2796  auto &OutFlags      = CLI.OutFlags;
2797  auto &OutRegs       = CLI.OutRegs;
2798  auto &Ins           = CLI.Ins;
2799  auto &InRegs        = CLI.InRegs;
2800  CallingConv::ID CC  = CLI.CallConv;
2801  bool &IsTailCall    = CLI.IsTailCall;
2802  bool IsVarArg       = CLI.IsVarArg;
2803  const Value *Callee = CLI.Callee;
2804  const char *SymName = CLI.SymName;
2805
2806  bool Is64Bit        = Subtarget->is64Bit();
2807  bool IsWin64        = Subtarget->isCallingConvWin64(CC);
2808
2809  // Handle only C, fastcc, and webkit_js calling conventions for now.
2810  switch (CC) {
2811  default: return false;
2812  case CallingConv::C:
2813  case CallingConv::Fast:
2814  case CallingConv::WebKit_JS:
2815  case CallingConv::X86_FastCall:
2816  case CallingConv::X86_64_Win64:
2817  case CallingConv::X86_64_SysV:
2818    break;
2819  }
2820
2821  // Allow SelectionDAG isel to handle tail calls.
2822  if (IsTailCall)
2823    return false;
2824
2825  // fastcc with -tailcallopt is intended to provide a guaranteed
2826  // tail call optimization. Fastisel doesn't know how to do that.
2827  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
2828    return false;
2829
  // We don't know how to handle Win64 varargs yet. Nothing special is needed
  // for x86-32, and the special handling for x86-64 is implemented.
2832  if (IsVarArg && IsWin64)
2833    return false;
2834
2835  // Don't know about inalloca yet.
2836  if (CLI.CS && CLI.CS->hasInAllocaArgument())
2837    return false;
2838
2839  // Fast-isel doesn't know about callee-pop yet.
2840  if (X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
2841                       TM.Options.GuaranteedTailCallOpt))
2842    return false;
2843
2844  SmallVector<MVT, 16> OutVTs;
2845  SmallVector<unsigned, 16> ArgRegs;
2846
  // If this is a constant i1/i8/i16 argument, promote it to i32 to avoid an
  // extra instruction. This is safe because it is common to all the calling
  // conventions that FastISel supports on x86.
2850  for (int i = 0, e = OutVals.size(); i != e; ++i) {
2851    Value *&Val = OutVals[i];
2852    ISD::ArgFlagsTy Flags = OutFlags[i];
2853    if (auto *CI = dyn_cast<ConstantInt>(Val)) {
2854      if (CI->getBitWidth() < 32) {
2855        if (Flags.isSExt())
2856          Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
2857        else
2858          Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
2859      }
2860    }
2861
2862    // Passing bools around ends up doing a trunc to i1 and passing it.
2863    // Codegen this as an argument + "and 1".
2864    MVT VT;
2865    auto *TI = dyn_cast<TruncInst>(Val);
2866    unsigned ResultReg;
2867    if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
2868              (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
2869              TI->hasOneUse()) {
2870      Value *PrevVal = TI->getOperand(0);
2871      ResultReg = getRegForValue(PrevVal);
2872
2873      if (!ResultReg)
2874        return false;
2875
2876      if (!isTypeLegal(PrevVal->getType(), VT))
2877        return false;
2878
2879      ResultReg =
2880        fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
2881    } else {
2882      if (!isTypeLegal(Val->getType(), VT))
2883        return false;
2884      ResultReg = getRegForValue(Val);
2885    }
2886
2887    if (!ResultReg)
2888      return false;
2889
2890    ArgRegs.push_back(ResultReg);
2891    OutVTs.push_back(VT);
2892  }
2893
2894  // Analyze operands of the call, assigning locations to each operand.
2895  SmallVector<CCValAssign, 16> ArgLocs;
2896  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
2897
2898  // Allocate shadow area for Win64
2899  if (IsWin64)
2900    CCInfo.AllocateStack(32, 8);

  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
    .addImm(NumBytes).addImm(0);

  // Walk the register/memloc assignments, inserting copies/loads.
  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign const &VA = ArgLocs[i];
    const Value *ArgVal = OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    if (ArgVT == MVT::x86mmx)
      return false;

    unsigned ArgReg = ArgRegs[VA.getValNo()];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full: break;
    case CCValAssign::SExt: {
      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
             "Unexpected extend");
      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
                                       ArgVT, ArgReg);
      assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::ZExt: {
      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
             "Unexpected extend");
      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
                                       ArgVT, ArgReg);
      assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::AExt: {
      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
             "Unexpected extend");
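      // Any-extend only requires the low bits to be valid; the upper bits are
      // unspecified, so falling back to a zero- or sign-extend below is always
      // correct when no plain any-extend is available.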
      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
                                       ArgVT, ArgReg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
                                    ArgVT, ArgReg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
                                    ArgVT, ArgReg);

      assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::BCvt: {
      ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
                          /*TODO: Kill=*/false);
      assert(ArgReg && "Failed to emit a bitcast!");
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::VExt:
      // VExt has not been implemented, so this should be impossible to reach
      // for now. However, fall back to SelectionDAG isel once it is
      // implemented.
      return false;
    case CCValAssign::AExtUpper:
    case CCValAssign::SExtUpper:
    case CCValAssign::ZExtUpper:
    case CCValAssign::FPExt:
      llvm_unreachable("Unexpected loc info!");
    case CCValAssign::Indirect:
      // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
      // support this.
      return false;
    }

    if (VA.isRegLoc()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      OutRegs.push_back(VA.getLocReg());
    } else {
      assert(VA.isMemLoc());

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      unsigned LocMemOffset = VA.getLocMemOffset();
      X86AddressMode AM;
      AM.Base.Reg = RegInfo->getStackRegister();
      AM.Disp = LocMemOffset;
      ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
      unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore,
        ArgVT.getStoreSize(), Alignment);
      if (Flags.isByVal()) {
        X86AddressMode SrcAM;
        SrcAM.Base.Reg = ArgReg;
        if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
          return false;
      } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
        // If this is a really simple value, emit this with the Value* version
        // of X86FastEmitStore.  If it isn't simple, we don't want to do this,
        // as it can cause us to reevaluate the argument.
        if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
          return false;
      } else {
        bool ValIsKill = hasTrivialKill(ArgVal);
        if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
          return false;
      }
    }
  }

  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // function calls made via the PLT.
  if (Subtarget->isPICStyleGOT()) {
    unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
  }

  if (Is64Bit && IsVarArg && !IsWin64) {
    // From AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.
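    // For example, a call like printf("%f", x) passes x in %xmm0 and so needs
    // %al >= 1; here we simply pass the exact number of XMM argument registers
    // that were allocated.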

    // Count the number of XMM registers allocated.
    static const MCPhysReg XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
    assert((Subtarget->hasSSE1() || !NumXMMRegs)
           && "SSE registers cannot be used when SSE is disabled");
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
            X86::AL).addImm(NumXMMRegs);
  }

  // Materialize callee address in a register. FIXME: GV address can be
  // handled with a CALLpcrel32 instead.
  X86AddressMode CalleeAM;
  if (!X86SelectCallAddress(Callee, CalleeAM))
    return false;

  unsigned CalleeOp = 0;
  const GlobalValue *GV = nullptr;
  if (CalleeAM.GV != nullptr) {
    GV = CalleeAM.GV;
  } else if (CalleeAM.Base.Reg != 0) {
    CalleeOp = CalleeAM.Base.Reg;
  } else
    return false;

  // Issue the call.
  MachineInstrBuilder MIB;
  if (CalleeOp) {
    // Register-indirect call.
    unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
      .addReg(CalleeOp);
  } else {
    // Direct call.
    assert(GV && "Not a direct call");
    unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;

    // See if we need any target-specific flags on the GV operand.
    unsigned char OpFlags = 0;

    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
    // external symbols must go through the PLT in PIC mode.  If the symbol
    // has hidden or protected visibility, or if it is static or local, then
    // we don't need to use the PLT - we can directly call it.
    if (Subtarget->isTargetELF() &&
        TM.getRelocationModel() == Reloc::PIC_ &&
        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
      OpFlags = X86II::MO_PLT;
    } else if (Subtarget->isPICStyleStubAny() &&
               (GV->isDeclaration() || GV->isWeakForLinker()) &&
               (!Subtarget->getTargetTriple().isMacOSX() ||
                Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = X86II::MO_DARWIN_STUB;
    }

    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
    if (SymName)
      MIB.addExternalSymbol(SymName, OpFlags);
    else
      MIB.addGlobalAddress(GV, 0, OpFlags);
  }

  // Add a register mask operand representing the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  // Add an implicit use of the GOT pointer in EBX.
  if (Subtarget->isPICStyleGOT())
    MIB.addReg(X86::EBX, RegState::Implicit);

  if (Is64Bit && IsVarArg && !IsWin64)
    MIB.addReg(X86::AL, RegState::Implicit);

  // Add implicit physical register uses to the call.
  for (auto Reg : OutRegs)
    MIB.addReg(Reg, RegState::Implicit);

  // Issue CALLSEQ_END
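  // The second immediate is the number of bytes popped by the callee; it is
  // nonzero for callee-cleanup conventions such as stdcall/thiscall on 32-bit
  // x86 and zero for the default C calling convention.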
  unsigned NumBytesForCalleeToPop =
    computeBytesPoppedByCallee(Subtarget, CC, CLI.CS);
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
    .addImm(NumBytes).addImm(NumBytesForCalleeToPop);

  // Now handle call return values.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
                    CLI.RetTy->getContext());
  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);

  // Copy all of the result registers out of their specified physreg.
  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    EVT CopyVT = VA.getValVT();
    unsigned CopyReg = ResultReg + i;

    // If this is x86-64, and we disabled SSE, we can't return FP values
    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
      report_fatal_error("SSE register return with SSE disabled");
    }

    // If we prefer to use the value in xmm registers, copy it out as f80 and
    // use a truncate to move it from fp stack reg to xmm reg.
    if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
        isScalarFPTypeInSSEReg(VA.getValVT())) {
      CopyVT = MVT::f80;
      CopyReg = createResultReg(&X86::RFP80RegClass);
    }

    // Copy out the result.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
    InRegs.push_back(VA.getLocReg());

    // Round the f80 to the right size, which also moves it to the appropriate
    // xmm register. This is accomplished by storing the f80 value in memory
    // and then loading it back.
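    // Roughly: "fstps/fstpl <slot>" to pop and round the x87 value, followed
    // by "movss/movsd <slot>, %xmmN" to reload it into an SSE register.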
    if (CopyVT != VA.getValVT()) {
      EVT ResVT = VA.getValVT();
      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      unsigned MemSize = ResVT.getSizeInBits()/8;
      int FI = MFI.CreateStackObject(MemSize, MemSize, false);
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                TII.get(Opc)), FI)
        .addReg(CopyReg);
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                TII.get(Opc), ResultReg + i), FI);
    }
  }

  CLI.ResultReg = ResultReg;
  CLI.NumResultRegs = RVLocs.size();
  CLI.Call = MIB;

  return true;
}

bool
X86FastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    return X86SelectLoad(I);
  case Instruction::Store:
    return X86SelectStore(I);
  case Instruction::Ret:
    return X86SelectRet(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return X86SelectCmp(I);
  case Instruction::ZExt:
    return X86SelectZExt(I);
  case Instruction::Br:
    return X86SelectBranch(I);
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    return X86SelectShift(I);
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem:
    return X86SelectDivRem(I);
  case Instruction::Select:
    return X86SelectSelect(I);
  case Instruction::Trunc:
    return X86SelectTrunc(I);
  case Instruction::FPExt:
    return X86SelectFPExt(I);
  case Instruction::FPTrunc:
    return X86SelectFPTrunc(I);
  case Instruction::SIToFP:
    return X86SelectSIToFP(I);
  case Instruction::IntToPtr: // Deliberate fall-through.
  case Instruction::PtrToInt: {
    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
    EVT DstVT = TLI.getValueType(I->getType());
    if (DstVT.bitsGT(SrcVT))
      return X86SelectZExt(I);
    if (DstVT.bitsLT(SrcVT))
      return X86SelectTrunc(I);
    unsigned Reg = getRegForValue(I->getOperand(0));
    if (Reg == 0) return false;
    updateValueMap(I, Reg);
    return true;
  }
  }

  return false;
}

unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  uint64_t Imm = CI->getZExtValue();
  if (Imm == 0) {
    unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
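    // MOV32r0 is a pseudo that is later expanded to "xor %reg, %reg"; the
    // sub-register copies below narrow the zero to i8/i16, and SUBREG_TO_REG
    // widens it to i64 since a 32-bit write implicitly zeroes the upper bits.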
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected value type");
    case MVT::i1:
    case MVT::i8:
      return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
                                        X86::sub_8bit);
    case MVT::i16:
      return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
                                        X86::sub_16bit);
    case MVT::i32:
      return SrcReg;
    case MVT::i64: {
      unsigned ResultReg = createResultReg(&X86::GR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
        .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
      return ResultReg;
    }
    }
  }

  unsigned Opc = 0;
  switch (VT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type");
  case MVT::i1:  VT = MVT::i8; // fall-through
  case MVT::i8:  Opc = X86::MOV8ri;  break;
  case MVT::i16: Opc = X86::MOV16ri; break;
  case MVT::i32: Opc = X86::MOV32ri; break;
  case MVT::i64: {
    if (isUInt<32>(Imm))
      Opc = X86::MOV32ri;
    else if (isInt<32>(Imm))
      Opc = X86::MOV64ri32;
    else
      Opc = X86::MOV64ri;
    break;
  }
  }
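  // For i64: an immediate with zero upper bits can use the shorter 32-bit move
  // (the upper half is zeroed implicitly, see the SUBREG_TO_REG below), a
  // sign-extendable immediate uses MOV64ri32, and anything else needs the full
  // 10-byte MOV64ri encoding.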
  if (VT == MVT::i64 && Opc == X86::MOV32ri) {
    unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
    unsigned ResultReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
      .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
    return ResultReg;
  }
  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}

unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  // Can't handle alternate code models yet.
  CodeModel::Model CM = TM.getCodeModel();
  if (CM != CodeModel::Small && CM != CodeModel::Large)
    return 0;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.SimpleTy) {
  default: return 0;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
      RC  = &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
      RC  = &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // Alignment of vector types. FIXME!
    Align = DL.getTypeAllocSize(CFP->getType());
  }

  // x86-32 PIC requires a PIC base register for constant pools.
  unsigned PICBase = 0;
  unsigned char OpFlag = 0;
  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleGOT()) {
    OpFlag = X86II::MO_GOTOFF;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleRIPRel() &&
             TM.getCodeModel() == CodeModel::Small) {
    PICBase = X86::RIP;
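    // Under the small code model on x86-64 the constant-pool load ends up
    // RIP-relative, e.g. "movsd .LCPI0_0(%rip), %xmm0".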
  }

  // Create the load from the constant pool.
  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
  unsigned ResultReg = createResultReg(RC);

  if (CM == CodeModel::Large) {
    unsigned AddrReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
            AddrReg)
      .addConstantPoolIndex(CPI, 0, OpFlag);
    MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                      TII.get(Opc), ResultReg);
    addDirectMem(MIB, AddrReg);
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
        TM.getDataLayout()->getPointerSize(), Align);
    MIB->addMemOperand(*FuncInfo.MF, MMO);
    return ResultReg;
  }

  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                   TII.get(Opc), ResultReg),
                           CPI, PICBase, OpFlag);
  return ResultReg;
}

unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
  // Can't handle alternate code models yet.
  if (TM.getCodeModel() != CodeModel::Small)
    return 0;

  // Materialize addresses with LEA/MOV instructions.
  X86AddressMode AM;
  if (X86SelectAddress(GV, AM)) {
    // If the expression is just a basereg, then we're done, otherwise we need
    // to emit an LEA.
    if (AM.BaseType == X86AddressMode::RegBase &&
        AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
      return AM.Base.Reg;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    if (TM.getRelocationModel() == Reloc::Static &&
        TLI.getPointerTy() == MVT::i64) {
      // The displacement could be more than 32 bits away, so we need to use
      // an instruction with a 64-bit immediate.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
              ResultReg)
        .addGlobalAddress(GV);
    } else {
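      // Otherwise use an LEA: LEA64_32r for ILP32 targets on x86-64 (32-bit
      // pointers computed with 64-bit registers), LEA32r for plain 32-bit
      // targets, and LEA64r for LP64.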
      unsigned Opc = TLI.getPointerTy() == MVT::i32
                     ? (Subtarget->isTarget64BitILP32()
                        ? X86::LEA64_32r : X86::LEA32r)
                     : X86::LEA64r;
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                             TII.get(Opc), ResultReg), AM);
    }
    return ResultReg;
  }
  return 0;
}

unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return X86MaterializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return X86MaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return X86MaterializeGV(GV, VT);

  return 0;
}

unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but targetMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and targetMaterializeAlloca.
  if (!FuncInfo.StaticAllocaMap.count(C))
    return 0;
  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM))
    return 0;
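  // A static alloca lives in a fixed frame slot, so its address is produced
  // with an LEA off the frame register, e.g. "leaq -8(%rbp), %rax" after frame
  // index elimination.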
  unsigned Opc = TLI.getPointerTy() == MVT::i32
                 ? (Subtarget->isTarget64BitILP32()
                    ? X86::LEA64_32r : X86::LEA32r)
                 : X86::LEA64r;
  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy());
  unsigned ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                         TII.get(Opc), ResultReg), AM);
  return ResultReg;
}

unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
  MVT VT;
  if (!isTypeLegal(CF->getType(), VT))
    return 0;

  // Get opcode and regclass for the given zero.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.SimpleTy) {
  default: return 0;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = X86::FsFLD0SS;
      RC  = &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp032;
      RC  = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = X86::FsFLD0SD;
      RC  = &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp064;
      RC  = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  return ResultReg;
}

bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
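  // Try to fold the load feeding MI's operand OpNo directly into MI as a
  // memory operand, e.g. turning a separate "movl (%rdi), %eax" plus
  // "addl %eax, %ecx" pair into a single "addl (%rdi), %ecx".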
  const Value *Ptr = LI->getPointerOperand();
  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  const X86InstrInfo &XII = (const X86InstrInfo &)TII;

  unsigned Size = DL.getTypeAllocSize(LI->getType());
  unsigned Alignment = LI->getAlignment();

  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
    Alignment = DL.getABITypeAlignment(LI->getType());

  SmallVector<MachineOperand, 8> AddrOps;
  AM.getFullAddress(AddrOps);

  MachineInstr *Result =
    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps,
                              Size, Alignment, /*AllowCommute=*/true);
  if (!Result)
    return false;

  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
  FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
  MI->eraseFromParent();
  return true;
}

namespace llvm {
  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    return new X86FastISel(funcInfo, libInfo);
  }
}