1//===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the AArch64-specific support for the FastISel class. Some
11// of the target-specific code is generated by tablegen in the file
12// AArch64GenFastISel.inc, which is #included here.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AArch64.h"
17#include "AArch64CallingConvention.h"
18#include "AArch64Subtarget.h"
19#include "AArch64TargetMachine.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "llvm/Analysis/BranchProbabilityInfo.h"
22#include "llvm/CodeGen/CallingConvLower.h"
23#include "llvm/CodeGen/FastISel.h"
24#include "llvm/CodeGen/FunctionLoweringInfo.h"
25#include "llvm/CodeGen/MachineConstantPool.h"
26#include "llvm/CodeGen/MachineFrameInfo.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/IR/CallingConv.h"
30#include "llvm/IR/DataLayout.h"
31#include "llvm/IR/DerivedTypes.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/GetElementPtrTypeIterator.h"
34#include "llvm/IR/GlobalAlias.h"
35#include "llvm/IR/GlobalVariable.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/IntrinsicInst.h"
38#include "llvm/IR/Operator.h"
39#include "llvm/MC/MCSymbol.h"
40#include "llvm/Support/CommandLine.h"
41using namespace llvm;
42
43namespace {
44
45class AArch64FastISel final : public FastISel {
46  class Address {
47  public:
48    typedef enum {
49      RegBase,
50      FrameIndexBase
51    } BaseKind;
52
53  private:
54    BaseKind Kind;
55    AArch64_AM::ShiftExtendType ExtType;
56    union {
57      unsigned Reg;
58      int FI;
59    } Base;
60    unsigned OffsetReg;
61    unsigned Shift;
62    int64_t Offset;
63    const GlobalValue *GV;
64
65  public:
66    Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
67      OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
68    void setKind(BaseKind K) { Kind = K; }
69    BaseKind getKind() const { return Kind; }
70    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
71    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
72    bool isRegBase() const { return Kind == RegBase; }
73    bool isFIBase() const { return Kind == FrameIndexBase; }
74    void setReg(unsigned Reg) {
75      assert(isRegBase() && "Invalid base register access!");
76      Base.Reg = Reg;
77    }
78    unsigned getReg() const {
79      assert(isRegBase() && "Invalid base register access!");
80      return Base.Reg;
81    }
82    void setOffsetReg(unsigned Reg) {
83      OffsetReg = Reg;
84    }
85    unsigned getOffsetReg() const {
86      return OffsetReg;
87    }
88    void setFI(unsigned FI) {
89      assert(isFIBase() && "Invalid base frame index  access!");
90      Base.FI = FI;
91    }
92    unsigned getFI() const {
93      assert(isFIBase() && "Invalid base frame index access!");
94      return Base.FI;
95    }
96    void setOffset(int64_t O) { Offset = O; }
97    int64_t getOffset() { return Offset; }
98    void setShift(unsigned S) { Shift = S; }
99    unsigned getShift() { return Shift; }
100
101    void setGlobalValue(const GlobalValue *G) { GV = G; }
102    const GlobalValue *getGlobalValue() { return GV; }
103  };
104
105  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
106  /// make the right decision when generating code for different targets.
107  const AArch64Subtarget *Subtarget;
108  LLVMContext *Context;
109
110  bool fastLowerArguments() override;
111  bool fastLowerCall(CallLoweringInfo &CLI) override;
112  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
113
114private:
115  // Selection routines.
116  bool selectAddSub(const Instruction *I);
117  bool selectLogicalOp(const Instruction *I);
118  bool selectLoad(const Instruction *I);
119  bool selectStore(const Instruction *I);
120  bool selectBranch(const Instruction *I);
121  bool selectIndirectBr(const Instruction *I);
122  bool selectCmp(const Instruction *I);
123  bool selectSelect(const Instruction *I);
124  bool selectFPExt(const Instruction *I);
125  bool selectFPTrunc(const Instruction *I);
126  bool selectFPToInt(const Instruction *I, bool Signed);
127  bool selectIntToFP(const Instruction *I, bool Signed);
128  bool selectRem(const Instruction *I, unsigned ISDOpcode);
129  bool selectRet(const Instruction *I);
130  bool selectTrunc(const Instruction *I);
131  bool selectIntExt(const Instruction *I);
132  bool selectMul(const Instruction *I);
133  bool selectShift(const Instruction *I);
134  bool selectBitCast(const Instruction *I);
135  bool selectFRem(const Instruction *I);
136  bool selectSDiv(const Instruction *I);
137  bool selectGetElementPtr(const Instruction *I);
138
139  // Utility helper routines.
140  bool isTypeLegal(Type *Ty, MVT &VT);
141  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
142  bool isValueAvailable(const Value *V) const;
143  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
144  bool computeCallAddress(const Value *V, Address &Addr);
145  bool simplifyAddress(Address &Addr, MVT VT);
146  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
147                            unsigned Flags, unsigned ScaleFactor,
148                            MachineMemOperand *MMO);
149  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
150  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
151                          unsigned Alignment);
152  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
153                         const Value *Cond);
154  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
155  bool optimizeSelect(const SelectInst *SI);
156  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
157
158  // Emit helper routines.
159  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
160                      const Value *RHS, bool SetFlags = false,
161                      bool WantResult = true,  bool IsZExt = false);
162  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
163                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
164                         bool SetFlags = false, bool WantResult = true);
165  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
166                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
167                         bool WantResult = true);
168  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
169                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
170                         AArch64_AM::ShiftExtendType ShiftType,
171                         uint64_t ShiftImm, bool SetFlags = false,
172                         bool WantResult = true);
173  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
174                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
175                          AArch64_AM::ShiftExtendType ExtType,
176                          uint64_t ShiftImm, bool SetFlags = false,
177                         bool WantResult = true);
178
179  // Emit functions.
180  bool emitCompareAndBranch(const BranchInst *BI);
181  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
182  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
183  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
184  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
185  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
186                    MachineMemOperand *MMO = nullptr);
187  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
188                 MachineMemOperand *MMO = nullptr);
189  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
190  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
191  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
192                   bool SetFlags = false, bool WantResult = true,
193                   bool IsZExt = false);
194  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
195  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
196                   bool SetFlags = false, bool WantResult = true,
197                   bool IsZExt = false);
198  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
199                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
200  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
201                       unsigned RHSReg, bool RHSIsKill,
202                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
203                       bool WantResult = true);
204  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
205                         const Value *RHS);
206  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
207                            bool LHSIsKill, uint64_t Imm);
208  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
209                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
210                            uint64_t ShiftImm);
211  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
212  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
213                      unsigned Op1, bool Op1IsKill);
214  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
215                        unsigned Op1, bool Op1IsKill);
216  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
217                        unsigned Op1, bool Op1IsKill);
218  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
219                      unsigned Op1Reg, bool Op1IsKill);
220  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
221                      uint64_t Imm, bool IsZExt = true);
222  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
223                      unsigned Op1Reg, bool Op1IsKill);
224  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
225                      uint64_t Imm, bool IsZExt = true);
226  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
227                      unsigned Op1Reg, bool Op1IsKill);
228  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
229                      uint64_t Imm, bool IsZExt = false);
230
231  unsigned materializeInt(const ConstantInt *CI, MVT VT);
232  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
233  unsigned materializeGV(const GlobalValue *GV);
234
235  // Call handling routines.
236private:
237  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
238  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
239                       unsigned &NumBytes);
240  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
241
242public:
243  // Backend specific FastISel code.
244  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
245  unsigned fastMaterializeConstant(const Constant *C) override;
246  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
247
248  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
249                           const TargetLibraryInfo *LibInfo)
250      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
251    Subtarget =
252        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
253    Context = &FuncInfo.Fn->getContext();
254  }
255
256  bool fastSelectInstruction(const Instruction *I) override;
257
258#include "AArch64GenFastISel.inc"
259};
260
261} // end anonymous namespace
262
263#include "AArch64GenCallingConv.inc"
264
265/// \brief Check if the sign-/zero-extend will be a noop.
266static bool isIntExtFree(const Instruction *I) {
267  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
268         "Unexpected integer extend instruction.");
269  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
270         "Unexpected value type.");
271  bool IsZExt = isa<ZExtInst>(I);
272
273  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
274    if (LI->hasOneUse())
275      return true;
276
277  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
278    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
279      return true;
280
281  return false;
282}
283
284/// \brief Determine the implicit scale factor that is applied by a memory
285/// operation for a given value type.
286static unsigned getImplicitScaleFactor(MVT VT) {
287  switch (VT.SimpleTy) {
288  default:
289    return 0;    // invalid
290  case MVT::i1:  // fall-through
291  case MVT::i8:
292    return 1;
293  case MVT::i16:
294    return 2;
295  case MVT::i32: // fall-through
296  case MVT::f32:
297    return 4;
298  case MVT::i64: // fall-through
299  case MVT::f64:
300    return 8;
301  }
302}
303
304CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
305  if (CC == CallingConv::WebKit_JS)
306    return CC_AArch64_WebKit_JS;
307  if (CC == CallingConv::GHC)
308    return CC_AArch64_GHC;
309  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
310}
311
312unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
313  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
314         "Alloca should always return a pointer.");
315
316  // Don't handle dynamic allocas.
317  if (!FuncInfo.StaticAllocaMap.count(AI))
318    return 0;
319
320  DenseMap<const AllocaInst *, int>::iterator SI =
321      FuncInfo.StaticAllocaMap.find(AI);
322
323  if (SI != FuncInfo.StaticAllocaMap.end()) {
324    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
325    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
326            ResultReg)
327        .addFrameIndex(SI->second)
328        .addImm(0)
329        .addImm(0);
330    return ResultReg;
331  }
332
333  return 0;
334}
335
336unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
337  if (VT > MVT::i64)
338    return 0;
339
340  if (!CI->isZero())
341    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
342
343  // Create a copy from the zero register to materialize a "0" value.
344  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
345                                                   : &AArch64::GPR32RegClass;
346  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
347  unsigned ResultReg = createResultReg(RC);
348  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
349          ResultReg).addReg(ZeroReg, getKillRegState(true));
350  return ResultReg;
351}
352
353unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
354  // Positive zero (+0.0) has to be materialized with a fmov from the zero
355  // register, because the immediate version of fmov cannot encode zero.
356  if (CFP->isNullValue())
357    return fastMaterializeFloatZero(CFP);
358
359  if (VT != MVT::f32 && VT != MVT::f64)
360    return 0;
361
362  const APFloat Val = CFP->getValueAPF();
363  bool Is64Bit = (VT == MVT::f64);
364  // This checks to see if we can use FMOV instructions to materialize
365  // a constant, otherwise we have to materialize via the constant pool.
366  if (TLI.isFPImmLegal(Val, VT)) {
367    int Imm =
368        Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
369    assert((Imm != -1) && "Cannot encode floating-point constant.");
370    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
371    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
372  }
373
374  // For the MachO large code model materialize the FP constant in code.
375  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
376    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
377    const TargetRegisterClass *RC = Is64Bit ?
378        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
379
380    unsigned TmpReg = createResultReg(RC);
381    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
382        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
383
384    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
385    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
386            TII.get(TargetOpcode::COPY), ResultReg)
387        .addReg(TmpReg, getKillRegState(true));
388
389    return ResultReg;
390  }
391
392  // Materialize via constant pool.  MachineConstantPool wants an explicit
393  // alignment.
394  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
395  if (Align == 0)
396    Align = DL.getTypeAllocSize(CFP->getType());
397
398  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
399  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
400  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
401          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
402
403  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
404  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
405  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
406      .addReg(ADRPReg)
407      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
408  return ResultReg;
409}
410
411unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
412  // We can't handle thread-local variables quickly yet.
413  if (GV->isThreadLocal())
414    return 0;
415
416  // MachO still uses GOT for large code-model accesses, but ELF requires
417  // movz/movk sequences, which FastISel doesn't handle yet.
418  if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
419    return 0;
420
421  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
422
423  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
424  if (!DestEVT.isSimple())
425    return 0;
426
427  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
428  unsigned ResultReg;
429
430  if (OpFlags & AArch64II::MO_GOT) {
431    // ADRP + LDRX
432    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
433            ADRPReg)
434      .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
435
436    ResultReg = createResultReg(&AArch64::GPR64RegClass);
437    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
438            ResultReg)
439      .addReg(ADRPReg)
440      .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
441                        AArch64II::MO_NC);
442  } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
443    // We can't handle addresses loaded from a constant pool quickly yet.
444    return 0;
445  } else {
446    // ADRP + ADDX
447    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
448            ADRPReg)
449      .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
450
451    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
452    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
453            ResultReg)
454      .addReg(ADRPReg)
455      .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
456      .addImm(0);
457  }
458  return ResultReg;
459}
460
461unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
462  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
463
464  // Only handle simple types.
465  if (!CEVT.isSimple())
466    return 0;
467  MVT VT = CEVT.getSimpleVT();
468
469  if (const auto *CI = dyn_cast<ConstantInt>(C))
470    return materializeInt(CI, VT);
471  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
472    return materializeFP(CFP, VT);
473  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
474    return materializeGV(GV);
475
476  return 0;
477}
478
479unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
480  assert(CFP->isNullValue() &&
481         "Floating-point constant is not a positive zero.");
482  MVT VT;
483  if (!isTypeLegal(CFP->getType(), VT))
484    return 0;
485
486  if (VT != MVT::f32 && VT != MVT::f64)
487    return 0;
488
489  bool Is64Bit = (VT == MVT::f64);
490  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
491  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
492  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
493}
494
495/// \brief Check if the multiply is by a power-of-2 constant.
496static bool isMulPowOf2(const Value *I) {
497  if (const auto *MI = dyn_cast<MulOperator>(I)) {
498    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
499      if (C->getValue().isPowerOf2())
500        return true;
501    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
502      if (C->getValue().isPowerOf2())
503        return true;
504  }
505  return false;
506}
507
508// Computes the address to get to an object.
509bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
510{
511  const User *U = nullptr;
512  unsigned Opcode = Instruction::UserOp1;
513  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
514    // Don't walk into other basic blocks unless the object is an alloca from
515    // another block, otherwise it may not have a virtual register assigned.
516    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
517        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
518      Opcode = I->getOpcode();
519      U = I;
520    }
521  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
522    Opcode = C->getOpcode();
523    U = C;
524  }
525
526  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
527    if (Ty->getAddressSpace() > 255)
528      // Fast instruction selection doesn't support the special
529      // address spaces.
530      return false;
531
532  switch (Opcode) {
533  default:
534    break;
535  case Instruction::BitCast: {
536    // Look through bitcasts.
537    return computeAddress(U->getOperand(0), Addr, Ty);
538  }
539  case Instruction::IntToPtr: {
540    // Look past no-op inttoptrs.
541    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
542        TLI.getPointerTy(DL))
543      return computeAddress(U->getOperand(0), Addr, Ty);
544    break;
545  }
546  case Instruction::PtrToInt: {
547    // Look past no-op ptrtoints.
548    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
549      return computeAddress(U->getOperand(0), Addr, Ty);
550    break;
551  }
552  case Instruction::GetElementPtr: {
553    Address SavedAddr = Addr;
554    uint64_t TmpOffset = Addr.getOffset();
555
556    // Iterate through the GEP folding the constants into offsets where
557    // we can.
558    gep_type_iterator GTI = gep_type_begin(U);
559    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
560         ++i, ++GTI) {
561      const Value *Op = *i;
562      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
563        const StructLayout *SL = DL.getStructLayout(STy);
564        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
565        TmpOffset += SL->getElementOffset(Idx);
566      } else {
567        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
568        for (;;) {
569          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
570            // Constant-offset addressing.
571            TmpOffset += CI->getSExtValue() * S;
572            break;
573          }
574          if (canFoldAddIntoGEP(U, Op)) {
575            // A compatible add with a constant operand. Fold the constant.
576            ConstantInt *CI =
577                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
578            TmpOffset += CI->getSExtValue() * S;
579            // Iterate on the other operand.
580            Op = cast<AddOperator>(Op)->getOperand(0);
581            continue;
582          }
583          // Unsupported
584          goto unsupported_gep;
585        }
586      }
587    }
588
589    // Try to grab the base operand now.
590    Addr.setOffset(TmpOffset);
591    if (computeAddress(U->getOperand(0), Addr, Ty))
592      return true;
593
594    // We failed, restore everything and try the other options.
595    Addr = SavedAddr;
596
597  unsupported_gep:
598    break;
599  }
600  case Instruction::Alloca: {
601    const AllocaInst *AI = cast<AllocaInst>(Obj);
602    DenseMap<const AllocaInst *, int>::iterator SI =
603        FuncInfo.StaticAllocaMap.find(AI);
604    if (SI != FuncInfo.StaticAllocaMap.end()) {
605      Addr.setKind(Address::FrameIndexBase);
606      Addr.setFI(SI->second);
607      return true;
608    }
609    break;
610  }
611  case Instruction::Add: {
612    // Adds of constants are common and easy enough.
613    const Value *LHS = U->getOperand(0);
614    const Value *RHS = U->getOperand(1);
615
616    if (isa<ConstantInt>(LHS))
617      std::swap(LHS, RHS);
618
619    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
620      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
621      return computeAddress(LHS, Addr, Ty);
622    }
623
624    Address Backup = Addr;
625    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
626      return true;
627    Addr = Backup;
628
629    break;
630  }
631  case Instruction::Sub: {
632    // Subs of constants are common and easy enough.
633    const Value *LHS = U->getOperand(0);
634    const Value *RHS = U->getOperand(1);
635
636    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
637      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
638      return computeAddress(LHS, Addr, Ty);
639    }
640    break;
641  }
642  case Instruction::Shl: {
643    if (Addr.getOffsetReg())
644      break;
645
646    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
647    if (!CI)
648      break;
649
650    unsigned Val = CI->getZExtValue();
651    if (Val < 1 || Val > 3)
652      break;
653
654    uint64_t NumBytes = 0;
655    if (Ty && Ty->isSized()) {
656      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
657      NumBytes = NumBits / 8;
658      if (!isPowerOf2_64(NumBits))
659        NumBytes = 0;
660    }
661
662    if (NumBytes != (1ULL << Val))
663      break;
664
665    Addr.setShift(Val);
666    Addr.setExtendType(AArch64_AM::LSL);
667
668    const Value *Src = U->getOperand(0);
669    if (const auto *I = dyn_cast<Instruction>(Src)) {
670      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
671        // Fold the zext or sext when it won't become a noop.
672        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
673          if (!isIntExtFree(ZE) &&
674              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
675            Addr.setExtendType(AArch64_AM::UXTW);
676            Src = ZE->getOperand(0);
677          }
678        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
679          if (!isIntExtFree(SE) &&
680              SE->getOperand(0)->getType()->isIntegerTy(32)) {
681            Addr.setExtendType(AArch64_AM::SXTW);
682            Src = SE->getOperand(0);
683          }
684        }
685      }
686    }
687
688    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
689      if (AI->getOpcode() == Instruction::And) {
690        const Value *LHS = AI->getOperand(0);
691        const Value *RHS = AI->getOperand(1);
692
693        if (const auto *C = dyn_cast<ConstantInt>(LHS))
694          if (C->getValue() == 0xffffffff)
695            std::swap(LHS, RHS);
696
697        if (const auto *C = dyn_cast<ConstantInt>(RHS))
698          if (C->getValue() == 0xffffffff) {
699            Addr.setExtendType(AArch64_AM::UXTW);
700            unsigned Reg = getRegForValue(LHS);
701            if (!Reg)
702              return false;
703            bool RegIsKill = hasTrivialKill(LHS);
704            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
705                                             AArch64::sub_32);
706            Addr.setOffsetReg(Reg);
707            return true;
708          }
709      }
710
711    unsigned Reg = getRegForValue(Src);
712    if (!Reg)
713      return false;
714    Addr.setOffsetReg(Reg);
715    return true;
716  }
717  case Instruction::Mul: {
718    if (Addr.getOffsetReg())
719      break;
720
721    if (!isMulPowOf2(U))
722      break;
723
724    const Value *LHS = U->getOperand(0);
725    const Value *RHS = U->getOperand(1);
726
727    // Canonicalize power-of-2 value to the RHS.
728    if (const auto *C = dyn_cast<ConstantInt>(LHS))
729      if (C->getValue().isPowerOf2())
730        std::swap(LHS, RHS);
731
732    assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
733    const auto *C = cast<ConstantInt>(RHS);
734    unsigned Val = C->getValue().logBase2();
735    if (Val < 1 || Val > 3)
736      break;
737
738    uint64_t NumBytes = 0;
739    if (Ty && Ty->isSized()) {
740      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
741      NumBytes = NumBits / 8;
742      if (!isPowerOf2_64(NumBits))
743        NumBytes = 0;
744    }
745
746    if (NumBytes != (1ULL << Val))
747      break;
748
749    Addr.setShift(Val);
750    Addr.setExtendType(AArch64_AM::LSL);
751
752    const Value *Src = LHS;
753    if (const auto *I = dyn_cast<Instruction>(Src)) {
754      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
755        // Fold the zext or sext when it won't become a noop.
756        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
757          if (!isIntExtFree(ZE) &&
758              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
759            Addr.setExtendType(AArch64_AM::UXTW);
760            Src = ZE->getOperand(0);
761          }
762        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
763          if (!isIntExtFree(SE) &&
764              SE->getOperand(0)->getType()->isIntegerTy(32)) {
765            Addr.setExtendType(AArch64_AM::SXTW);
766            Src = SE->getOperand(0);
767          }
768        }
769      }
770    }
771
772    unsigned Reg = getRegForValue(Src);
773    if (!Reg)
774      return false;
775    Addr.setOffsetReg(Reg);
776    return true;
777  }
778  case Instruction::And: {
779    if (Addr.getOffsetReg())
780      break;
781
782    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
783      break;
784
785    const Value *LHS = U->getOperand(0);
786    const Value *RHS = U->getOperand(1);
787
788    if (const auto *C = dyn_cast<ConstantInt>(LHS))
789      if (C->getValue() == 0xffffffff)
790        std::swap(LHS, RHS);
791
792    if (const auto *C = dyn_cast<ConstantInt>(RHS))
793      if (C->getValue() == 0xffffffff) {
794        Addr.setShift(0);
795        Addr.setExtendType(AArch64_AM::LSL);
796        Addr.setExtendType(AArch64_AM::UXTW);
797
798        unsigned Reg = getRegForValue(LHS);
799        if (!Reg)
800          return false;
801        bool RegIsKill = hasTrivialKill(LHS);
802        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
803                                         AArch64::sub_32);
804        Addr.setOffsetReg(Reg);
805        return true;
806      }
807    break;
808  }
809  case Instruction::SExt:
810  case Instruction::ZExt: {
811    if (!Addr.getReg() || Addr.getOffsetReg())
812      break;
813
814    const Value *Src = nullptr;
815    // Fold the zext or sext when it won't become a noop.
816    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
817      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
818        Addr.setExtendType(AArch64_AM::UXTW);
819        Src = ZE->getOperand(0);
820      }
821    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
822      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
823        Addr.setExtendType(AArch64_AM::SXTW);
824        Src = SE->getOperand(0);
825      }
826    }
827
828    if (!Src)
829      break;
830
831    Addr.setShift(0);
832    unsigned Reg = getRegForValue(Src);
833    if (!Reg)
834      return false;
835    Addr.setOffsetReg(Reg);
836    return true;
837  }
838  } // end switch
839
840  if (Addr.isRegBase() && !Addr.getReg()) {
841    unsigned Reg = getRegForValue(Obj);
842    if (!Reg)
843      return false;
844    Addr.setReg(Reg);
845    return true;
846  }
847
848  if (!Addr.getOffsetReg()) {
849    unsigned Reg = getRegForValue(Obj);
850    if (!Reg)
851      return false;
852    Addr.setOffsetReg(Reg);
853    return true;
854  }
855
856  return false;
857}
858
859bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
860  const User *U = nullptr;
861  unsigned Opcode = Instruction::UserOp1;
862  bool InMBB = true;
863
864  if (const auto *I = dyn_cast<Instruction>(V)) {
865    Opcode = I->getOpcode();
866    U = I;
867    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
868  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
869    Opcode = C->getOpcode();
870    U = C;
871  }
872
873  switch (Opcode) {
874  default: break;
875  case Instruction::BitCast:
876    // Look past bitcasts if its operand is in the same BB.
877    if (InMBB)
878      return computeCallAddress(U->getOperand(0), Addr);
879    break;
880  case Instruction::IntToPtr:
881    // Look past no-op inttoptrs if its operand is in the same BB.
882    if (InMBB &&
883        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
884            TLI.getPointerTy(DL))
885      return computeCallAddress(U->getOperand(0), Addr);
886    break;
887  case Instruction::PtrToInt:
888    // Look past no-op ptrtoints if its operand is in the same BB.
889    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
890      return computeCallAddress(U->getOperand(0), Addr);
891    break;
892  }
893
894  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
895    Addr.setGlobalValue(GV);
896    return true;
897  }
898
899  // If all else fails, try to materialize the value in a register.
900  if (!Addr.getGlobalValue()) {
901    Addr.setReg(getRegForValue(V));
902    return Addr.getReg() != 0;
903  }
904
905  return false;
906}
907
908
909bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
910  EVT evt = TLI.getValueType(DL, Ty, true);
911
912  // Only handle simple types.
913  if (evt == MVT::Other || !evt.isSimple())
914    return false;
915  VT = evt.getSimpleVT();
916
917  // This is a legal type, but it's not something we handle in fast-isel.
918  if (VT == MVT::f128)
919    return false;
920
921  // Handle all other legal types, i.e. a register that will directly hold this
922  // value.
923  return TLI.isTypeLegal(VT);
924}
925
926/// \brief Determine if the value type is supported by FastISel.
927///
928/// FastISel for AArch64 can handle more value types than are legal. This adds
929/// simple value type such as i1, i8, and i16.
930bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
931  if (Ty->isVectorTy() && !IsVectorAllowed)
932    return false;
933
934  if (isTypeLegal(Ty, VT))
935    return true;
936
937  // If this is a type than can be sign or zero-extended to a basic operation
938  // go ahead and accept it now.
939  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
940    return true;
941
942  return false;
943}
944
945bool AArch64FastISel::isValueAvailable(const Value *V) const {
946  if (!isa<Instruction>(V))
947    return true;
948
949  const auto *I = cast<Instruction>(V);
950  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
951    return true;
952
953  return false;
954}
955
956bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
957  unsigned ScaleFactor = getImplicitScaleFactor(VT);
958  if (!ScaleFactor)
959    return false;
960
961  bool ImmediateOffsetNeedsLowering = false;
962  bool RegisterOffsetNeedsLowering = false;
963  int64_t Offset = Addr.getOffset();
964  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
965    ImmediateOffsetNeedsLowering = true;
966  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
967           !isUInt<12>(Offset / ScaleFactor))
968    ImmediateOffsetNeedsLowering = true;
969
970  // Cannot encode an offset register and an immediate offset in the same
971  // instruction. Fold the immediate offset into the load/store instruction and
972  // emit an additional add to take care of the offset register.
973  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
974    RegisterOffsetNeedsLowering = true;
975
976  // Cannot encode zero register as base.
977  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
978    RegisterOffsetNeedsLowering = true;
979
980  // If this is a stack pointer and the offset needs to be simplified then put
981  // the alloca address into a register, set the base type back to register and
982  // continue. This should almost never happen.
983  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
984  {
985    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
986    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
987            ResultReg)
988      .addFrameIndex(Addr.getFI())
989      .addImm(0)
990      .addImm(0);
991    Addr.setKind(Address::RegBase);
992    Addr.setReg(ResultReg);
993  }
994
995  if (RegisterOffsetNeedsLowering) {
996    unsigned ResultReg = 0;
997    if (Addr.getReg()) {
998      if (Addr.getExtendType() == AArch64_AM::SXTW ||
999          Addr.getExtendType() == AArch64_AM::UXTW   )
1000        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1001                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1002                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
1003                                  Addr.getShift());
1004      else
1005        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1006                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1007                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1008                                  Addr.getShift());
1009    } else {
1010      if (Addr.getExtendType() == AArch64_AM::UXTW)
1011        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1012                               /*Op0IsKill=*/false, Addr.getShift(),
1013                               /*IsZExt=*/true);
1014      else if (Addr.getExtendType() == AArch64_AM::SXTW)
1015        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1016                               /*Op0IsKill=*/false, Addr.getShift(),
1017                               /*IsZExt=*/false);
1018      else
1019        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1020                               /*Op0IsKill=*/false, Addr.getShift());
1021    }
1022    if (!ResultReg)
1023      return false;
1024
1025    Addr.setReg(ResultReg);
1026    Addr.setOffsetReg(0);
1027    Addr.setShift(0);
1028    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1029  }
1030
1031  // Since the offset is too large for the load/store instruction get the
1032  // reg+offset into a register.
1033  if (ImmediateOffsetNeedsLowering) {
1034    unsigned ResultReg;
1035    if (Addr.getReg())
1036      // Try to fold the immediate into the add instruction.
1037      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1038    else
1039      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1040
1041    if (!ResultReg)
1042      return false;
1043    Addr.setReg(ResultReg);
1044    Addr.setOffset(0);
1045  }
1046  return true;
1047}
1048
1049void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1050                                           const MachineInstrBuilder &MIB,
1051                                           unsigned Flags,
1052                                           unsigned ScaleFactor,
1053                                           MachineMemOperand *MMO) {
1054  int64_t Offset = Addr.getOffset() / ScaleFactor;
1055  // Frame base works a bit differently. Handle it separately.
1056  if (Addr.isFIBase()) {
1057    int FI = Addr.getFI();
1058    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1059    // and alignment should be based on the VT.
1060    MMO = FuncInfo.MF->getMachineMemOperand(
1061        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1062        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1063    // Now add the rest of the operands.
1064    MIB.addFrameIndex(FI).addImm(Offset);
1065  } else {
1066    assert(Addr.isRegBase() && "Unexpected address kind.");
1067    const MCInstrDesc &II = MIB->getDesc();
1068    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1069    Addr.setReg(
1070      constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1071    Addr.setOffsetReg(
1072      constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1073    if (Addr.getOffsetReg()) {
1074      assert(Addr.getOffset() == 0 && "Unexpected offset");
1075      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1076                      Addr.getExtendType() == AArch64_AM::SXTX;
1077      MIB.addReg(Addr.getReg());
1078      MIB.addReg(Addr.getOffsetReg());
1079      MIB.addImm(IsSigned);
1080      MIB.addImm(Addr.getShift() != 0);
1081    } else
1082      MIB.addReg(Addr.getReg()).addImm(Offset);
1083  }
1084
1085  if (MMO)
1086    MIB.addMemOperand(MMO);
1087}
1088
1089unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1090                                     const Value *RHS, bool SetFlags,
1091                                     bool WantResult,  bool IsZExt) {
1092  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1093  bool NeedExtend = false;
1094  switch (RetVT.SimpleTy) {
1095  default:
1096    return 0;
1097  case MVT::i1:
1098    NeedExtend = true;
1099    break;
1100  case MVT::i8:
1101    NeedExtend = true;
1102    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1103    break;
1104  case MVT::i16:
1105    NeedExtend = true;
1106    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1107    break;
1108  case MVT::i32:  // fall-through
1109  case MVT::i64:
1110    break;
1111  }
1112  MVT SrcVT = RetVT;
1113  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1114
1115  // Canonicalize immediates to the RHS first.
1116  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1117    std::swap(LHS, RHS);
1118
1119  // Canonicalize mul by power of 2 to the RHS.
1120  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1121    if (isMulPowOf2(LHS))
1122      std::swap(LHS, RHS);
1123
1124  // Canonicalize shift immediate to the RHS.
1125  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1126    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1127      if (isa<ConstantInt>(SI->getOperand(1)))
1128        if (SI->getOpcode() == Instruction::Shl  ||
1129            SI->getOpcode() == Instruction::LShr ||
1130            SI->getOpcode() == Instruction::AShr   )
1131          std::swap(LHS, RHS);
1132
1133  unsigned LHSReg = getRegForValue(LHS);
1134  if (!LHSReg)
1135    return 0;
1136  bool LHSIsKill = hasTrivialKill(LHS);
1137
1138  if (NeedExtend)
1139    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1140
1141  unsigned ResultReg = 0;
1142  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1143    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1144    if (C->isNegative())
1145      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1146                                SetFlags, WantResult);
1147    else
1148      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1149                                WantResult);
1150  } else if (const auto *C = dyn_cast<Constant>(RHS))
1151    if (C->isNullValue())
1152      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1153                                WantResult);
1154
1155  if (ResultReg)
1156    return ResultReg;
1157
1158  // Only extend the RHS within the instruction if there is a valid extend type.
1159  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1160      isValueAvailable(RHS)) {
1161    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1162      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1163        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1164          unsigned RHSReg = getRegForValue(SI->getOperand(0));
1165          if (!RHSReg)
1166            return 0;
1167          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1168          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1169                               RHSIsKill, ExtendType, C->getZExtValue(),
1170                               SetFlags, WantResult);
1171        }
1172    unsigned RHSReg = getRegForValue(RHS);
1173    if (!RHSReg)
1174      return 0;
1175    bool RHSIsKill = hasTrivialKill(RHS);
1176    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1177                         ExtendType, 0, SetFlags, WantResult);
1178  }
1179
1180  // Check if the mul can be folded into the instruction.
1181  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1182    if (isMulPowOf2(RHS)) {
1183      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1184      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1185
1186      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1187        if (C->getValue().isPowerOf2())
1188          std::swap(MulLHS, MulRHS);
1189
1190      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1191      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1192      unsigned RHSReg = getRegForValue(MulLHS);
1193      if (!RHSReg)
1194        return 0;
1195      bool RHSIsKill = hasTrivialKill(MulLHS);
1196      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1197                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1198                                WantResult);
1199      if (ResultReg)
1200        return ResultReg;
1201    }
1202  }
1203
1204  // Check if the shift can be folded into the instruction.
1205  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1206    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1207      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1208        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1209        switch (SI->getOpcode()) {
1210        default: break;
1211        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1212        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1213        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1214        }
1215        uint64_t ShiftVal = C->getZExtValue();
1216        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1217          unsigned RHSReg = getRegForValue(SI->getOperand(0));
1218          if (!RHSReg)
1219            return 0;
1220          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1221          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1222                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
1223                                    WantResult);
1224          if (ResultReg)
1225            return ResultReg;
1226        }
1227      }
1228    }
1229  }
1230
1231  unsigned RHSReg = getRegForValue(RHS);
1232  if (!RHSReg)
1233    return 0;
1234  bool RHSIsKill = hasTrivialKill(RHS);
1235
1236  if (NeedExtend)
1237    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1238
1239  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1240                       SetFlags, WantResult);
1241}
1242
1243unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1244                                        bool LHSIsKill, unsigned RHSReg,
1245                                        bool RHSIsKill, bool SetFlags,
1246                                        bool WantResult) {
1247  assert(LHSReg && RHSReg && "Invalid register number.");
1248
1249  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1250    return 0;
1251
1252  static const unsigned OpcTable[2][2][2] = {
1253    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1254      { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1255    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1256      { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1257  };
1258  bool Is64Bit = RetVT == MVT::i64;
1259  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1260  const TargetRegisterClass *RC =
1261      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1262  unsigned ResultReg;
1263  if (WantResult)
1264    ResultReg = createResultReg(RC);
1265  else
1266    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1267
1268  const MCInstrDesc &II = TII.get(Opc);
1269  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1270  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1271  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1272      .addReg(LHSReg, getKillRegState(LHSIsKill))
1273      .addReg(RHSReg, getKillRegState(RHSIsKill));
1274  return ResultReg;
1275}
1276
1277unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1278                                        bool LHSIsKill, uint64_t Imm,
1279                                        bool SetFlags, bool WantResult) {
1280  assert(LHSReg && "Invalid register number.");
1281
1282  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1283    return 0;
1284
1285  unsigned ShiftImm;
1286  if (isUInt<12>(Imm))
1287    ShiftImm = 0;
1288  else if ((Imm & 0xfff000) == Imm) {
1289    ShiftImm = 12;
1290    Imm >>= 12;
1291  } else
1292    return 0;
1293
1294  static const unsigned OpcTable[2][2][2] = {
1295    { { AArch64::SUBWri,  AArch64::SUBXri  },
1296      { AArch64::ADDWri,  AArch64::ADDXri  }  },
1297    { { AArch64::SUBSWri, AArch64::SUBSXri },
1298      { AArch64::ADDSWri, AArch64::ADDSXri }  }
1299  };
1300  bool Is64Bit = RetVT == MVT::i64;
1301  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1302  const TargetRegisterClass *RC;
1303  if (SetFlags)
1304    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1305  else
1306    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1307  unsigned ResultReg;
1308  if (WantResult)
1309    ResultReg = createResultReg(RC);
1310  else
1311    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1312
1313  const MCInstrDesc &II = TII.get(Opc);
1314  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1315  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1316      .addReg(LHSReg, getKillRegState(LHSIsKill))
1317      .addImm(Imm)
1318      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1319  return ResultReg;
1320}
1321
1322unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1323                                        bool LHSIsKill, unsigned RHSReg,
1324                                        bool RHSIsKill,
1325                                        AArch64_AM::ShiftExtendType ShiftType,
1326                                        uint64_t ShiftImm, bool SetFlags,
1327                                        bool WantResult) {
1328  assert(LHSReg && RHSReg && "Invalid register number.");
1329
1330  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1331    return 0;
1332
1333  // Don't deal with undefined shifts.
1334  if (ShiftImm >= RetVT.getSizeInBits())
1335    return 0;
1336
1337  static const unsigned OpcTable[2][2][2] = {
1338    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1339      { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1340    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1341      { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1342  };
1343  bool Is64Bit = RetVT == MVT::i64;
1344  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1345  const TargetRegisterClass *RC =
1346      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1347  unsigned ResultReg;
1348  if (WantResult)
1349    ResultReg = createResultReg(RC);
1350  else
1351    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1352
1353  const MCInstrDesc &II = TII.get(Opc);
1354  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1355  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1356  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1357      .addReg(LHSReg, getKillRegState(LHSIsKill))
1358      .addReg(RHSReg, getKillRegState(RHSIsKill))
1359      .addImm(getShifterImm(ShiftType, ShiftImm));
1360  return ResultReg;
1361}
1362
1363unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1364                                        bool LHSIsKill, unsigned RHSReg,
1365                                        bool RHSIsKill,
1366                                        AArch64_AM::ShiftExtendType ExtType,
1367                                        uint64_t ShiftImm, bool SetFlags,
1368                                        bool WantResult) {
1369  assert(LHSReg && RHSReg && "Invalid register number.");
1370
1371  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1372    return 0;
1373
1374  if (ShiftImm >= 4)
1375    return 0;
1376
1377  static const unsigned OpcTable[2][2][2] = {
1378    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1379      { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1380    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1381      { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1382  };
1383  bool Is64Bit = RetVT == MVT::i64;
1384  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1385  const TargetRegisterClass *RC = nullptr;
1386  if (SetFlags)
1387    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1388  else
1389    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1390  unsigned ResultReg;
1391  if (WantResult)
1392    ResultReg = createResultReg(RC);
1393  else
1394    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1395
1396  const MCInstrDesc &II = TII.get(Opc);
1397  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1398  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1399  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1400      .addReg(LHSReg, getKillRegState(LHSIsKill))
1401      .addReg(RHSReg, getKillRegState(RHSIsKill))
1402      .addImm(getArithExtendImm(ExtType, ShiftImm));
1403  return ResultReg;
1404}
1405
1406bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1407  Type *Ty = LHS->getType();
1408  EVT EVT = TLI.getValueType(DL, Ty, true);
1409  if (!EVT.isSimple())
1410    return false;
1411  MVT VT = EVT.getSimpleVT();
1412
1413  switch (VT.SimpleTy) {
1414  default:
1415    return false;
1416  case MVT::i1:
1417  case MVT::i8:
1418  case MVT::i16:
1419  case MVT::i32:
1420  case MVT::i64:
1421    return emitICmp(VT, LHS, RHS, IsZExt);
1422  case MVT::f32:
1423  case MVT::f64:
1424    return emitFCmp(VT, LHS, RHS);
1425  }
1426}
1427
1428bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1429                               bool IsZExt) {
1430  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1431                 IsZExt) != 0;
1432}
1433
1434bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1435                                  uint64_t Imm) {
1436  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1437                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
1438}
1439
1440bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1441  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1442    return false;
1443
1444  // Check to see if the 2nd operand is a constant that we can encode directly
1445  // in the compare.
1446  bool UseImm = false;
1447  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1448    if (CFP->isZero() && !CFP->isNegative())
1449      UseImm = true;
1450
1451  unsigned LHSReg = getRegForValue(LHS);
1452  if (!LHSReg)
1453    return false;
1454  bool LHSIsKill = hasTrivialKill(LHS);
1455
1456  if (UseImm) {
1457    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1458    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1459        .addReg(LHSReg, getKillRegState(LHSIsKill));
1460    return true;
1461  }
1462
1463  unsigned RHSReg = getRegForValue(RHS);
1464  if (!RHSReg)
1465    return false;
1466  bool RHSIsKill = hasTrivialKill(RHS);
1467
1468  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1469  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1470      .addReg(LHSReg, getKillRegState(LHSIsKill))
1471      .addReg(RHSReg, getKillRegState(RHSIsKill));
1472  return true;
1473}
1474
1475unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1476                                  bool SetFlags, bool WantResult, bool IsZExt) {
1477  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1478                    IsZExt);
1479}
1480
/// \brief This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri;
/// a negative immediate is emitted as a subtract. If that fails, materialize
/// the immediate into a register and use emitAddSub_rr instead.
1486unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1487                                      int64_t Imm) {
1488  unsigned ResultReg;
1489  if (Imm < 0)
1490    ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1491  else
1492    ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1493
1494  if (ResultReg)
1495    return ResultReg;
1496
1497  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1498  if (!CReg)
1499    return 0;
1500
1501  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1502  return ResultReg;
1503}
1504
1505unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1506                                  bool SetFlags, bool WantResult, bool IsZExt) {
1507  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1508                    IsZExt);
1509}
1510
1511unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1512                                      bool LHSIsKill, unsigned RHSReg,
1513                                      bool RHSIsKill, bool WantResult) {
1514  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1515                       RHSIsKill, /*SetFlags=*/true, WantResult);
1516}
1517
1518unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1519                                      bool LHSIsKill, unsigned RHSReg,
1520                                      bool RHSIsKill,
1521                                      AArch64_AM::ShiftExtendType ShiftType,
1522                                      uint64_t ShiftImm, bool WantResult) {
1523  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1524                       RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1525                       WantResult);
1526}
1527
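/// \brief Emit a logical operation (AND/OR/XOR). Constants are tried as a
/// logical immediate first; a multiply by a power of 2 or a left shift by a
/// constant on the RHS is folded into a shifted-register operand if possible.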
1528unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1529                                        const Value *LHS, const Value *RHS) {
1530  // Canonicalize immediates to the RHS first.
1531  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1532    std::swap(LHS, RHS);
1533
1534  // Canonicalize mul by power-of-2 to the RHS.
1535  if (LHS->hasOneUse() && isValueAvailable(LHS))
1536    if (isMulPowOf2(LHS))
1537      std::swap(LHS, RHS);
1538
1539  // Canonicalize shift immediate to the RHS.
1540  if (LHS->hasOneUse() && isValueAvailable(LHS))
1541    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1542      if (isa<ConstantInt>(SI->getOperand(1)))
1543        std::swap(LHS, RHS);
1544
1545  unsigned LHSReg = getRegForValue(LHS);
1546  if (!LHSReg)
1547    return 0;
1548  bool LHSIsKill = hasTrivialKill(LHS);
1549
1550  unsigned ResultReg = 0;
1551  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1552    uint64_t Imm = C->getZExtValue();
1553    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1554  }
1555  if (ResultReg)
1556    return ResultReg;
1557
1558  // Check if the mul can be folded into the instruction.
1559  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1560    if (isMulPowOf2(RHS)) {
1561      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1562      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1563
1564      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1565        if (C->getValue().isPowerOf2())
1566          std::swap(MulLHS, MulRHS);
1567
1568      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1569      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1570
1571      unsigned RHSReg = getRegForValue(MulLHS);
1572      if (!RHSReg)
1573        return 0;
1574      bool RHSIsKill = hasTrivialKill(MulLHS);
1575      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1576                                   RHSIsKill, ShiftVal);
1577      if (ResultReg)
1578        return ResultReg;
1579    }
1580  }
1581
1582  // Check if the shift can be folded into the instruction.
1583  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1584    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1585      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1586        uint64_t ShiftVal = C->getZExtValue();
1587        unsigned RHSReg = getRegForValue(SI->getOperand(0));
1588        if (!RHSReg)
1589          return 0;
1590        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1591        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1592                                     RHSIsKill, ShiftVal);
1593        if (ResultReg)
1594          return ResultReg;
1595      }
1596  }
1597
1598  unsigned RHSReg = getRegForValue(RHS);
1599  if (!RHSReg)
1600    return 0;
1601  bool RHSIsKill = hasTrivialKill(RHS);
1602
1603  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1604  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1605  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1606    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1607    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1608  }
1609  return ResultReg;
1610}
1611
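/// \brief Emit a logical operation with a register and an encodable logical
/// immediate. For i8/i16, OR and XOR results are masked back to their width.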
1612unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1613                                           unsigned LHSReg, bool LHSIsKill,
1614                                           uint64_t Imm) {
1615  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1616         "ISD nodes are not consecutive!");
1617  static const unsigned OpcTable[3][2] = {
1618    { AArch64::ANDWri, AArch64::ANDXri },
1619    { AArch64::ORRWri, AArch64::ORRXri },
1620    { AArch64::EORWri, AArch64::EORXri }
1621  };
1622  const TargetRegisterClass *RC;
1623  unsigned Opc;
1624  unsigned RegSize;
1625  switch (RetVT.SimpleTy) {
1626  default:
1627    return 0;
1628  case MVT::i1:
1629  case MVT::i8:
1630  case MVT::i16:
1631  case MVT::i32: {
1632    unsigned Idx = ISDOpc - ISD::AND;
1633    Opc = OpcTable[Idx][0];
1634    RC = &AArch64::GPR32spRegClass;
1635    RegSize = 32;
1636    break;
1637  }
1638  case MVT::i64:
1639    Opc = OpcTable[ISDOpc - ISD::AND][1];
1640    RC = &AArch64::GPR64spRegClass;
1641    RegSize = 64;
1642    break;
1643  }
1644
1645  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1646    return 0;
1647
1648  unsigned ResultReg =
1649      fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1650                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1651  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1652    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1653    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1654  }
1655  return ResultReg;
1656}
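/// \brief Emit a logical operation with the RHS shifted left by \p ShiftImm.
/// Shift amounts that are undefined for the result type are rejected.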
1657
1658unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1659                                           unsigned LHSReg, bool LHSIsKill,
1660                                           unsigned RHSReg, bool RHSIsKill,
1661                                           uint64_t ShiftImm) {
1662  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1663         "ISD nodes are not consecutive!");
1664  static const unsigned OpcTable[3][2] = {
1665    { AArch64::ANDWrs, AArch64::ANDXrs },
1666    { AArch64::ORRWrs, AArch64::ORRXrs },
1667    { AArch64::EORWrs, AArch64::EORXrs }
1668  };
1669
1670  // Don't deal with undefined shifts.
1671  if (ShiftImm >= RetVT.getSizeInBits())
1672    return 0;
1673
1674  const TargetRegisterClass *RC;
1675  unsigned Opc;
1676  switch (RetVT.SimpleTy) {
1677  default:
1678    return 0;
1679  case MVT::i1:
1680  case MVT::i8:
1681  case MVT::i16:
1682  case MVT::i32:
1683    Opc = OpcTable[ISDOpc - ISD::AND][0];
1684    RC = &AArch64::GPR32RegClass;
1685    break;
1686  case MVT::i64:
1687    Opc = OpcTable[ISDOpc - ISD::AND][1];
1688    RC = &AArch64::GPR64RegClass;
1689    break;
1690  }
1691  unsigned ResultReg =
1692      fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1693                       AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1694  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1695    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1696    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1697  }
1698  return ResultReg;
1699}
1700
1701unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1702                                     uint64_t Imm) {
1703  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1704}
1705
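/// \brief Emit a load of type \p VT from \p Addr, extended to \p RetVT. This
/// chooses between unscaled, scaled-immediate, and register-offset loads and
/// handles i1 masking as well as zero-extension of the result to 64 bits.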
1706unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1707                                   bool WantZExt, MachineMemOperand *MMO) {
1708  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1709    return 0;
1710
1711  // Simplify this down to something we can handle.
1712  if (!simplifyAddress(Addr, VT))
1713    return 0;
1714
1715  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1716  if (!ScaleFactor)
1717    llvm_unreachable("Unexpected value type.");
1718
1719  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1720  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1721  bool UseScaled = true;
1722  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1723    UseScaled = false;
1724    ScaleFactor = 1;
1725  }
1726
1727  static const unsigned GPOpcTable[2][8][4] = {
1728    // Sign-extend.
1729    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1730        AArch64::LDURXi  },
1731      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1732        AArch64::LDURXi  },
1733      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1734        AArch64::LDRXui  },
1735      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1736        AArch64::LDRXui  },
1737      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1738        AArch64::LDRXroX },
1739      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1740        AArch64::LDRXroX },
1741      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1742        AArch64::LDRXroW },
1743      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1744        AArch64::LDRXroW }
1745    },
1746    // Zero-extend.
1747    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1748        AArch64::LDURXi  },
1749      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1750        AArch64::LDURXi  },
1751      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1752        AArch64::LDRXui  },
1753      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1754        AArch64::LDRXui  },
1755      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1756        AArch64::LDRXroX },
1757      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1758        AArch64::LDRXroX },
1759      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1760        AArch64::LDRXroW },
1761      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1762        AArch64::LDRXroW }
1763    }
1764  };
1765
1766  static const unsigned FPOpcTable[4][2] = {
1767    { AArch64::LDURSi,  AArch64::LDURDi  },
1768    { AArch64::LDRSui,  AArch64::LDRDui  },
1769    { AArch64::LDRSroX, AArch64::LDRDroX },
1770    { AArch64::LDRSroW, AArch64::LDRDroW }
1771  };
1772
1773  unsigned Opc;
1774  const TargetRegisterClass *RC;
1775  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1776                      Addr.getOffsetReg();
1777  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1778  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1779      Addr.getExtendType() == AArch64_AM::SXTW)
1780    Idx++;
1781
1782  bool IsRet64Bit = RetVT == MVT::i64;
1783  switch (VT.SimpleTy) {
1784  default:
1785    llvm_unreachable("Unexpected value type.");
1786  case MVT::i1: // Intentional fall-through.
1787  case MVT::i8:
1788    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1789    RC = (IsRet64Bit && !WantZExt) ?
1790             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1791    break;
1792  case MVT::i16:
1793    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1794    RC = (IsRet64Bit && !WantZExt) ?
1795             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1796    break;
1797  case MVT::i32:
1798    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1799    RC = (IsRet64Bit && !WantZExt) ?
1800             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1801    break;
1802  case MVT::i64:
1803    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1804    RC = &AArch64::GPR64RegClass;
1805    break;
1806  case MVT::f32:
1807    Opc = FPOpcTable[Idx][0];
1808    RC = &AArch64::FPR32RegClass;
1809    break;
1810  case MVT::f64:
1811    Opc = FPOpcTable[Idx][1];
1812    RC = &AArch64::FPR64RegClass;
1813    break;
1814  }
1815
1816  // Create the base instruction, then add the operands.
1817  unsigned ResultReg = createResultReg(RC);
1818  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1819                                    TII.get(Opc), ResultReg);
1820  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1821
1822  // Loading an i1 requires special handling.
1823  if (VT == MVT::i1) {
1824    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1825    assert(ANDReg && "Unexpected AND instruction emission failure.");
1826    ResultReg = ANDReg;
1827  }
1828
  // For loads zero-extended to 64 bits we emit a 32-bit load and then convert
  // the 32-bit register to a 64-bit register with SUBREG_TO_REG.
1831  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1832    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1833    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1834            TII.get(AArch64::SUBREG_TO_REG), Reg64)
1835        .addImm(0)
1836        .addReg(ResultReg, getKillRegState(true))
1837        .addImm(AArch64::sub_32);
1838    ResultReg = Reg64;
1839  }
1840  return ResultReg;
1841}
1842
1843bool AArch64FastISel::selectAddSub(const Instruction *I) {
1844  MVT VT;
1845  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1846    return false;
1847
1848  if (VT.isVector())
1849    return selectOperator(I, I->getOpcode());
1850
1851  unsigned ResultReg;
1852  switch (I->getOpcode()) {
1853  default:
1854    llvm_unreachable("Unexpected instruction.");
1855  case Instruction::Add:
1856    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1857    break;
1858  case Instruction::Sub:
1859    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1860    break;
1861  }
1862  if (!ResultReg)
1863    return false;
1864
1865  updateValueMap(I, ResultReg);
1866  return true;
1867}
1868
1869bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1870  MVT VT;
1871  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1872    return false;
1873
1874  if (VT.isVector())
1875    return selectOperator(I, I->getOpcode());
1876
1877  unsigned ResultReg;
1878  switch (I->getOpcode()) {
1879  default:
1880    llvm_unreachable("Unexpected instruction.");
1881  case Instruction::And:
1882    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1883    break;
1884  case Instruction::Or:
1885    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1886    break;
1887  case Instruction::Xor:
1888    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1889    break;
1890  }
1891  if (!ResultReg)
1892    return false;
1893
1894  updateValueMap(I, ResultReg);
1895  return true;
1896}
1897
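/// \brief Select a load and, if profitable, fold a single following sign- or
/// zero-extend into it.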
1898bool AArch64FastISel::selectLoad(const Instruction *I) {
1899  MVT VT;
1900  // Verify we have a legal type before going any further.  Currently, we handle
1901  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1902  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1903  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1904      cast<LoadInst>(I)->isAtomic())
1905    return false;
1906
1907  // See if we can handle this address.
1908  Address Addr;
1909  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1910    return false;
1911
1912  // Fold the following sign-/zero-extend into the load instruction.
1913  bool WantZExt = true;
1914  MVT RetVT = VT;
1915  const Value *IntExtVal = nullptr;
1916  if (I->hasOneUse()) {
1917    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1918      if (isTypeSupported(ZE->getType(), RetVT))
1919        IntExtVal = ZE;
1920      else
1921        RetVT = VT;
1922    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1923      if (isTypeSupported(SE->getType(), RetVT))
1924        IntExtVal = SE;
1925      else
1926        RetVT = VT;
1927      WantZExt = false;
1928    }
1929  }
1930
1931  unsigned ResultReg =
1932      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1933  if (!ResultReg)
1934    return false;
1935
  // There are a few different cases we have to handle, because the load or the
  // sign-/zero-extend might not be selected by FastISel if we fall back to
  // SelectionDAG. There is also an ordering issue when both instructions are in
  // different basic blocks.
  // 1.) The load instruction is selected by FastISel, but the integer extend is
  //     not. This usually happens when the integer extend is in a different
  //     basic block and SelectionDAG took over for that basic block.
  // 2.) The load instruction is selected before the integer extend. This only
  //     happens when the integer extend is in a different basic block.
  // 3.) The load instruction is selected by SelectionDAG and the integer extend
  //     by FastISel. This happens if there are instructions between the load
  //     and the integer extend that couldn't be selected by FastISel.
1948  if (IntExtVal) {
1949    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1950    // could select it. Emit a copy to subreg if necessary. FastISel will remove
1951    // it when it selects the integer extend.
1952    unsigned Reg = lookUpRegForValue(IntExtVal);
1953    auto *MI = MRI.getUniqueVRegDef(Reg);
1954    if (!MI) {
1955      if (RetVT == MVT::i64 && VT <= MVT::i32) {
1956        if (WantZExt) {
1957          // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
1958          std::prev(FuncInfo.InsertPt)->eraseFromParent();
1959          ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
1960        } else
1961          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
1962                                                 /*IsKill=*/true,
1963                                                 AArch64::sub_32);
1964      }
1965      updateValueMap(I, ResultReg);
1966      return true;
1967    }
1968
1969    // The integer extend has already been emitted - delete all the instructions
1970    // that have been emitted by the integer extend lowering code and use the
1971    // result from the load instruction directly.
1972    while (MI) {
1973      Reg = 0;
1974      for (auto &Opnd : MI->uses()) {
1975        if (Opnd.isReg()) {
1976          Reg = Opnd.getReg();
1977          break;
1978        }
1979      }
1980      MI->eraseFromParent();
1981      MI = nullptr;
1982      if (Reg)
1983        MI = MRI.getUniqueVRegDef(Reg);
1984    }
1985    updateValueMap(IntExtVal, ResultReg);
1986    return true;
1987  }
1988
1989  updateValueMap(I, ResultReg);
1990  return true;
1991}
1992
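/// \brief Emit a store of \p SrcReg of type \p VT to \p Addr, choosing among
/// the unscaled, scaled-immediate, and register-offset store forms.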
1993bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
1994                                MachineMemOperand *MMO) {
1995  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1996    return false;
1997
1998  // Simplify this down to something we can handle.
1999  if (!simplifyAddress(Addr, VT))
2000    return false;
2001
2002  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2003  if (!ScaleFactor)
2004    llvm_unreachable("Unexpected value type.");
2005
2006  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2007  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2008  bool UseScaled = true;
2009  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2010    UseScaled = false;
2011    ScaleFactor = 1;
2012  }
2013
2014  static const unsigned OpcTable[4][6] = {
2015    { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2016      AArch64::STURSi,   AArch64::STURDi },
2017    { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2018      AArch64::STRSui,   AArch64::STRDui },
2019    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2020      AArch64::STRSroX,  AArch64::STRDroX },
2021    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2022      AArch64::STRSroW,  AArch64::STRDroW }
2023  };
2024
2025  unsigned Opc;
2026  bool VTIsi1 = false;
2027  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2028                      Addr.getOffsetReg();
2029  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2030  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2031      Addr.getExtendType() == AArch64_AM::SXTW)
2032    Idx++;
2033
2034  switch (VT.SimpleTy) {
2035  default: llvm_unreachable("Unexpected value type.");
2036  case MVT::i1:  VTIsi1 = true;
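  // Intentional fall-through: an i1 value is stored with the i8 store opcode
  // after being masked to bit 0 below (WZR needs no masking).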
2037  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2038  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2039  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2040  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2041  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2042  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2043  }
2044
2045  // Storing an i1 requires special handling.
2046  if (VTIsi1 && SrcReg != AArch64::WZR) {
2047    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2048    assert(ANDReg && "Unexpected AND instruction emission failure.");
2049    SrcReg = ANDReg;
2050  }
2051  // Create the base instruction, then add the operands.
2052  const MCInstrDesc &II = TII.get(Opc);
2053  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2054  MachineInstrBuilder MIB =
2055      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2056  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2057
2058  return true;
2059}
2060
2061bool AArch64FastISel::selectStore(const Instruction *I) {
2062  MVT VT;
2063  const Value *Op0 = I->getOperand(0);
2064  // Verify we have a legal type before going any further.  Currently, we handle
2065  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2066  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2067  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
2068      cast<StoreInst>(I)->isAtomic())
2069    return false;
2070
2071  // Get the value to be stored into a register. Use the zero register directly
2072  // when possible to avoid an unnecessary copy and a wasted register.
2073  unsigned SrcReg = 0;
2074  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2075    if (CI->isZero())
2076      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2077  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2078    if (CF->isZero() && !CF->isNegative()) {
2079      VT = MVT::getIntegerVT(VT.getSizeInBits());
2080      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2081    }
2082  }
2083
2084  if (!SrcReg)
2085    SrcReg = getRegForValue(Op0);
2086
2087  if (!SrcReg)
2088    return false;
2089
2090  // See if we can handle this address.
2091  Address Addr;
  if (!computeAddress(I->getOperand(1), Addr, Op0->getType()))
2093    return false;
2094
2095  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2096    return false;
2097  return true;
2098}
2099
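/// \brief Map an IR comparison predicate to the AArch64 condition code that a
/// following conditional instruction should use. Predicates that need more
/// than one compare map to AArch64CC::AL.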
2100static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2101  switch (Pred) {
2102  case CmpInst::FCMP_ONE:
2103  case CmpInst::FCMP_UEQ:
2104  default:
2105    // AL is our "false" for now. The other two need more compares.
2106    return AArch64CC::AL;
2107  case CmpInst::ICMP_EQ:
2108  case CmpInst::FCMP_OEQ:
2109    return AArch64CC::EQ;
2110  case CmpInst::ICMP_SGT:
2111  case CmpInst::FCMP_OGT:
2112    return AArch64CC::GT;
2113  case CmpInst::ICMP_SGE:
2114  case CmpInst::FCMP_OGE:
2115    return AArch64CC::GE;
2116  case CmpInst::ICMP_UGT:
2117  case CmpInst::FCMP_UGT:
2118    return AArch64CC::HI;
2119  case CmpInst::FCMP_OLT:
2120    return AArch64CC::MI;
2121  case CmpInst::ICMP_ULE:
2122  case CmpInst::FCMP_OLE:
2123    return AArch64CC::LS;
2124  case CmpInst::FCMP_ORD:
2125    return AArch64CC::VC;
2126  case CmpInst::FCMP_UNO:
2127    return AArch64CC::VS;
2128  case CmpInst::FCMP_UGE:
2129    return AArch64CC::PL;
2130  case CmpInst::ICMP_SLT:
2131  case CmpInst::FCMP_ULT:
2132    return AArch64CC::LT;
2133  case CmpInst::ICMP_SLE:
2134  case CmpInst::FCMP_ULE:
2135    return AArch64CC::LE;
2136  case CmpInst::FCMP_UNE:
2137  case CmpInst::ICMP_NE:
2138    return AArch64CC::NE;
2139  case CmpInst::ICMP_UGE:
2140    return AArch64CC::HS;
2141  case CmpInst::ICMP_ULT:
2142    return AArch64CC::LO;
2143  }
2144}
2145
2146/// \brief Try to emit a combined compare-and-branch instruction.
2147bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2148  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2149  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2150  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2151
2152  const Value *LHS = CI->getOperand(0);
2153  const Value *RHS = CI->getOperand(1);
2154
2155  MVT VT;
2156  if (!isTypeSupported(LHS->getType(), VT))
2157    return false;
2158
2159  unsigned BW = VT.getSizeInBits();
2160  if (BW > 64)
2161    return false;
2162
2163  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2164  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2165
2166  // Try to take advantage of fallthrough opportunities.
2167  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2168    std::swap(TBB, FBB);
2169    Predicate = CmpInst::getInversePredicate(Predicate);
2170  }
2171
2172  int TestBit = -1;
2173  bool IsCmpNE;
2174  switch (Predicate) {
2175  default:
2176    return false;
2177  case CmpInst::ICMP_EQ:
2178  case CmpInst::ICMP_NE:
2179    if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2180      std::swap(LHS, RHS);
2181
2182    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2183      return false;
2184
2185    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2186      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2187        const Value *AndLHS = AI->getOperand(0);
2188        const Value *AndRHS = AI->getOperand(1);
2189
2190        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2191          if (C->getValue().isPowerOf2())
2192            std::swap(AndLHS, AndRHS);
2193
2194        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2195          if (C->getValue().isPowerOf2()) {
2196            TestBit = C->getValue().logBase2();
2197            LHS = AndLHS;
2198          }
2199      }
2200
2201    if (VT == MVT::i1)
2202      TestBit = 0;
2203
2204    IsCmpNE = Predicate == CmpInst::ICMP_NE;
2205    break;
2206  case CmpInst::ICMP_SLT:
2207  case CmpInst::ICMP_SGE:
2208    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2209      return false;
2210
2211    TestBit = BW - 1;
2212    IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2213    break;
2214  case CmpInst::ICMP_SGT:
2215  case CmpInst::ICMP_SLE:
2216    if (!isa<ConstantInt>(RHS))
2217      return false;
2218
2219    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2220      return false;
2221
2222    TestBit = BW - 1;
2223    IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2224    break;
2225  } // end switch
2226
2227  static const unsigned OpcTable[2][2][2] = {
2228    { {AArch64::CBZW,  AArch64::CBZX },
2229      {AArch64::CBNZW, AArch64::CBNZX} },
2230    { {AArch64::TBZW,  AArch64::TBZX },
2231      {AArch64::TBNZW, AArch64::TBNZX} }
2232  };
2233
2234  bool IsBitTest = TestBit != -1;
2235  bool Is64Bit = BW == 64;
2236  if (TestBit < 32 && TestBit >= 0)
2237    Is64Bit = false;
2238
2239  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2240  const MCInstrDesc &II = TII.get(Opc);
2241
2242  unsigned SrcReg = getRegForValue(LHS);
2243  if (!SrcReg)
2244    return false;
2245  bool SrcIsKill = hasTrivialKill(LHS);
2246
2247  if (BW == 64 && !Is64Bit)
2248    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2249                                        AArch64::sub_32);
2250
2251  if ((BW < 32) && !IsBitTest)
2252    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2253
2254  // Emit the combined compare and branch instruction.
  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2256  MachineInstrBuilder MIB =
2257      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2258          .addReg(SrcReg, getKillRegState(SrcIsKill));
2259  if (IsBitTest)
2260    MIB.addImm(TestBit);
2261  MIB.addMBB(TBB);
2262
2263  finishCondBranch(BI->getParent(), TBB, FBB);
2264  return true;
2265}
2266
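/// \brief Select a branch. Conditions fed by a compare are folded into a
/// compare-and-branch or a B.cc when possible; otherwise bit 0 of the i1
/// condition is tested with TBZ/TBNZ.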
2267bool AArch64FastISel::selectBranch(const Instruction *I) {
2268  const BranchInst *BI = cast<BranchInst>(I);
2269  if (BI->isUnconditional()) {
2270    MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2271    fastEmitBranch(MSucc, BI->getDebugLoc());
2272    return true;
2273  }
2274
2275  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2276  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2277
2278  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2279    if (CI->hasOneUse() && isValueAvailable(CI)) {
2280      // Try to optimize or fold the cmp.
2281      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2282      switch (Predicate) {
2283      default:
2284        break;
2285      case CmpInst::FCMP_FALSE:
2286        fastEmitBranch(FBB, DbgLoc);
2287        return true;
2288      case CmpInst::FCMP_TRUE:
2289        fastEmitBranch(TBB, DbgLoc);
2290        return true;
2291      }
2292
2293      // Try to emit a combined compare-and-branch first.
2294      if (emitCompareAndBranch(BI))
2295        return true;
2296
2297      // Try to take advantage of fallthrough opportunities.
2298      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2299        std::swap(TBB, FBB);
2300        Predicate = CmpInst::getInversePredicate(Predicate);
2301      }
2302
2303      // Emit the cmp.
2304      if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2305        return false;
2306
2307      // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2308      // instruction.
2309      AArch64CC::CondCode CC = getCompareCC(Predicate);
2310      AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2311      switch (Predicate) {
2312      default:
2313        break;
2314      case CmpInst::FCMP_UEQ:
2315        ExtraCC = AArch64CC::EQ;
2316        CC = AArch64CC::VS;
2317        break;
2318      case CmpInst::FCMP_ONE:
2319        ExtraCC = AArch64CC::MI;
2320        CC = AArch64CC::GT;
2321        break;
2322      }
2323      assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2324
2325      // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2326      if (ExtraCC != AArch64CC::AL) {
2327        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2328            .addImm(ExtraCC)
2329            .addMBB(TBB);
2330      }
2331
2332      // Emit the branch.
2333      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2334          .addImm(CC)
2335          .addMBB(TBB);
2336
2337      finishCondBranch(BI->getParent(), TBB, FBB);
2338      return true;
2339    }
2340  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2341    uint64_t Imm = CI->getZExtValue();
2342    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2343    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2344        .addMBB(Target);
2345
2346    // Obtain the branch probability and add the target to the successor list.
2347    if (FuncInfo.BPI) {
2348      auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2349          BI->getParent(), Target->getBasicBlock());
2350      FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2351    } else
2352      FuncInfo.MBB->addSuccessorWithoutProb(Target);
2353    return true;
2354  } else {
2355    AArch64CC::CondCode CC = AArch64CC::NE;
2356    if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
      // Artificially request the condition; otherwise the intrinsic might be
      // optimized away completely.
2359      unsigned CondReg = getRegForValue(BI->getCondition());
2360      if (!CondReg)
2361        return false;
2362
2363      // Emit the branch.
2364      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2365        .addImm(CC)
2366        .addMBB(TBB);
2367
2368      finishCondBranch(BI->getParent(), TBB, FBB);
2369      return true;
2370    }
2371  }
2372
2373  unsigned CondReg = getRegForValue(BI->getCondition());
2374  if (CondReg == 0)
2375    return false;
2376  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2377
2378  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2379  unsigned Opcode = AArch64::TBNZW;
2380  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2381    std::swap(TBB, FBB);
2382    Opcode = AArch64::TBZW;
2383  }
2384
2385  const MCInstrDesc &II = TII.get(Opcode);
2386  unsigned ConstrainedCondReg
2387    = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2388  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2389      .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2390      .addImm(0)
2391      .addMBB(TBB);
2392
2393  finishCondBranch(BI->getParent(), TBB, FBB);
2394  return true;
2395}
2396
2397bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2398  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2399  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2400  if (AddrReg == 0)
2401    return false;
2402
2403  // Emit the indirect branch.
2404  const MCInstrDesc &II = TII.get(AArch64::BR);
  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2406  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2407
2408  // Make sure the CFG is up-to-date.
2409  for (auto *Succ : BI->successors())
2410    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2411
2412  return true;
2413}
2414
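/// \brief Select a compare and materialize its boolean result with CSINC.
/// FCMP_UEQ and FCMP_ONE require a second CSINC.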
2415bool AArch64FastISel::selectCmp(const Instruction *I) {
2416  const CmpInst *CI = cast<CmpInst>(I);
2417
2418  // Vectors of i1 are weird: bail out.
2419  if (CI->getType()->isVectorTy())
2420    return false;
2421
2422  // Try to optimize or fold the cmp.
2423  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2424  unsigned ResultReg = 0;
2425  switch (Predicate) {
2426  default:
2427    break;
2428  case CmpInst::FCMP_FALSE:
2429    ResultReg = createResultReg(&AArch64::GPR32RegClass);
2430    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2431            TII.get(TargetOpcode::COPY), ResultReg)
2432        .addReg(AArch64::WZR, getKillRegState(true));
2433    break;
2434  case CmpInst::FCMP_TRUE:
2435    ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2436    break;
2437  }
2438
2439  if (ResultReg) {
2440    updateValueMap(I, ResultReg);
2441    return true;
2442  }
2443
2444  // Emit the cmp.
2445  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2446    return false;
2447
2448  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2449
2450  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2451  // condition codes are inverted, because they are used by CSINC.
  static const unsigned CondCodeTable[2][2] = {
    { AArch64CC::NE, AArch64CC::VC },
    { AArch64CC::PL, AArch64CC::LE }
  };
  const unsigned *CondCodes = nullptr;
2457  switch (Predicate) {
2458  default:
2459    break;
2460  case CmpInst::FCMP_UEQ:
2461    CondCodes = &CondCodeTable[0][0];
2462    break;
2463  case CmpInst::FCMP_ONE:
2464    CondCodes = &CondCodeTable[1][0];
2465    break;
2466  }
2467
2468  if (CondCodes) {
2469    unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2470    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2471            TmpReg1)
2472        .addReg(AArch64::WZR, getKillRegState(true))
2473        .addReg(AArch64::WZR, getKillRegState(true))
2474        .addImm(CondCodes[0]);
2475    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2476            ResultReg)
2477        .addReg(TmpReg1, getKillRegState(true))
2478        .addReg(AArch64::WZR, getKillRegState(true))
2479        .addImm(CondCodes[1]);
2480
2481    updateValueMap(I, ResultReg);
2482    return true;
2483  }
2484
2485  // Now set a register based on the comparison.
2486  AArch64CC::CondCode CC = getCompareCC(Predicate);
2487  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2488  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2489  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2490          ResultReg)
2491      .addReg(AArch64::WZR, getKillRegState(true))
2492      .addReg(AArch64::WZR, getKillRegState(true))
2493      .addImm(invertedCC);
2494
2495  updateValueMap(I, ResultReg);
2496  return true;
2497}
2498
2499/// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
2500/// value.
2501bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2502  if (!SI->getType()->isIntegerTy(1))
2503    return false;
2504
2505  const Value *Src1Val, *Src2Val;
2506  unsigned Opc = 0;
2507  bool NeedExtraOp = false;
2508  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2509    if (CI->isOne()) {
2510      Src1Val = SI->getCondition();
2511      Src2Val = SI->getFalseValue();
2512      Opc = AArch64::ORRWrr;
2513    } else {
2514      assert(CI->isZero());
2515      Src1Val = SI->getFalseValue();
2516      Src2Val = SI->getCondition();
2517      Opc = AArch64::BICWrr;
2518    }
2519  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2520    if (CI->isOne()) {
2521      Src1Val = SI->getCondition();
2522      Src2Val = SI->getTrueValue();
2523      Opc = AArch64::ORRWrr;
2524      NeedExtraOp = true;
2525    } else {
2526      assert(CI->isZero());
2527      Src1Val = SI->getCondition();
2528      Src2Val = SI->getTrueValue();
2529      Opc = AArch64::ANDWrr;
2530    }
2531  }
2532
2533  if (!Opc)
2534    return false;
2535
2536  unsigned Src1Reg = getRegForValue(Src1Val);
2537  if (!Src1Reg)
2538    return false;
2539  bool Src1IsKill = hasTrivialKill(Src1Val);
2540
2541  unsigned Src2Reg = getRegForValue(Src2Val);
2542  if (!Src2Reg)
2543    return false;
2544  bool Src2IsKill = hasTrivialKill(Src2Val);
2545
2546  if (NeedExtraOp) {
2547    Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2548    Src1IsKill = true;
2549  }
2550  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2551                                       Src1IsKill, Src2Reg, Src2IsKill);
2552  updateValueMap(SI, ResultReg);
2553  return true;
2554}
2555
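/// \brief Select a select instruction using CSEL/FCSEL. Flags from a
/// preceding compare or XALU intrinsic are reused when they are still
/// available; otherwise a TST of bit 0 of the condition sets them.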
2556bool AArch64FastISel::selectSelect(const Instruction *I) {
2557  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2558  MVT VT;
2559  if (!isTypeSupported(I->getType(), VT))
2560    return false;
2561
2562  unsigned Opc;
2563  const TargetRegisterClass *RC;
2564  switch (VT.SimpleTy) {
2565  default:
2566    return false;
2567  case MVT::i1:
2568  case MVT::i8:
2569  case MVT::i16:
2570  case MVT::i32:
2571    Opc = AArch64::CSELWr;
2572    RC = &AArch64::GPR32RegClass;
2573    break;
2574  case MVT::i64:
2575    Opc = AArch64::CSELXr;
2576    RC = &AArch64::GPR64RegClass;
2577    break;
2578  case MVT::f32:
2579    Opc = AArch64::FCSELSrrr;
2580    RC = &AArch64::FPR32RegClass;
2581    break;
2582  case MVT::f64:
2583    Opc = AArch64::FCSELDrrr;
2584    RC = &AArch64::FPR64RegClass;
2585    break;
2586  }
2587
2588  const SelectInst *SI = cast<SelectInst>(I);
2589  const Value *Cond = SI->getCondition();
2590  AArch64CC::CondCode CC = AArch64CC::NE;
2591  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2592
2593  if (optimizeSelect(SI))
2594    return true;
2595
  // Try to pick up the flags so we don't have to emit another compare.
2597  if (foldXALUIntrinsic(CC, I, Cond)) {
    // Artificially request the condition to force emission of the XALU
    // intrinsic.
2599    unsigned CondReg = getRegForValue(Cond);
2600    if (!CondReg)
2601      return false;
2602  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2603             isValueAvailable(Cond)) {
2604    const auto *Cmp = cast<CmpInst>(Cond);
2605    // Try to optimize or fold the cmp.
2606    CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2607    const Value *FoldSelect = nullptr;
2608    switch (Predicate) {
2609    default:
2610      break;
2611    case CmpInst::FCMP_FALSE:
2612      FoldSelect = SI->getFalseValue();
2613      break;
2614    case CmpInst::FCMP_TRUE:
2615      FoldSelect = SI->getTrueValue();
2616      break;
2617    }
2618
2619    if (FoldSelect) {
2620      unsigned SrcReg = getRegForValue(FoldSelect);
2621      if (!SrcReg)
2622        return false;
2623      unsigned UseReg = lookUpRegForValue(SI);
2624      if (UseReg)
2625        MRI.clearKillFlags(UseReg);
2626
2627      updateValueMap(I, SrcReg);
2628      return true;
2629    }
2630
2631    // Emit the cmp.
2632    if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2633      return false;
2634
2635    // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2636    CC = getCompareCC(Predicate);
2637    switch (Predicate) {
2638    default:
2639      break;
2640    case CmpInst::FCMP_UEQ:
2641      ExtraCC = AArch64CC::EQ;
2642      CC = AArch64CC::VS;
2643      break;
2644    case CmpInst::FCMP_ONE:
2645      ExtraCC = AArch64CC::MI;
2646      CC = AArch64CC::GT;
2647      break;
2648    }
2649    assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2650  } else {
2651    unsigned CondReg = getRegForValue(Cond);
2652    if (!CondReg)
2653      return false;
2654    bool CondIsKill = hasTrivialKill(Cond);
2655
2656    const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2657    CondReg = constrainOperandRegClass(II, CondReg, 1);
2658
2659    // Emit a TST instruction (ANDS wzr, reg, #imm).
2660    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2661            AArch64::WZR)
2662        .addReg(CondReg, getKillRegState(CondIsKill))
2663        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2664  }
2665
2666  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2667  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2668
2669  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2670  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2671
2672  if (!Src1Reg || !Src2Reg)
2673    return false;
2674
2675  if (ExtraCC != AArch64CC::AL) {
2676    Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2677                               Src2IsKill, ExtraCC);
2678    Src2IsKill = true;
2679  }
2680  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2681                                        Src2IsKill, CC);
2682  updateValueMap(I, ResultReg);
2683  return true;
2684}
2685
2686bool AArch64FastISel::selectFPExt(const Instruction *I) {
2687  Value *V = I->getOperand(0);
2688  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2689    return false;
2690
2691  unsigned Op = getRegForValue(V);
2692  if (Op == 0)
2693    return false;
2694
2695  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2696  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2697          ResultReg).addReg(Op);
2698  updateValueMap(I, ResultReg);
2699  return true;
2700}
2701
2702bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2703  Value *V = I->getOperand(0);
2704  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2705    return false;
2706
2707  unsigned Op = getRegForValue(V);
2708  if (Op == 0)
2709    return false;
2710
2711  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2712  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2713          ResultReg).addReg(Op);
2714  updateValueMap(I, ResultReg);
2715  return true;
2716}
2717
2718// FPToUI and FPToSI
2719bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2720  MVT DestVT;
2721  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2722    return false;
2723
2724  unsigned SrcReg = getRegForValue(I->getOperand(0));
2725  if (SrcReg == 0)
2726    return false;
2727
2728  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2729  if (SrcVT == MVT::f128)
2730    return false;
2731
2732  unsigned Opc;
2733  if (SrcVT == MVT::f64) {
2734    if (Signed)
2735      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2736    else
2737      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2738  } else {
2739    if (Signed)
2740      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2741    else
2742      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2743  }
2744  unsigned ResultReg = createResultReg(
2745      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2746  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2747      .addReg(SrcReg);
2748  updateValueMap(I, ResultReg);
2749  return true;
2750}
2751
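/// \brief Select sitofp/uitofp. Sub-32-bit integer sources are extended to
/// i32 first, matching the signedness of the conversion.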
2752bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2753  MVT DestVT;
2754  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2755    return false;
  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
         "Unexpected value type.");
2758
2759  unsigned SrcReg = getRegForValue(I->getOperand(0));
2760  if (!SrcReg)
2761    return false;
2762  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2763
2764  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2765
2766  // Handle sign-extension.
2767  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2768    SrcReg =
2769        emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2770    if (!SrcReg)
2771      return false;
2772    SrcIsKill = true;
2773  }
2774
2775  unsigned Opc;
2776  if (SrcVT == MVT::i64) {
2777    if (Signed)
2778      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2779    else
2780      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2781  } else {
2782    if (Signed)
2783      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2784    else
2785      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2786  }
2787
2788  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2789                                      SrcIsKill);
2790  updateValueMap(I, ResultReg);
2791  return true;
2792}
2793
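/// \brief Lower incoming arguments for simple C calling convention functions
/// that fit in at most eight GPR and eight FPR/SIMD argument registers.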
2794bool AArch64FastISel::fastLowerArguments() {
2795  if (!FuncInfo.CanLowerReturn)
2796    return false;
2797
2798  const Function *F = FuncInfo.Fn;
2799  if (F->isVarArg())
2800    return false;
2801
2802  CallingConv::ID CC = F->getCallingConv();
2803  if (CC != CallingConv::C)
2804    return false;
2805
2806  // Only handle simple cases of up to 8 GPR and FPR each.
2807  unsigned GPRCnt = 0;
2808  unsigned FPRCnt = 0;
2809  unsigned Idx = 0;
2810  for (auto const &Arg : F->args()) {
2811    // The first argument is at index 1.
2812    ++Idx;
2813    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2814        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2815        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2816        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2817      return false;
2818
2819    Type *ArgTy = Arg.getType();
2820    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2821      return false;
2822
2823    EVT ArgVT = TLI.getValueType(DL, ArgTy);
2824    if (!ArgVT.isSimple())
2825      return false;
2826
2827    MVT VT = ArgVT.getSimpleVT().SimpleTy;
2828    if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2829      return false;
2830
2831    if (VT.isVector() &&
2832        (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2833      return false;
2834
2835    if (VT >= MVT::i1 && VT <= MVT::i64)
2836      ++GPRCnt;
2837    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2838             VT.is128BitVector())
2839      ++FPRCnt;
2840    else
2841      return false;
2842
2843    if (GPRCnt > 8 || FPRCnt > 8)
2844      return false;
2845  }
2846
2847  static const MCPhysReg Registers[6][8] = {
2848    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2849      AArch64::W5, AArch64::W6, AArch64::W7 },
2850    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2851      AArch64::X5, AArch64::X6, AArch64::X7 },
2852    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2853      AArch64::H5, AArch64::H6, AArch64::H7 },
2854    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2855      AArch64::S5, AArch64::S6, AArch64::S7 },
2856    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2857      AArch64::D5, AArch64::D6, AArch64::D7 },
2858    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2859      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2860  };
2861
2862  unsigned GPRIdx = 0;
2863  unsigned FPRIdx = 0;
2864  for (auto const &Arg : F->args()) {
2865    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2866    unsigned SrcReg;
2867    const TargetRegisterClass *RC;
2868    if (VT >= MVT::i1 && VT <= MVT::i32) {
2869      SrcReg = Registers[0][GPRIdx++];
2870      RC = &AArch64::GPR32RegClass;
2871      VT = MVT::i32;
2872    } else if (VT == MVT::i64) {
2873      SrcReg = Registers[1][GPRIdx++];
2874      RC = &AArch64::GPR64RegClass;
2875    } else if (VT == MVT::f16) {
2876      SrcReg = Registers[2][FPRIdx++];
2877      RC = &AArch64::FPR16RegClass;
    } else if (VT == MVT::f32) {
2879      SrcReg = Registers[3][FPRIdx++];
2880      RC = &AArch64::FPR32RegClass;
2881    } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2882      SrcReg = Registers[4][FPRIdx++];
2883      RC = &AArch64::FPR64RegClass;
2884    } else if (VT.is128BitVector()) {
2885      SrcReg = Registers[5][FPRIdx++];
2886      RC = &AArch64::FPR128RegClass;
2887    } else
2888      llvm_unreachable("Unexpected value type.");
2889
2890    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2891    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2892    // Without this, EmitLiveInCopies may eliminate the livein if its only
2893    // use is a bitcast (which isn't turned into an instruction).
2894    unsigned ResultReg = createResultReg(RC);
2895    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2896            TII.get(TargetOpcode::COPY), ResultReg)
2897        .addReg(DstReg, getKillRegState(true));
2898    updateValueMap(&Arg, ResultReg);
2899  }
2900  return true;
2901}
2902
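/// \brief Lower outgoing call arguments: assign locations according to the
/// calling convention, promote values as required, and either copy them into
/// argument registers or store them to the outgoing stack area.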
2903bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2904                                      SmallVectorImpl<MVT> &OutVTs,
2905                                      unsigned &NumBytes) {
2906  CallingConv::ID CC = CLI.CallConv;
2907  SmallVector<CCValAssign, 16> ArgLocs;
2908  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2909  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2910
2911  // Get a count of how many bytes are to be pushed on the stack.
2912  NumBytes = CCInfo.getNextStackOffset();
2913
2914  // Issue CALLSEQ_START
2915  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2916  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2917    .addImm(NumBytes);
2918
2919  // Process the args.
2920  for (CCValAssign &VA : ArgLocs) {
2921    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2922    MVT ArgVT = OutVTs[VA.getValNo()];
2923
2924    unsigned ArgReg = getRegForValue(ArgVal);
2925    if (!ArgReg)
2926      return false;
2927
2928    // Handle arg promotion: SExt, ZExt, AExt.
2929    switch (VA.getLocInfo()) {
2930    case CCValAssign::Full:
2931      break;
2932    case CCValAssign::SExt: {
2933      MVT DestVT = VA.getLocVT();
2934      MVT SrcVT = ArgVT;
2935      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2936      if (!ArgReg)
2937        return false;
2938      break;
2939    }
2940    case CCValAssign::AExt:
2941    // Intentional fall-through.
2942    case CCValAssign::ZExt: {
2943      MVT DestVT = VA.getLocVT();
2944      MVT SrcVT = ArgVT;
2945      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2946      if (!ArgReg)
2947        return false;
2948      break;
2949    }
2950    default:
2951      llvm_unreachable("Unknown arg promotion!");
2952    }
2953
2954    // Now copy/store arg to correct locations.
2955    if (VA.isRegLoc() && !VA.needsCustom()) {
2956      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2957              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2958      CLI.OutRegs.push_back(VA.getLocReg());
2959    } else if (VA.needsCustom()) {
2960      // FIXME: Handle custom args.
2961      return false;
2962    } else {
2963      assert(VA.isMemLoc() && "Assuming store on stack.");
2964
2965      // Don't emit stores for undef values.
2966      if (isa<UndefValue>(ArgVal))
2967        continue;
2968
2969      // Need to store on the stack.
2970      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
2971
2972      unsigned BEAlign = 0;
2973      if (ArgSize < 8 && !Subtarget->isLittleEndian())
2974        BEAlign = 8 - ArgSize;
2975
2976      Address Addr;
2977      Addr.setKind(Address::RegBase);
2978      Addr.setReg(AArch64::SP);
2979      Addr.setOffset(VA.getLocMemOffset() + BEAlign);
2980
2981      unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
2982      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2983          MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
2984          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
2985
2986      if (!emitStore(ArgVT, ArgReg, Addr, MMO))
2987        return false;
2988    }
2989  }
2990  return true;
2991}
2992
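/// \brief Finish the call sequence: emit CALLSEQ_END and copy a single return
/// value, if there is one, out of its physical register.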
2993bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
2994                                 unsigned NumBytes) {
2995  CallingConv::ID CC = CLI.CallConv;
2996
2997  // Issue CALLSEQ_END
2998  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2999  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3000    .addImm(NumBytes).addImm(0);
3001
3002  // Now the return value.
3003  if (RetVT != MVT::isVoid) {
3004    SmallVector<CCValAssign, 16> RVLocs;
3005    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3006    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3007
3008    // Only handle a single return value.
3009    if (RVLocs.size() != 1)
3010      return false;
3011
3012    // Copy all of the result registers out of their specified physreg.
3013    MVT CopyVT = RVLocs[0].getValVT();
3014
3015    // TODO: Handle big-endian results
3016    if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3017      return false;
3018
3019    unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3020    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3021            TII.get(TargetOpcode::COPY), ResultReg)
3022        .addReg(RVLocs[0].getLocReg());
3023    CLI.InRegs.push_back(RVLocs[0].getLocReg());
3024
3025    CLI.ResultReg = ResultReg;
3026    CLI.NumResultRegs = 1;
3027  }
3028
3029  return true;
3030}
3031
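/// \brief Try to fast-isel a call. Tail calls, varargs, and code models other
/// than small (or large on MachO) are deferred to SelectionDAG.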
3032bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3033  CallingConv::ID CC  = CLI.CallConv;
3034  bool IsTailCall     = CLI.IsTailCall;
3035  bool IsVarArg       = CLI.IsVarArg;
3036  const Value *Callee = CLI.Callee;
3037  MCSymbol *Symbol = CLI.Symbol;
3038
3039  if (!Callee && !Symbol)
3040    return false;
3041
3042  // Allow SelectionDAG isel to handle tail calls.
3043  if (IsTailCall)
3044    return false;
3045
3046  CodeModel::Model CM = TM.getCodeModel();
3047  // Only support the small and large code models.
3048  if (CM != CodeModel::Small && CM != CodeModel::Large)
3049    return false;
3050
3051  // FIXME: Add large code model support for ELF.
3052  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3053    return false;
3054
3055  // Let SDISel handle vararg functions.
3056  if (IsVarArg)
3057    return false;
3058
3059  // FIXME: Only handle *simple* calls for now.
3060  MVT RetVT;
3061  if (CLI.RetTy->isVoidTy())
3062    RetVT = MVT::isVoid;
3063  else if (!isTypeLegal(CLI.RetTy, RetVT))
3064    return false;
3065
3066  for (auto Flag : CLI.OutFlags)
3067    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
3068      return false;
3069
3070  // Set up the argument vectors.
3071  SmallVector<MVT, 16> OutVTs;
3072  OutVTs.reserve(CLI.OutVals.size());
3073
3074  for (auto *Val : CLI.OutVals) {
3075    MVT VT;
3076    if (!isTypeLegal(Val->getType(), VT) &&
3077        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3078      return false;
3079
3080    // We don't handle vector parameters yet.
3081    if (VT.isVector() || VT.getSizeInBits() > 64)
3082      return false;
3083
3084    OutVTs.push_back(VT);
3085  }
3086
3087  Address Addr;
3088  if (Callee && !computeCallAddress(Callee, Addr))
3089    return false;
3090
3091  // Handle the arguments now that we've gotten them.
3092  unsigned NumBytes;
3093  if (!processCallArgs(CLI, OutVTs, NumBytes))
3094    return false;
3095
3096  // Issue the call.
3097  MachineInstrBuilder MIB;
3098  if (CM == CodeModel::Small) {
3099    const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3100    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3101    if (Symbol)
3102      MIB.addSym(Symbol, 0);
3103    else if (Addr.getGlobalValue())
3104      MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3105    else if (Addr.getReg()) {
3106      unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3107      MIB.addReg(Reg);
3108    } else
3109      return false;
3110  } else {
3111    unsigned CallReg = 0;
3112    if (Symbol) {
3113      unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3114      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3115              ADRPReg)
3116          .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3117
3118      CallReg = createResultReg(&AArch64::GPR64RegClass);
3119      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3120              TII.get(AArch64::LDRXui), CallReg)
3121          .addReg(ADRPReg)
3122          .addSym(Symbol,
3123                  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3124    } else if (Addr.getGlobalValue())
3125      CallReg = materializeGV(Addr.getGlobalValue());
3126    else if (Addr.getReg())
3127      CallReg = Addr.getReg();
3128
3129    if (!CallReg)
3130      return false;
3131
3132    const MCInstrDesc &II = TII.get(AArch64::BLR);
3133    CallReg = constrainOperandRegClass(II, CallReg, 0);
3134    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3135  }
3136
3137  // Add implicit physical register uses to the call.
3138  for (auto Reg : CLI.OutRegs)
3139    MIB.addReg(Reg, RegState::Implicit);
3140
3141  // Add a register mask with the call-preserved registers.
3142  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3143  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3144
3145  CLI.Call = MIB;
3146
3147  // Finish off the call including any return values.
3148  return finishCall(CLI, RetVT, NumBytes);
3149}
3150
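/// \brief Return true if a memcpy of \p Len bytes with alignment \p Alignment
/// is small enough to be expanded inline instead of calling the library.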
3151bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3152  if (Alignment)
3153    return Len / Alignment <= 4;
3154  else
3155    return Len < 32;
3156}
3157
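/// \brief Expand a small memcpy inline as a sequence of loads and stores,
/// using the widest type the remaining length and alignment allow.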
3158bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3159                                         uint64_t Len, unsigned Alignment) {
3160  // Make sure we don't bloat code by inlining very large memcpy's.
3161  if (!isMemCpySmall(Len, Alignment))
3162    return false;
3163
3164  int64_t UnscaledOffset = 0;
3165  Address OrigDest = Dest;
3166  Address OrigSrc = Src;
3167
3168  while (Len) {
3169    MVT VT;
3170    if (!Alignment || Alignment >= 8) {
3171      if (Len >= 8)
3172        VT = MVT::i64;
3173      else if (Len >= 4)
3174        VT = MVT::i32;
3175      else if (Len >= 2)
3176        VT = MVT::i16;
3177      else {
3178        VT = MVT::i8;
3179      }
3180    } else {
3181      // Bound based on alignment.
3182      if (Len >= 4 && Alignment == 4)
3183        VT = MVT::i32;
3184      else if (Len >= 2 && Alignment == 2)
3185        VT = MVT::i16;
3186      else {
3187        VT = MVT::i8;
3188      }
3189    }
3190
3191    unsigned ResultReg = emitLoad(VT, VT, Src);
3192    if (!ResultReg)
3193      return false;
3194
3195    if (!emitStore(VT, ResultReg, Dest))
3196      return false;
3197
3198    int64_t Size = VT.getSizeInBits() / 8;
3199    Len -= Size;
3200    UnscaledOffset += Size;
3201
3202    // We need to recompute the unscaled offset for each iteration.
3203    Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3204    Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3205  }
3206
3207  return true;
3208}
3209
3210/// \brief Check if it is possible to fold the condition from the XALU intrinsic
3211/// into the user. The condition code will only be updated on success.
3212bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3213                                        const Instruction *I,
3214                                        const Value *Cond) {
3215  if (!isa<ExtractValueInst>(Cond))
3216    return false;
3217
3218  const auto *EV = cast<ExtractValueInst>(Cond);
3219  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3220    return false;
3221
3222  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3223  MVT RetVT;
3224  const Function *Callee = II->getCalledFunction();
3225  Type *RetTy =
3226      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3227  if (!isTypeLegal(RetTy, RetVT))
3228    return false;
3229
3230  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3231    return false;
3232
3233  const Value *LHS = II->getArgOperand(0);
3234  const Value *RHS = II->getArgOperand(1);
3235
3236  // Canonicalize immediate to the RHS.
3237  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3238      isCommutativeIntrinsic(II))
3239    std::swap(LHS, RHS);
3240
3241  // Simplify multiplies.
3242  Intrinsic::ID IID = II->getIntrinsicID();
3243  switch (IID) {
3244  default:
3245    break;
3246  case Intrinsic::smul_with_overflow:
3247    if (const auto *C = dyn_cast<ConstantInt>(RHS))
3248      if (C->getValue() == 2)
3249        IID = Intrinsic::sadd_with_overflow;
3250    break;
3251  case Intrinsic::umul_with_overflow:
3252    if (const auto *C = dyn_cast<ConstantInt>(RHS))
3253      if (C->getValue() == 2)
3254        IID = Intrinsic::uadd_with_overflow;
3255    break;
3256  }
3257
3258  AArch64CC::CondCode TmpCC;
3259  switch (IID) {
3260  default:
3261    return false;
3262  case Intrinsic::sadd_with_overflow:
3263  case Intrinsic::ssub_with_overflow:
3264    TmpCC = AArch64CC::VS;
3265    break;
3266  case Intrinsic::uadd_with_overflow:
3267    TmpCC = AArch64CC::HS;
3268    break;
3269  case Intrinsic::usub_with_overflow:
3270    TmpCC = AArch64CC::LO;
3271    break;
3272  case Intrinsic::smul_with_overflow:
3273  case Intrinsic::umul_with_overflow:
3274    TmpCC = AArch64CC::NE;
3275    break;
3276  }
3277
3278  // Check if both instructions are in the same basic block.
3279  if (!isValueAvailable(II))
3280    return false;
3281
3282  // Make sure nothing is in the way
3283  BasicBlock::const_iterator Start(I);
3284  BasicBlock::const_iterator End(II);
3285  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3286    // We only expect extractvalue instructions between the intrinsic and the
3287    // instruction to be selected.
3288    if (!isa<ExtractValueInst>(Itr))
3289      return false;
3290
3291    // Check that the extractvalue operand comes from the intrinsic.
3292    const auto *EVI = cast<ExtractValueInst>(Itr);
3293    if (EVI->getAggregateOperand() != II)
3294      return false;
3295  }
3296
3297  CC = TmpCC;
3298  return true;
3299}
3300
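/// \brief Lower the subset of intrinsics that FastISel knows how to handle;
/// returning false defers everything else to SelectionDAG.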
3301bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3302  // FIXME: Handle more intrinsics.
3303  switch (II->getIntrinsicID()) {
3304  default: return false;
3305  case Intrinsic::frameaddress: {
3306    MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
3307    MFI->setFrameAddressIsTaken(true);
3308
3309    const AArch64RegisterInfo *RegInfo =
3310        static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
3311    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3312    unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3313    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3314            TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3315    // Recursively load frame address
3316    // ldr x0, [fp]
3317    // ldr x0, [x0]
3318    // ldr x0, [x0]
3319    // ...
3320    unsigned DestReg;
3321    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3322    while (Depth--) {
3323      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3324                                SrcReg, /*IsKill=*/true, 0);
3325      assert(DestReg && "Unexpected LDR instruction emission failure.");
3326      SrcReg = DestReg;
3327    }
3328
3329    updateValueMap(II, SrcReg);
3330    return true;
3331  }
3332  case Intrinsic::memcpy:
3333  case Intrinsic::memmove: {
3334    const auto *MTI = cast<MemTransferInst>(II);
3335    // Don't handle volatile.
3336    if (MTI->isVolatile())
3337      return false;
3338
3339    // Disable inlining for memmove before calls to computeAddress. Otherwise,
3340    // we would emit dead code because we don't currently handle memmoves.
3341    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3342    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3343      // Small memcpy's are common enough that we want to do them without a call
3344      // if possible.
3345      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3346      unsigned Alignment = MTI->getAlignment();
3347      if (isMemCpySmall(Len, Alignment)) {
3348        Address Dest, Src;
3349        if (!computeAddress(MTI->getRawDest(), Dest) ||
3350            !computeAddress(MTI->getRawSource(), Src))
3351          return false;
3352        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3353          return true;
3354      }
3355    }
3356
3357    if (!MTI->getLength()->getType()->isIntegerTy(64))
3358      return false;
3359
3360    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3361      // Fast instruction selection doesn't support the special
3362      // address spaces.
3363      return false;
3364
3365    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3366    return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3367  }
3368  case Intrinsic::memset: {
3369    const MemSetInst *MSI = cast<MemSetInst>(II);
3370    // Don't handle volatile.
3371    if (MSI->isVolatile())
3372      return false;
3373
3374    if (!MSI->getLength()->getType()->isIntegerTy(64))
3375      return false;
3376
3377    if (MSI->getDestAddressSpace() > 255)
3378      // Fast instruction selection doesn't support the special
3379      // address spaces.
3380      return false;
3381
3382    return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3383  }
3384  case Intrinsic::sin:
3385  case Intrinsic::cos:
3386  case Intrinsic::pow: {
3387    MVT RetVT;
3388    if (!isTypeLegal(II->getType(), RetVT))
3389      return false;
3390
3391    if (RetVT != MVT::f32 && RetVT != MVT::f64)
3392      return false;
3393
3394    static const RTLIB::Libcall LibCallTable[3][2] = {
3395      { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3396      { RTLIB::COS_F32, RTLIB::COS_F64 },
3397      { RTLIB::POW_F32, RTLIB::POW_F64 }
3398    };
3399    RTLIB::Libcall LC;
3400    bool Is64Bit = RetVT == MVT::f64;
3401    switch (II->getIntrinsicID()) {
3402    default:
3403      llvm_unreachable("Unexpected intrinsic.");
3404    case Intrinsic::sin:
3405      LC = LibCallTable[0][Is64Bit];
3406      break;
3407    case Intrinsic::cos:
3408      LC = LibCallTable[1][Is64Bit];
3409      break;
3410    case Intrinsic::pow:
3411      LC = LibCallTable[2][Is64Bit];
3412      break;
3413    }
3414
3415    ArgListTy Args;
3416    Args.reserve(II->getNumArgOperands());
3417
3418    // Populate the argument list.
3419    for (auto &Arg : II->arg_operands()) {
3420      ArgListEntry Entry;
3421      Entry.Val = Arg;
3422      Entry.Ty = Arg->getType();
3423      Args.push_back(Entry);
3424    }
3425
3426    CallLoweringInfo CLI;
3427    MCContext &Ctx = MF->getContext();
3428    CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3429                  TLI.getLibcallName(LC), std::move(Args));
3430    if (!lowerCallTo(CLI))
3431      return false;
3432    updateValueMap(II, CLI.ResultReg);
3433    return true;
3434  }
3435  case Intrinsic::fabs: {
3436    MVT VT;
3437    if (!isTypeLegal(II->getType(), VT))
3438      return false;
3439
3440    unsigned Opc;
3441    switch (VT.SimpleTy) {
3442    default:
3443      return false;
3444    case MVT::f32:
3445      Opc = AArch64::FABSSr;
3446      break;
3447    case MVT::f64:
3448      Opc = AArch64::FABSDr;
3449      break;
3450    }
3451    unsigned SrcReg = getRegForValue(II->getOperand(0));
3452    if (!SrcReg)
3453      return false;
3454    bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3455    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3456    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3457      .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3458    updateValueMap(II, ResultReg);
3459    return true;
3460  }
3461  case Intrinsic::trap: {
3462    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3463        .addImm(1);
3464    return true;
3465  }
3466  case Intrinsic::sqrt: {
3467    Type *RetTy = II->getCalledFunction()->getReturnType();
3468
3469    MVT VT;
3470    if (!isTypeLegal(RetTy, VT))
3471      return false;
3472
3473    unsigned Op0Reg = getRegForValue(II->getOperand(0));
3474    if (!Op0Reg)
3475      return false;
3476    bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3477
3478    unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3479    if (!ResultReg)
3480      return false;
3481
3482    updateValueMap(II, ResultReg);
3483    return true;
3484  }
3485  case Intrinsic::sadd_with_overflow:
3486  case Intrinsic::uadd_with_overflow:
3487  case Intrinsic::ssub_with_overflow:
3488  case Intrinsic::usub_with_overflow:
3489  case Intrinsic::smul_with_overflow:
3490  case Intrinsic::umul_with_overflow: {
3491    // This implements the basic lowering of the xalu with overflow intrinsics.
3492    const Function *Callee = II->getCalledFunction();
3493    auto *Ty = cast<StructType>(Callee->getReturnType());
3494    Type *RetTy = Ty->getTypeAtIndex(0U);
3495
3496    MVT VT;
3497    if (!isTypeLegal(RetTy, VT))
3498      return false;
3499
3500    if (VT != MVT::i32 && VT != MVT::i64)
3501      return false;
3502
3503    const Value *LHS = II->getArgOperand(0);
3504    const Value *RHS = II->getArgOperand(1);
3505    // Canonicalize immediate to the RHS.
3506    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3507        isCommutativeIntrinsic(II))
3508      std::swap(LHS, RHS);
3509
3510    // Simplify multiplies.
3511    Intrinsic::ID IID = II->getIntrinsicID();
3512    switch (IID) {
3513    default:
3514      break;
3515    case Intrinsic::smul_with_overflow:
3516      if (const auto *C = dyn_cast<ConstantInt>(RHS))
3517        if (C->getValue() == 2) {
3518          IID = Intrinsic::sadd_with_overflow;
3519          RHS = LHS;
3520        }
3521      break;
3522    case Intrinsic::umul_with_overflow:
3523      if (const auto *C = dyn_cast<ConstantInt>(RHS))
3524        if (C->getValue() == 2) {
3525          IID = Intrinsic::uadd_with_overflow;
3526          RHS = LHS;
3527        }
3528      break;
3529    }
3530
3531    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3532    AArch64CC::CondCode CC = AArch64CC::Invalid;
3533    switch (IID) {
3534    default: llvm_unreachable("Unexpected intrinsic!");
3535    case Intrinsic::sadd_with_overflow:
3536      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3537      CC = AArch64CC::VS;
3538      break;
3539    case Intrinsic::uadd_with_overflow:
3540      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3541      CC = AArch64CC::HS;
3542      break;
3543    case Intrinsic::ssub_with_overflow:
3544      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3545      CC = AArch64CC::VS;
3546      break;
3547    case Intrinsic::usub_with_overflow:
3548      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3549      CC = AArch64CC::LO;
3550      break;
3551    case Intrinsic::smul_with_overflow: {
3552      CC = AArch64CC::NE;
3553      unsigned LHSReg = getRegForValue(LHS);
3554      if (!LHSReg)
3555        return false;
3556      bool LHSIsKill = hasTrivialKill(LHS);
3557
3558      unsigned RHSReg = getRegForValue(RHS);
3559      if (!RHSReg)
3560        return false;
3561      bool RHSIsKill = hasTrivialKill(RHS);
3562
3563      if (VT == MVT::i32) {
3564        MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3565        unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3566                                       /*IsKill=*/false, 32);
3567        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3568                                            AArch64::sub_32);
3569        ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3570                                              AArch64::sub_32);
3571        emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3572                    AArch64_AM::ASR, 31, /*WantResult=*/false);
3573      } else {
3574        assert(VT == MVT::i64 && "Unexpected value type.");
3575        // LHSReg and RHSReg cannot be killed by this Mul, since they are
3576        // reused in the next instruction.
3577        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3578                            /*IsKill=*/false);
3579        unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3580                                        RHSReg, RHSIsKill);
3581        emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3582                    AArch64_AM::ASR, 63, /*WantResult=*/false);
3583      }
3584      break;
3585    }
3586    case Intrinsic::umul_with_overflow: {
3587      CC = AArch64CC::NE;
3588      unsigned LHSReg = getRegForValue(LHS);
3589      if (!LHSReg)
3590        return false;
3591      bool LHSIsKill = hasTrivialKill(LHS);
3592
3593      unsigned RHSReg = getRegForValue(RHS);
3594      if (!RHSReg)
3595        return false;
3596      bool RHSIsKill = hasTrivialKill(RHS);
3597
3598      if (VT == MVT::i32) {
3599        MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3600        emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3601                    /*IsKill=*/false, AArch64_AM::LSR, 32,
3602                    /*WantResult=*/false);
3603        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3604                                            AArch64::sub_32);
3605      } else {
3606        assert(VT == MVT::i64 && "Unexpected value type.");
3607        // LHSReg and RHSReg cannot be killed by this Mul, since they are
3608        // reused in the next instruction.
3609        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3610                            /*IsKill=*/false);
3611        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3612                                        RHSReg, RHSIsKill);
3613        emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3614                    /*IsKill=*/false, /*WantResult=*/false);
3615      }
3616      break;
3617    }
3618    }
3619
3620    if (MulReg) {
3621      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3622      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3623              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3624    }
3625
3626    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3627                                  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3628                                  /*IsKill=*/true, getInvertedCondCode(CC));
3629    (void)ResultReg2;
3630    assert((ResultReg1 + 1) == ResultReg2 &&
3631           "Nonconsecutive result registers.");
3632    updateValueMap(II, ResultReg1, 2);
3633    return true;
3634  }
3635  }
3636  return false;
3637}
3638
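/// \brief Lower a return, handling at most one register return value and
/// extending it to the ABI-required width if necessary.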
3639bool AArch64FastISel::selectRet(const Instruction *I) {
3640  const ReturnInst *Ret = cast<ReturnInst>(I);
3641  const Function &F = *I->getParent()->getParent();
3642
3643  if (!FuncInfo.CanLowerReturn)
3644    return false;
3645
3646  if (F.isVarArg())
3647    return false;
3648
3649  if (TLI.supportSplitCSR(FuncInfo.MF))
3650    return false;
3651
3652  // Build a list of return value registers.
3653  SmallVector<unsigned, 4> RetRegs;
3654
3655  if (Ret->getNumOperands() > 0) {
3656    CallingConv::ID CC = F.getCallingConv();
3657    SmallVector<ISD::OutputArg, 4> Outs;
3658    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3659
3660    // Analyze operands of the call, assigning locations to each operand.
3661    SmallVector<CCValAssign, 16> ValLocs;
3662    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3663    CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3664                                                     : RetCC_AArch64_AAPCS;
3665    CCInfo.AnalyzeReturn(Outs, RetCC);
3666
3667    // Only handle a single return value for now.
3668    if (ValLocs.size() != 1)
3669      return false;
3670
3671    CCValAssign &VA = ValLocs[0];
3672    const Value *RV = Ret->getOperand(0);
3673
3674    // Don't bother handling odd stuff for now.
3675    if ((VA.getLocInfo() != CCValAssign::Full) &&
3676        (VA.getLocInfo() != CCValAssign::BCvt))
3677      return false;
3678
3679    // Only handle register returns for now.
3680    if (!VA.isRegLoc())
3681      return false;
3682
3683    unsigned Reg = getRegForValue(RV);
3684    if (Reg == 0)
3685      return false;
3686
3687    unsigned SrcReg = Reg + VA.getValNo();
3688    unsigned DestReg = VA.getLocReg();
3689    // Avoid a cross-class copy. This is very unlikely.
3690    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3691      return false;
3692
3693    EVT RVEVT = TLI.getValueType(DL, RV->getType());
3694    if (!RVEVT.isSimple())
3695      return false;
3696
3697    // Vectors (of > 1 lane) in big endian need tricky handling.
3698    if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3699        !Subtarget->isLittleEndian())
3700      return false;
3701
3702    MVT RVVT = RVEVT.getSimpleVT();
3703    if (RVVT == MVT::f128)
3704      return false;
3705
3706    MVT DestVT = VA.getValVT();
3707    // Special handling for extended integers.
3708    if (RVVT != DestVT) {
3709      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3710        return false;
3711
3712      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3713        return false;
3714
3715      bool IsZExt = Outs[0].Flags.isZExt();
3716      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3717      if (SrcReg == 0)
3718        return false;
3719    }
3720
3721    // Make the copy.
3722    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3723            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3724
3725    // Add register to return instruction.
3726    RetRegs.push_back(VA.getLocReg());
3727  }
3728
3729  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3730                                    TII.get(AArch64::RET_ReallyLR));
3731  for (unsigned RetReg : RetRegs)
3732    MIB.addReg(RetReg, RegState::Implicit);
3733  return true;
3734}
3735
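/// \brief Lower an integer truncate. A truncate from i64 to an illegal narrow
/// type becomes an extract of the low 32 bits followed by an AND; truncates
/// from 32-bit or narrower sources only need a register copy.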
3736bool AArch64FastISel::selectTrunc(const Instruction *I) {
3737  Type *DestTy = I->getType();
3738  Value *Op = I->getOperand(0);
3739  Type *SrcTy = Op->getType();
3740
3741  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3742  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3743  if (!SrcEVT.isSimple())
3744    return false;
3745  if (!DestEVT.isSimple())
3746    return false;
3747
3748  MVT SrcVT = SrcEVT.getSimpleVT();
3749  MVT DestVT = DestEVT.getSimpleVT();
3750
3751  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3752      SrcVT != MVT::i8)
3753    return false;
3754  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3755      DestVT != MVT::i1)
3756    return false;
3757
3758  unsigned SrcReg = getRegForValue(Op);
3759  if (!SrcReg)
3760    return false;
3761  bool SrcIsKill = hasTrivialKill(Op);
3762
3763  // If we're truncating from i64 to a smaller non-legal type then generate an
3764  // AND. Otherwise, we know the high bits are undefined and a truncate only
3765  // generates a COPY. We cannot reuse the source register as the result
3766  // register, because that could incorrectly transfer the kill flag onto the
3767  // source register.
3768  unsigned ResultReg;
3769  if (SrcVT == MVT::i64) {
3770    uint64_t Mask = 0;
3771    switch (DestVT.SimpleTy) {
3772    default:
3773      // Trunc i64 to i32 is handled by the target-independent fast-isel.
3774      return false;
3775    case MVT::i1:
3776      Mask = 0x1;
3777      break;
3778    case MVT::i8:
3779      Mask = 0xff;
3780      break;
3781    case MVT::i16:
3782      Mask = 0xffff;
3783      break;
3784    }
3785    // Issue an extract_subreg to get the lower 32-bits.
3786    unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3787                                                AArch64::sub_32);
3788    // Create the AND instruction which performs the actual truncation.
3789    ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3790    assert(ResultReg && "Unexpected AND instruction emission failure.");
3791  } else {
3792    ResultReg = createResultReg(&AArch64::GPR32RegClass);
3793    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3794            TII.get(TargetOpcode::COPY), ResultReg)
3795        .addReg(SrcReg, getKillRegState(SrcIsKill));
3796  }
3797
3798  updateValueMap(I, ResultReg);
3799  return true;
3800}
3801
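/// \brief Extend an i1 value: zero-extension is an AND with 1 (plus a
/// SUBREG_TO_REG for i64 destinations); sign-extension uses SBFM and is not
/// implemented for i64 destinations yet.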
3802unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3803  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3804          DestVT == MVT::i64) &&
3805         "Unexpected value type.");
3806  // Handle i8 and i16 as i32.
3807  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3808    DestVT = MVT::i32;
3809
3810  if (IsZExt) {
3811    unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3812    assert(ResultReg && "Unexpected AND instruction emission failure.");
3813    if (DestVT == MVT::i64) {
3814      // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3815      // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3816      unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3817      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3818              TII.get(AArch64::SUBREG_TO_REG), Reg64)
3819          .addImm(0)
3820          .addReg(ResultReg)
3821          .addImm(AArch64::sub_32);
3822      ResultReg = Reg64;
3823    }
3824    return ResultReg;
3825  } else {
3826    if (DestVT == MVT::i64) {
3827      // FIXME: We're SExt i1 to i64.
3828      return 0;
3829    }
3830    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3831                            /*TODO:IsKill=*/false, 0, 0);
3832  }
3833}
3834
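/// \brief Emit an integer multiply as a multiply-add (MADD) against the zero
/// register.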
3835unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3836                                      unsigned Op1, bool Op1IsKill) {
3837  unsigned Opc, ZReg;
3838  switch (RetVT.SimpleTy) {
3839  default: return 0;
3840  case MVT::i8:
3841  case MVT::i16:
3842  case MVT::i32:
3843    RetVT = MVT::i32;
3844    Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3845  case MVT::i64:
3846    Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3847  }
3848
3849  const TargetRegisterClass *RC =
3850      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3851  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3852                          ZReg, /*IsKill=*/true);
3853}
3854
3855unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3856                                        unsigned Op1, bool Op1IsKill) {
3857  if (RetVT != MVT::i64)
3858    return 0;
3859
3860  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3861                          Op0, Op0IsKill, Op1, Op1IsKill,
3862                          AArch64::XZR, /*IsKill=*/true);
3863}
3864
3865unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3866                                        unsigned Op1, bool Op1IsKill) {
3867  if (RetVT != MVT::i64)
3868    return 0;
3869
3870  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3871                          Op0, Op0IsKill, Op1, Op1IsKill,
3872                          AArch64::XZR, /*IsKill=*/true);
3873}
3874
3875unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3876                                     unsigned Op1Reg, bool Op1IsKill) {
3877  unsigned Opc = 0;
3878  bool NeedTrunc = false;
3879  uint64_t Mask = 0;
3880  switch (RetVT.SimpleTy) {
3881  default: return 0;
3882  case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3883  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3884  case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
3885  case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
3886  }
3887
3888  const TargetRegisterClass *RC =
3889      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3890  if (NeedTrunc) {
3891    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3892    Op1IsKill = true;
3893  }
3894  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3895                                       Op1IsKill);
3896  if (NeedTrunc)
3897    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3898  return ResultReg;
3899}
3900
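/// \brief Emit a left shift by an immediate amount, folding a pending
/// zero-/sign-extension of the source into a single {S|U}BFM when possible.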
3901unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3902                                     bool Op0IsKill, uint64_t Shift,
3903                                     bool IsZExt) {
3904  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3905         "Unexpected source/return type pair.");
3906  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
3907          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
3908         "Unexpected source value type.");
3909  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3910          RetVT == MVT::i64) && "Unexpected return value type.");
3911
3912  bool Is64Bit = (RetVT == MVT::i64);
3913  unsigned RegSize = Is64Bit ? 64 : 32;
3914  unsigned DstBits = RetVT.getSizeInBits();
3915  unsigned SrcBits = SrcVT.getSizeInBits();
3916  const TargetRegisterClass *RC =
3917      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3918
3919  // Just emit a copy for "zero" shifts.
3920  if (Shift == 0) {
3921    if (RetVT == SrcVT) {
3922      unsigned ResultReg = createResultReg(RC);
3923      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3924              TII.get(TargetOpcode::COPY), ResultReg)
3925          .addReg(Op0, getKillRegState(Op0IsKill));
3926      return ResultReg;
3927    } else
3928      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
3929  }
3930
3931  // Don't deal with undefined shifts.
3932  if (Shift >= DstBits)
3933    return 0;
3934
3935  // For immediate shifts we can fold the zero-/sign-extension into the shift.
3936  // {S|U}BFM Wd, Wn, #r, #s
3937  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
3938
3939  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3940  // %2 = shl i16 %1, 4
3941  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3942  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3943  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3944  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3945
3946  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3947  // %2 = shl i16 %1, 8
3948  // Wd<32+7-24,32-24> = Wn<7:0>
3949  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3950  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3951  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3952
3953  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3954  // %2 = shl i16 %1, 12
3955  // Wd<32+3-20,32-20> = Wn<3:0>
3956  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3957  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3958  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3959
3960  unsigned ImmR = RegSize - Shift;
3961  // Limit the width to the length of the source type.
3962  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3963  static const unsigned OpcTable[2][2] = {
3964    {AArch64::SBFMWri, AArch64::SBFMXri},
3965    {AArch64::UBFMWri, AArch64::UBFMXri}
3966  };
3967  unsigned Opc = OpcTable[IsZExt][Is64Bit];
3968  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3969    unsigned TmpReg = MRI.createVirtualRegister(RC);
3970    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3971            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3972        .addImm(0)
3973        .addReg(Op0, getKillRegState(Op0IsKill))
3974        .addImm(AArch64::sub_32);
3975    Op0 = TmpReg;
3976    Op0IsKill = true;
3977  }
3978  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3979}
3980
3981unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3982                                     unsigned Op1Reg, bool Op1IsKill) {
3983  unsigned Opc = 0;
3984  bool NeedTrunc = false;
3985  uint64_t Mask = 0;
3986  switch (RetVT.SimpleTy) {
3987  default: return 0;
3988  case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
3989  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
3990  case MVT::i32: Opc = AArch64::LSRVWr; break;
3991  case MVT::i64: Opc = AArch64::LSRVXr; break;
3992  }
3993
3994  const TargetRegisterClass *RC =
3995      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3996  if (NeedTrunc) {
3997    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
3998    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3999    Op0IsKill = Op1IsKill = true;
4000  }
4001  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4002                                       Op1IsKill);
4003  if (NeedTrunc)
4004    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4005  return ResultReg;
4006}
4007
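/// \brief Emit a logical right shift by an immediate amount. A pending
/// zero-extension can be folded into the UBFM; a sign-extension is emitted
/// explicitly first.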
4008unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4009                                     bool Op0IsKill, uint64_t Shift,
4010                                     bool IsZExt) {
4011  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4012         "Unexpected source/return type pair.");
4013  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4014          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4015         "Unexpected source value type.");
4016  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4017          RetVT == MVT::i64) && "Unexpected return value type.");
4018
4019  bool Is64Bit = (RetVT == MVT::i64);
4020  unsigned RegSize = Is64Bit ? 64 : 32;
4021  unsigned DstBits = RetVT.getSizeInBits();
4022  unsigned SrcBits = SrcVT.getSizeInBits();
4023  const TargetRegisterClass *RC =
4024      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4025
4026  // Just emit a copy for "zero" shifts.
4027  if (Shift == 0) {
4028    if (RetVT == SrcVT) {
4029      unsigned ResultReg = createResultReg(RC);
4030      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4031              TII.get(TargetOpcode::COPY), ResultReg)
4032          .addReg(Op0, getKillRegState(Op0IsKill));
4033      return ResultReg;
4034    } else
4035      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4036  }
4037
4038  // Don't deal with undefined shifts.
4039  if (Shift >= DstBits)
4040    return 0;
4041
4042  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4043  // {S|U}BFM Wd, Wn, #r, #s
4044  // Wd<s-r:0> = Wn<s:r> when r <= s
4045
4046  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4047  // %2 = lshr i16 %1, 4
4048  // Wd<7-4:0> = Wn<7:4>
4049  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4050  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4051  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4052
4053  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4054  // %2 = lshr i16 %1, 8
4055  // Wd<7-7,0> = Wn<7:7>
4056  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4057  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4058  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4059
4060  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4061  // %2 = lshr i16 %1, 12
4062  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4063  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4064  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4065  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4066
4067  if (Shift >= SrcBits && IsZExt)
4068    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4069
4070  // It is not possible to fold a sign-extend into the LShr instruction. In this
4071  // case emit a sign-extend.
4072  if (!IsZExt) {
4073    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4074    if (!Op0)
4075      return 0;
4076    Op0IsKill = true;
4077    SrcVT = RetVT;
4078    SrcBits = SrcVT.getSizeInBits();
4079    IsZExt = true;
4080  }
4081
4082  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4083  unsigned ImmS = SrcBits - 1;
4084  static const unsigned OpcTable[2][2] = {
4085    {AArch64::SBFMWri, AArch64::SBFMXri},
4086    {AArch64::UBFMWri, AArch64::UBFMXri}
4087  };
4088  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4089  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4090    unsigned TmpReg = MRI.createVirtualRegister(RC);
4091    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4092            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4093        .addImm(0)
4094        .addReg(Op0, getKillRegState(Op0IsKill))
4095        .addImm(AArch64::sub_32);
4096    Op0 = TmpReg;
4097    Op0IsKill = true;
4098  }
4099  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4100}
4101
4102unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4103                                     unsigned Op1Reg, bool Op1IsKill) {
4104  unsigned Opc = 0;
4105  bool NeedTrunc = false;
4106  uint64_t Mask = 0;
4107  switch (RetVT.SimpleTy) {
4108  default: return 0;
4109  case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4110  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4111  case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4112  case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4113  }
4114
4115  const TargetRegisterClass *RC =
4116      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4117  if (NeedTrunc) {
4118    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4119    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4120    Op0IsKill = Op1IsKill = true;
4121  }
4122  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4123                                       Op1IsKill);
4124  if (NeedTrunc)
4125    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4126  return ResultReg;
4127}
4128
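/// \brief Emit an arithmetic right shift by an immediate amount, folding a
/// pending zero-/sign-extension of the source into a single {S|U}BFM when
/// possible.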
4129unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4130                                     bool Op0IsKill, uint64_t Shift,
4131                                     bool IsZExt) {
4132  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4133         "Unexpected source/return type pair.");
4134  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4135          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4136         "Unexpected source value type.");
4137  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4138          RetVT == MVT::i64) && "Unexpected return value type.");
4139
4140  bool Is64Bit = (RetVT == MVT::i64);
4141  unsigned RegSize = Is64Bit ? 64 : 32;
4142  unsigned DstBits = RetVT.getSizeInBits();
4143  unsigned SrcBits = SrcVT.getSizeInBits();
4144  const TargetRegisterClass *RC =
4145      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4146
4147  // Just emit a copy for "zero" shifts.
4148  if (Shift == 0) {
4149    if (RetVT == SrcVT) {
4150      unsigned ResultReg = createResultReg(RC);
4151      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4152              TII.get(TargetOpcode::COPY), ResultReg)
4153          .addReg(Op0, getKillRegState(Op0IsKill));
4154      return ResultReg;
4155    } else
4156      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4157  }
4158
4159  // Don't deal with undefined shifts.
4160  if (Shift >= DstBits)
4161    return 0;
4162
4163  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4164  // {S|U}BFM Wd, Wn, #r, #s
4165  // Wd<s-r:0> = Wn<s:r> when r <= s
4166
4167  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4168  // %2 = ashr i16 %1, 4
4169  // Wd<7-4:0> = Wn<7:4>
4170  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4171  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4172  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4173
4174  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4175  // %2 = ashr i16 %1, 8
4176  // Wd<7-7,0> = Wn<7:7>
4177  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4178  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4179  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4180
4181  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4182  // %2 = ashr i16 %1, 12
4183  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4184  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4185  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4186  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4187
4188  if (Shift >= SrcBits && IsZExt)
4189    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4190
4191  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4192  unsigned ImmS = SrcBits - 1;
4193  static const unsigned OpcTable[2][2] = {
4194    {AArch64::SBFMWri, AArch64::SBFMXri},
4195    {AArch64::UBFMWri, AArch64::UBFMXri}
4196  };
4197  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4198  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4199    unsigned TmpReg = MRI.createVirtualRegister(RC);
4200    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4201            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4202        .addImm(0)
4203        .addReg(Op0, getKillRegState(Op0IsKill))
4204        .addImm(AArch64::sub_32);
4205    Op0 = TmpReg;
4206    Op0IsKill = true;
4207  }
4208  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4209}
4210
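/// \brief Zero- or sign-extend \p SrcReg from \p SrcVT to \p DestVT using
/// UBFM/SBFM (or emiti1Ext for i1 sources). Returns 0 if the combination of
/// types is not handled.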
4211unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4212                                     bool IsZExt) {
4213  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4214
4215  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4216  // DestVT are odd things, so test to make sure that they are both types we can
4217  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4218  // bail out to SelectionDAG.
4219  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4220       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4221      ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4222       (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4223    return 0;
4224
4225  unsigned Opc;
4226  unsigned Imm = 0;
4227
4228  switch (SrcVT.SimpleTy) {
4229  default:
4230    return 0;
4231  case MVT::i1:
4232    return emiti1Ext(SrcReg, DestVT, IsZExt);
4233  case MVT::i8:
4234    if (DestVT == MVT::i64)
4235      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4236    else
4237      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4238    Imm = 7;
4239    break;
4240  case MVT::i16:
4241    if (DestVT == MVT::i64)
4242      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4243    else
4244      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4245    Imm = 15;
4246    break;
4247  case MVT::i32:
4248    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4249    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4250    Imm = 31;
4251    break;
4252  }
4253
4254  // Handle i8 and i16 as i32.
4255  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4256    DestVT = MVT::i32;
4257  else if (DestVT == MVT::i64) {
4258    unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4259    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4260            TII.get(AArch64::SUBREG_TO_REG), Src64)
4261        .addImm(0)
4262        .addReg(SrcReg)
4263        .addImm(AArch64::sub_32);
4264    SrcReg = Src64;
4265  }
4266
4267  const TargetRegisterClass *RC =
4268      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4269  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4270}
4271
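/// \brief Return true if \p LI is a load whose result is implicitly
/// zero-extended.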
4272static bool isZExtLoad(const MachineInstr *LI) {
4273  switch (LI->getOpcode()) {
4274  default:
4275    return false;
4276  case AArch64::LDURBBi:
4277  case AArch64::LDURHHi:
4278  case AArch64::LDURWi:
4279  case AArch64::LDRBBui:
4280  case AArch64::LDRHHui:
4281  case AArch64::LDRWui:
4282  case AArch64::LDRBBroX:
4283  case AArch64::LDRHHroX:
4284  case AArch64::LDRWroX:
4285  case AArch64::LDRBBroW:
4286  case AArch64::LDRHHroW:
4287  case AArch64::LDRWroW:
4288    return true;
4289  }
4290}
4291
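/// \brief Return true if \p LI is a load whose result is sign-extended.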
4292static bool isSExtLoad(const MachineInstr *LI) {
4293  switch (LI->getOpcode()) {
4294  default:
4295    return false;
4296  case AArch64::LDURSBWi:
4297  case AArch64::LDURSHWi:
4298  case AArch64::LDURSBXi:
4299  case AArch64::LDURSHXi:
4300  case AArch64::LDURSWi:
4301  case AArch64::LDRSBWui:
4302  case AArch64::LDRSHWui:
4303  case AArch64::LDRSBXui:
4304  case AArch64::LDRSHXui:
4305  case AArch64::LDRSWui:
4306  case AArch64::LDRSBWroX:
4307  case AArch64::LDRSHWroX:
4308  case AArch64::LDRSBXroX:
4309  case AArch64::LDRSHXroX:
4310  case AArch64::LDRSWroX:
4311  case AArch64::LDRSBWroW:
4312  case AArch64::LDRSHWroW:
4313  case AArch64::LDRSBXroW:
4314  case AArch64::LDRSHXroW:
4315  case AArch64::LDRSWroW:
4316    return true;
4317  }
4318}
4319
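/// \brief Try to fold a sign-/zero-extend of a load into the load itself by
/// reusing the already selected load when its extension kind matches.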
4320bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4321                                         MVT SrcVT) {
4322  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4323  if (!LI || !LI->hasOneUse())
4324    return false;
4325
4326  // Check if the load instruction has already been selected.
4327  unsigned Reg = lookUpRegForValue(LI);
4328  if (!Reg)
4329    return false;
4330
4331  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4332  if (!MI)
4333    return false;
4334
4335  // Check if the correct load instruction has been emitted - SelectionDAG might
4336  // have emitted a zero-extending load, but we need a sign-extending load.
4337  bool IsZExt = isa<ZExtInst>(I);
4338  const auto *LoadMI = MI;
4339  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4340      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4341    unsigned LoadReg = MI->getOperand(1).getReg();
4342    LoadMI = MRI.getUniqueVRegDef(LoadReg);
4343    assert(LoadMI && "Expected valid instruction");
4344  }
4345  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4346    return false;
4347
4348  // Nothing to be done.
4349  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4350    updateValueMap(I, Reg);
4351    return true;
4352  }
4353
4354  if (IsZExt) {
4355    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4356    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4357            TII.get(AArch64::SUBREG_TO_REG), Reg64)
4358        .addImm(0)
4359        .addReg(Reg, getKillRegState(true))
4360        .addImm(AArch64::sub_32);
4361    Reg = Reg64;
4362  } else {
4363    assert((MI->getOpcode() == TargetOpcode::COPY &&
4364            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4365           "Expected copy instruction");
4366    Reg = MI->getOperand(1).getReg();
4367    MI->eraseFromParent();
4368  }
4369  updateValueMap(I, Reg);
4370  return true;
4371}
4372
4373bool AArch64FastISel::selectIntExt(const Instruction *I) {
4374  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4375         "Unexpected integer extend instruction.");
4376  MVT RetVT;
4377  MVT SrcVT;
4378  if (!isTypeSupported(I->getType(), RetVT))
4379    return false;
4380
4381  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4382    return false;
4383
4384  // Try to optimize already sign-/zero-extended values from load instructions.
4385  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4386    return true;
4387
4388  unsigned SrcReg = getRegForValue(I->getOperand(0));
4389  if (!SrcReg)
4390    return false;
4391  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4392
4393  // Try to optimize already sign-/zero-extended values from function arguments.
4394  bool IsZExt = isa<ZExtInst>(I);
4395  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4396    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4397      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4398        unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4399        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4400                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4401            .addImm(0)
4402            .addReg(SrcReg, getKillRegState(SrcIsKill))
4403            .addImm(AArch64::sub_32);
4404        SrcReg = ResultReg;
4405      }
4406      // Conservatively clear all kill flags from all uses, because we are
4407      // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4408      // level. The result of the instruction at IR level might have been
4409      // trivially dead, which is now no longer true.
4410      unsigned UseReg = lookUpRegForValue(I);
4411      if (UseReg)
4412        MRI.clearKillFlags(UseReg);
4413
4414      updateValueMap(I, SrcReg);
4415      return true;
4416    }
4417  }
4418
4419  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4420  if (!ResultReg)
4421    return false;
4422
4423  updateValueMap(I, ResultReg);
4424  return true;
4425}
4426
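/// \brief Lower srem/urem as a divide followed by an MSUB that computes
/// numerator - quotient * denominator.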
4427bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4428  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4429  if (!DestEVT.isSimple())
4430    return false;
4431
4432  MVT DestVT = DestEVT.getSimpleVT();
4433  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4434    return false;
4435
4436  unsigned DivOpc;
4437  bool Is64bit = (DestVT == MVT::i64);
4438  switch (ISDOpcode) {
4439  default:
4440    return false;
4441  case ISD::SREM:
4442    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4443    break;
4444  case ISD::UREM:
4445    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4446    break;
4447  }
4448  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4449  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4450  if (!Src0Reg)
4451    return false;
4452  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4453
4454  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4455  if (!Src1Reg)
4456    return false;
4457  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4458
4459  const TargetRegisterClass *RC =
4460      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4461  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4462                                     Src1Reg, /*IsKill=*/false);
4463  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4464  // The remainder is computed as numerator - (quotient * denominator) using the
4465  // MSUB instruction.
4466  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4467                                        Src1Reg, Src1IsKill, Src0Reg,
4468                                        Src0IsKill);
4469  updateValueMap(I, ResultReg);
4470  return true;
4471}
4472
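/// \brief Lower an integer multiply, turning a multiply by a power of two
/// into a left shift when possible.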
4473bool AArch64FastISel::selectMul(const Instruction *I) {
4474  MVT VT;
4475  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4476    return false;
4477
4478  if (VT.isVector())
4479    return selectBinaryOp(I, ISD::MUL);
4480
4481  const Value *Src0 = I->getOperand(0);
4482  const Value *Src1 = I->getOperand(1);
4483  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4484    if (C->getValue().isPowerOf2())
4485      std::swap(Src0, Src1);
4486
4487  // Try to simplify to a shift instruction.
4488  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4489    if (C->getValue().isPowerOf2()) {
4490      uint64_t ShiftVal = C->getValue().logBase2();
4491      MVT SrcVT = VT;
4492      bool IsZExt = true;
4493      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4494        if (!isIntExtFree(ZExt)) {
4495          MVT VT;
4496          if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4497            SrcVT = VT;
4498            IsZExt = true;
4499            Src0 = ZExt->getOperand(0);
4500          }
4501        }
4502      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4503        if (!isIntExtFree(SExt)) {
4504          MVT VT;
4505          if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4506            SrcVT = VT;
4507            IsZExt = false;
4508            Src0 = SExt->getOperand(0);
4509          }
4510        }
4511      }

      unsigned Src0Reg = getRegForValue(Src0);
      if (!Src0Reg)
        return false;
      bool Src0IsKill = hasTrivialKill(Src0);

      unsigned ResultReg =
          emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);

      if (ResultReg) {
        updateValueMap(I, ResultReg);
        return true;
      }
    }

  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;
  bool Src1IsKill = hasTrivialKill(I->getOperand(1));

  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

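/// Select a shift. Vector shifts use the generic operator path; scalar shifts
/// by a constant use the immediate-form emitters, and variable shift amounts
/// use the register-register forms.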
bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    return false;

  if (RetVT.isVector())
    return selectOperator(I, I->getOpcode());

  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    bool IsZExt = I->getOpcode() != Instruction::AShr;
    const Value *Op0 = I->getOperand(0);
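    // As in selectMul, look through a non-free zero/sign extension so the
    // extend can be folded into the immediate shift below.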
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
      if (!isIntExtFree(ZExt)) {
        MVT TmpVT;
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = true;
          Op0 = ZExt->getOperand(0);
        }
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
      if (!isIntExtFree(SExt)) {
        MVT TmpVT;
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = false;
          Op0 = SExt->getOperand(0);
        }
      }
    }

    unsigned Op0Reg = getRegForValue(Op0);
    if (!Op0Reg)
      return false;
    bool Op0IsKill = hasTrivialKill(Op0);

    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction.");
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, ResultReg);
    return true;
  }

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (!Op1Reg)
    return false;
  bool Op1IsKill = hasTrivialKill(I->getOperand(1));

  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  }

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

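/// Select a bitcast between a 32/64-bit integer and the equally sized
/// floating-point type by moving the value between the GPR and FPR register
/// files with an FMOV; all other bitcasts are rejected.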
bool AArch64FastISel::selectBitCast(const Instruction *I) {
  MVT RetVT, SrcVT;

  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    return false;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  unsigned Opc;
  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    Opc = AArch64::FMOVWSr;
  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    Opc = AArch64::FMOVXDr;
  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    Opc = AArch64::FMOVSWr;
  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    Opc = AArch64::FMOVDXr;
  else
    return false;

  const TargetRegisterClass *RC = nullptr;
  switch (RetVT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
  }
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

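/// AArch64 has no FP remainder instruction, so lower frem to a runtime call
/// through the REM_F32/REM_F64 libcalls (i.e., fmodf/fmod).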
bool AArch64FastISel::selectFRem(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  RTLIB::Libcall LC;
  switch (RetVT.SimpleTy) {
  default:
    return false;
  case MVT::f32:
    LC = RTLIB::REM_F32;
    break;
  case MVT::f64:
    LC = RTLIB::REM_F64;
    break;
  }

  ArgListTy Args;
  Args.reserve(I->getNumOperands());

  // Populate the argument list.
  for (auto &Arg : I->operands()) {
    ArgListEntry Entry;
    Entry.Val = Arg;
    Entry.Ty = Arg->getType();
    Args.push_back(Entry);
  }

  CallLoweringInfo CLI;
  MCContext &Ctx = MF->getContext();
  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
                TLI.getLibcallName(LC), std::move(Args));
  if (!lowerCallTo(CLI))
    return false;
  updateValueMap(I, CLI.ResultReg);
  return true;
}

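/// Select a signed divide. Division by a power of two (or its negation) is
/// lowered without a divide instruction: bias a negative dividend by Pow2 - 1,
/// arithmetic-shift right by log2(Pow2), and negate the result if the divisor
/// is negative. Everything else goes through the generic SDIV path.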
bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return selectBinaryOp(I, ISD::SDIV);

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || (-C).isPowerOf2()))
    return selectBinaryOp(I, ISD::SDIV);

  unsigned Lg2 = C.countTrailingZeros();
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  if (cast<BinaryOperator>(I)->isExact()) {
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

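  // Bias the dividend: AddReg = Src0 + (Pow2 - 1).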
  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
  if (!AddReg)
    return false;

  // Compare Src0 against zero; the CSEL below picks the biased value when Src0
  // is negative and Src0 itself otherwise:
  //   SelectReg = (Src0 < 0) ? (Src0 + Pow2 - 1) : Src0
  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  unsigned SelectReg =
      fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
                       Src0IsKill, AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
  // negate the result.
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg;
  if (C.isNegative())
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
                              SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
/// have to duplicate it for AArch64, because otherwise we would fail during the
/// sign-extend emission.
std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
  unsigned IdxN = getRegForValue(Idx);
  if (IdxN == 0)
    // Unhandled operand. Halt "fast" selection and bail.
    return std::pair<unsigned, bool>(0, false);

  bool IdxNIsKill = hasTrivialKill(Idx);

  // If the index is smaller than intptr_t, sign-extend it; AArch64 has no
  // integer types wider than the 64-bit pointer, so no truncation is needed.
  MVT PtrVT = TLI.getPointerTy(DL);
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
    IdxNIsKill = true;
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
}

/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  unsigned N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;
  bool NIsKill = hasTrivialKill(I->getOperand(0));

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  Type *Ty = I->getOperand(0)->getType();
  MVT VT = TLI.getPointerTy(DL);
  for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
    const Value *Idx = *OI;
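    // Struct indices contribute a constant field offset; indices into
    // sequential types are scaled by the element's allocation size.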
    if (auto *StTy = dyn_cast<StructType>(Ty)) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
      Ty = StTy->getElementType(Field);
    } else {
      Ty = cast<SequentialType>(Ty)->getElementType();
      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs += DL.getTypeAllocSize(Ty) * CI->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
        if (!N)
          return false;
        NIsKill = true;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
      unsigned IdxN = Pair.first;
      bool IdxNIsKill = Pair.second;
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
        if (!IdxN)
          return false;
        IdxNIsKill = true;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}

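/// Dispatch \p I to the AArch64-specific selection routines above; anything
/// not handled here falls back to the target-independent selectOperator path.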
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISD::UREM))
      return selectRem(I, ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  }

  // Fall back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
  // Reference CC_AArch64_DarwinPCS_VarArg so the compiler does not warn about
  // an otherwise unused calling-convention function.
  (void)&CC_AArch64_DarwinPCS_VarArg;
}

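// Entry point used by the AArch64 backend to create a FastISel instance for a
// function.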
namespace llvm {
llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                        const TargetLibraryInfo *LibInfo) {
  return new AArch64FastISel(FuncInfo, LibInfo);
}
}