1//===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the AArch64-specific support for the FastISel class. Some
11// of the target-specific code is generated by tablegen in the file
12// AArch64GenFastISel.inc, which is #included here.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AArch64.h"
17#include "AArch64CallingConvention.h"
18#include "AArch64Subtarget.h"
19#include "AArch64TargetMachine.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "llvm/Analysis/BranchProbabilityInfo.h"
22#include "llvm/CodeGen/CallingConvLower.h"
23#include "llvm/CodeGen/FastISel.h"
24#include "llvm/CodeGen/FunctionLoweringInfo.h"
25#include "llvm/CodeGen/MachineConstantPool.h"
26#include "llvm/CodeGen/MachineFrameInfo.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/IR/CallingConv.h"
30#include "llvm/IR/DataLayout.h"
31#include "llvm/IR/DerivedTypes.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/GetElementPtrTypeIterator.h"
34#include "llvm/IR/GlobalAlias.h"
35#include "llvm/IR/GlobalVariable.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/IntrinsicInst.h"
38#include "llvm/IR/Operator.h"
39#include "llvm/Support/CommandLine.h"
40using namespace llvm;
41
42namespace {
43
44class AArch64FastISel final : public FastISel {
45  class Address {
46  public:
47    typedef enum {
48      RegBase,
49      FrameIndexBase
50    } BaseKind;
51
52  private:
53    BaseKind Kind;
54    AArch64_AM::ShiftExtendType ExtType;
55    union {
56      unsigned Reg;
57      int FI;
58    } Base;
59    unsigned OffsetReg;
60    unsigned Shift;
61    int64_t Offset;
62    const GlobalValue *GV;
63
64  public:
65    Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
66      OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
67    void setKind(BaseKind K) { Kind = K; }
68    BaseKind getKind() const { return Kind; }
69    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
70    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
71    bool isRegBase() const { return Kind == RegBase; }
72    bool isFIBase() const { return Kind == FrameIndexBase; }
73    void setReg(unsigned Reg) {
74      assert(isRegBase() && "Invalid base register access!");
75      Base.Reg = Reg;
76    }
77    unsigned getReg() const {
78      assert(isRegBase() && "Invalid base register access!");
79      return Base.Reg;
80    }
81    void setOffsetReg(unsigned Reg) {
82      OffsetReg = Reg;
83    }
84    unsigned getOffsetReg() const {
85      return OffsetReg;
86    }
87    void setFI(unsigned FI) {
88      assert(isFIBase() && "Invalid base frame index  access!");
89      Base.FI = FI;
90    }
91    unsigned getFI() const {
92      assert(isFIBase() && "Invalid base frame index access!");
93      return Base.FI;
94    }
95    void setOffset(int64_t O) { Offset = O; }
96    int64_t getOffset() { return Offset; }
97    void setShift(unsigned S) { Shift = S; }
98    unsigned getShift() { return Shift; }
99
100    void setGlobalValue(const GlobalValue *G) { GV = G; }
101    const GlobalValue *getGlobalValue() { return GV; }
102  };
103
104  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
105  /// make the right decision when generating code for different targets.
106  const AArch64Subtarget *Subtarget;
107  LLVMContext *Context;
108
109  bool fastLowerArguments() override;
110  bool fastLowerCall(CallLoweringInfo &CLI) override;
111  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
112
113private:
114  // Selection routines.
115  bool selectAddSub(const Instruction *I);
116  bool selectLogicalOp(const Instruction *I);
117  bool selectLoad(const Instruction *I);
118  bool selectStore(const Instruction *I);
119  bool selectBranch(const Instruction *I);
120  bool selectIndirectBr(const Instruction *I);
121  bool selectCmp(const Instruction *I);
122  bool selectSelect(const Instruction *I);
123  bool selectFPExt(const Instruction *I);
124  bool selectFPTrunc(const Instruction *I);
125  bool selectFPToInt(const Instruction *I, bool Signed);
126  bool selectIntToFP(const Instruction *I, bool Signed);
127  bool selectRem(const Instruction *I, unsigned ISDOpcode);
128  bool selectRet(const Instruction *I);
129  bool selectTrunc(const Instruction *I);
130  bool selectIntExt(const Instruction *I);
131  bool selectMul(const Instruction *I);
132  bool selectShift(const Instruction *I);
133  bool selectBitCast(const Instruction *I);
134  bool selectFRem(const Instruction *I);
135  bool selectSDiv(const Instruction *I);
136  bool selectGetElementPtr(const Instruction *I);
137
138  // Utility helper routines.
139  bool isTypeLegal(Type *Ty, MVT &VT);
140  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
141  bool isValueAvailable(const Value *V) const;
142  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
143  bool computeCallAddress(const Value *V, Address &Addr);
144  bool simplifyAddress(Address &Addr, MVT VT);
145  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
146                            unsigned Flags, unsigned ScaleFactor,
147                            MachineMemOperand *MMO);
148  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
149  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
150                          unsigned Alignment);
151  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
152                         const Value *Cond);
153  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
154  bool optimizeSelect(const SelectInst *SI);
155  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
156
157  // Emit helper routines.
158  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
159                      const Value *RHS, bool SetFlags = false,
160                      bool WantResult = true,  bool IsZExt = false);
161  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
162                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
163                         bool SetFlags = false, bool WantResult = true);
164  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
165                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
166                         bool WantResult = true);
167  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
168                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
169                         AArch64_AM::ShiftExtendType ShiftType,
170                         uint64_t ShiftImm, bool SetFlags = false,
171                         bool WantResult = true);
172  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
173                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
174                          AArch64_AM::ShiftExtendType ExtType,
175                          uint64_t ShiftImm, bool SetFlags = false,
176                         bool WantResult = true);
177
178  // Emit functions.
179  bool emitCompareAndBranch(const BranchInst *BI);
180  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
181  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
182  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
183  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
184  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
185                    MachineMemOperand *MMO = nullptr);
186  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
187                 MachineMemOperand *MMO = nullptr);
188  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
189  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
190  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
191                   bool SetFlags = false, bool WantResult = true,
192                   bool IsZExt = false);
193  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
194  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
195                   bool SetFlags = false, bool WantResult = true,
196                   bool IsZExt = false);
197  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
198                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
199  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
200                       unsigned RHSReg, bool RHSIsKill,
201                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
202                       bool WantResult = true);
203  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
204                         const Value *RHS);
205  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
206                            bool LHSIsKill, uint64_t Imm);
207  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
208                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
209                            uint64_t ShiftImm);
210  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
211  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
212                      unsigned Op1, bool Op1IsKill);
213  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
214                        unsigned Op1, bool Op1IsKill);
215  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
216                        unsigned Op1, bool Op1IsKill);
217  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
218                      unsigned Op1Reg, bool Op1IsKill);
219  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
220                      uint64_t Imm, bool IsZExt = true);
221  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
222                      unsigned Op1Reg, bool Op1IsKill);
223  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
224                      uint64_t Imm, bool IsZExt = true);
225  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
226                      unsigned Op1Reg, bool Op1IsKill);
227  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
228                      uint64_t Imm, bool IsZExt = false);
229
230  unsigned materializeInt(const ConstantInt *CI, MVT VT);
231  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
232  unsigned materializeGV(const GlobalValue *GV);
233
234  // Call handling routines.
235private:
236  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
237  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
238                       unsigned &NumBytes);
239  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
240
241public:
242  // Backend specific FastISel code.
243  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
244  unsigned fastMaterializeConstant(const Constant *C) override;
245  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
246
247  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
248                           const TargetLibraryInfo *LibInfo)
249      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
250    Subtarget =
251        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
252    Context = &FuncInfo.Fn->getContext();
253  }
254
255  bool fastSelectInstruction(const Instruction *I) override;
256
257#include "AArch64GenFastISel.inc"
258};
259
260} // end anonymous namespace
261
262#include "AArch64GenCallingConv.inc"
263
264/// \brief Check if the sign-/zero-extend will be a noop.
265static bool isIntExtFree(const Instruction *I) {
266  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
267         "Unexpected integer extend instruction.");
268  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
269         "Unexpected value type.");
270  bool IsZExt = isa<ZExtInst>(I);
271
272  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
273    if (LI->hasOneUse())
274      return true;
275
276  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
277    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
278      return true;
279
280  return false;
281}
282
283/// \brief Determine the implicit scale factor that is applied by a memory
284/// operation for a given value type.
285static unsigned getImplicitScaleFactor(MVT VT) {
286  switch (VT.SimpleTy) {
287  default:
288    return 0;    // invalid
289  case MVT::i1:  // fall-through
290  case MVT::i8:
291    return 1;
292  case MVT::i16:
293    return 2;
294  case MVT::i32: // fall-through
295  case MVT::f32:
296    return 4;
297  case MVT::i64: // fall-through
298  case MVT::f64:
299    return 8;
300  }
301}
302
303CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
304  if (CC == CallingConv::WebKit_JS)
305    return CC_AArch64_WebKit_JS;
306  if (CC == CallingConv::GHC)
307    return CC_AArch64_GHC;
308  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
309}
310
311unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
312  assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
313         "Alloca should always return a pointer.");
314
315  // Don't handle dynamic allocas.
316  if (!FuncInfo.StaticAllocaMap.count(AI))
317    return 0;
318
319  DenseMap<const AllocaInst *, int>::iterator SI =
320      FuncInfo.StaticAllocaMap.find(AI);
321
322  if (SI != FuncInfo.StaticAllocaMap.end()) {
323    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
324    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
325            ResultReg)
326        .addFrameIndex(SI->second)
327        .addImm(0)
328        .addImm(0);
329    return ResultReg;
330  }
331
332  return 0;
333}
334
335unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
336  if (VT > MVT::i64)
337    return 0;
338
339  if (!CI->isZero())
340    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
341
342  // Create a copy from the zero register to materialize a "0" value.
343  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
344                                                   : &AArch64::GPR32RegClass;
345  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
346  unsigned ResultReg = createResultReg(RC);
347  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
348          ResultReg).addReg(ZeroReg, getKillRegState(true));
349  return ResultReg;
350}
351
352unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
353  // Positive zero (+0.0) has to be materialized with a fmov from the zero
354  // register, because the immediate version of fmov cannot encode zero.
355  if (CFP->isNullValue())
356    return fastMaterializeFloatZero(CFP);
357
358  if (VT != MVT::f32 && VT != MVT::f64)
359    return 0;
360
361  const APFloat Val = CFP->getValueAPF();
362  bool Is64Bit = (VT == MVT::f64);
363  // This checks to see if we can use FMOV instructions to materialize
364  // a constant, otherwise we have to materialize via the constant pool.
365  if (TLI.isFPImmLegal(Val, VT)) {
366    int Imm =
367        Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
368    assert((Imm != -1) && "Cannot encode floating-point constant.");
369    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
370    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
371  }
372
373  // For the MachO large code model materialize the FP constant in code.
374  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
375    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
376    const TargetRegisterClass *RC = Is64Bit ?
377        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
378
379    unsigned TmpReg = createResultReg(RC);
380    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
381        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
382
383    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
384    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
385            TII.get(TargetOpcode::COPY), ResultReg)
386        .addReg(TmpReg, getKillRegState(true));
387
388    return ResultReg;
389  }
390
391  // Materialize via constant pool.  MachineConstantPool wants an explicit
392  // alignment.
393  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
394  if (Align == 0)
395    Align = DL.getTypeAllocSize(CFP->getType());
396
397  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
398  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
399  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
400          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
401
402  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
403  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
404  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
405      .addReg(ADRPReg)
406      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
407  return ResultReg;
408}
409
410unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
411  // We can't handle thread-local variables quickly yet.
412  if (GV->isThreadLocal())
413    return 0;
414
415  // MachO still uses GOT for large code-model accesses, but ELF requires
416  // movz/movk sequences, which FastISel doesn't handle yet.
417  if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
418    return 0;
419
420  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
421
422  EVT DestEVT = TLI.getValueType(GV->getType(), true);
423  if (!DestEVT.isSimple())
424    return 0;
425
426  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
427  unsigned ResultReg;
428
429  if (OpFlags & AArch64II::MO_GOT) {
430    // ADRP + LDRX
431    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
432            ADRPReg)
433      .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
434
435    ResultReg = createResultReg(&AArch64::GPR64RegClass);
436    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
437            ResultReg)
438      .addReg(ADRPReg)
439      .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
440                        AArch64II::MO_NC);
441  } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
442    // We can't handle addresses loaded from a constant pool quickly yet.
443    return 0;
444  } else {
445    // ADRP + ADDX
446    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
447            ADRPReg)
448      .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
449
450    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
451    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
452            ResultReg)
453      .addReg(ADRPReg)
454      .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
455      .addImm(0);
456  }
457  return ResultReg;
458}
459
460unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
461  EVT CEVT = TLI.getValueType(C->getType(), true);
462
463  // Only handle simple types.
464  if (!CEVT.isSimple())
465    return 0;
466  MVT VT = CEVT.getSimpleVT();
467
468  if (const auto *CI = dyn_cast<ConstantInt>(C))
469    return materializeInt(CI, VT);
470  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
471    return materializeFP(CFP, VT);
472  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
473    return materializeGV(GV);
474
475  return 0;
476}
477
478unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
479  assert(CFP->isNullValue() &&
480         "Floating-point constant is not a positive zero.");
481  MVT VT;
482  if (!isTypeLegal(CFP->getType(), VT))
483    return 0;
484
485  if (VT != MVT::f32 && VT != MVT::f64)
486    return 0;
487
488  bool Is64Bit = (VT == MVT::f64);
489  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
490  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
491  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
492}
493
494/// \brief Check if the multiply is by a power-of-2 constant.
495static bool isMulPowOf2(const Value *I) {
496  if (const auto *MI = dyn_cast<MulOperator>(I)) {
497    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
498      if (C->getValue().isPowerOf2())
499        return true;
500    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
501      if (C->getValue().isPowerOf2())
502        return true;
503  }
504  return false;
505}
506
507// Computes the address to get to an object.
508bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
509{
510  const User *U = nullptr;
511  unsigned Opcode = Instruction::UserOp1;
512  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
513    // Don't walk into other basic blocks unless the object is an alloca from
514    // another block, otherwise it may not have a virtual register assigned.
515    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
516        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
517      Opcode = I->getOpcode();
518      U = I;
519    }
520  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
521    Opcode = C->getOpcode();
522    U = C;
523  }
524
525  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
526    if (Ty->getAddressSpace() > 255)
527      // Fast instruction selection doesn't support the special
528      // address spaces.
529      return false;
530
531  switch (Opcode) {
532  default:
533    break;
534  case Instruction::BitCast: {
535    // Look through bitcasts.
536    return computeAddress(U->getOperand(0), Addr, Ty);
537  }
538  case Instruction::IntToPtr: {
539    // Look past no-op inttoptrs.
540    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
541      return computeAddress(U->getOperand(0), Addr, Ty);
542    break;
543  }
544  case Instruction::PtrToInt: {
545    // Look past no-op ptrtoints.
546    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
547      return computeAddress(U->getOperand(0), Addr, Ty);
548    break;
549  }
550  case Instruction::GetElementPtr: {
551    Address SavedAddr = Addr;
552    uint64_t TmpOffset = Addr.getOffset();
553
554    // Iterate through the GEP folding the constants into offsets where
555    // we can.
556    gep_type_iterator GTI = gep_type_begin(U);
557    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
558         ++i, ++GTI) {
559      const Value *Op = *i;
560      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
561        const StructLayout *SL = DL.getStructLayout(STy);
562        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
563        TmpOffset += SL->getElementOffset(Idx);
564      } else {
565        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
566        for (;;) {
567          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
568            // Constant-offset addressing.
569            TmpOffset += CI->getSExtValue() * S;
570            break;
571          }
572          if (canFoldAddIntoGEP(U, Op)) {
573            // A compatible add with a constant operand. Fold the constant.
574            ConstantInt *CI =
575                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
576            TmpOffset += CI->getSExtValue() * S;
577            // Iterate on the other operand.
578            Op = cast<AddOperator>(Op)->getOperand(0);
579            continue;
580          }
581          // Unsupported
582          goto unsupported_gep;
583        }
584      }
585    }
586
587    // Try to grab the base operand now.
588    Addr.setOffset(TmpOffset);
589    if (computeAddress(U->getOperand(0), Addr, Ty))
590      return true;
591
592    // We failed, restore everything and try the other options.
593    Addr = SavedAddr;
594
595  unsupported_gep:
596    break;
597  }
598  case Instruction::Alloca: {
599    const AllocaInst *AI = cast<AllocaInst>(Obj);
600    DenseMap<const AllocaInst *, int>::iterator SI =
601        FuncInfo.StaticAllocaMap.find(AI);
602    if (SI != FuncInfo.StaticAllocaMap.end()) {
603      Addr.setKind(Address::FrameIndexBase);
604      Addr.setFI(SI->second);
605      return true;
606    }
607    break;
608  }
609  case Instruction::Add: {
610    // Adds of constants are common and easy enough.
611    const Value *LHS = U->getOperand(0);
612    const Value *RHS = U->getOperand(1);
613
614    if (isa<ConstantInt>(LHS))
615      std::swap(LHS, RHS);
616
617    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
618      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
619      return computeAddress(LHS, Addr, Ty);
620    }
621
622    Address Backup = Addr;
623    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
624      return true;
625    Addr = Backup;
626
627    break;
628  }
629  case Instruction::Sub: {
630    // Subs of constants are common and easy enough.
631    const Value *LHS = U->getOperand(0);
632    const Value *RHS = U->getOperand(1);
633
634    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
635      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
636      return computeAddress(LHS, Addr, Ty);
637    }
638    break;
639  }
640  case Instruction::Shl: {
641    if (Addr.getOffsetReg())
642      break;
643
644    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
645    if (!CI)
646      break;
647
648    unsigned Val = CI->getZExtValue();
649    if (Val < 1 || Val > 3)
650      break;
651
652    uint64_t NumBytes = 0;
653    if (Ty && Ty->isSized()) {
654      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
655      NumBytes = NumBits / 8;
656      if (!isPowerOf2_64(NumBits))
657        NumBytes = 0;
658    }
659
660    if (NumBytes != (1ULL << Val))
661      break;
662
663    Addr.setShift(Val);
664    Addr.setExtendType(AArch64_AM::LSL);
665
666    const Value *Src = U->getOperand(0);
667    if (const auto *I = dyn_cast<Instruction>(Src))
668      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
669        Src = I;
670
671    // Fold the zext or sext when it won't become a noop.
672    if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
673      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
674          Addr.setExtendType(AArch64_AM::UXTW);
675          Src = ZE->getOperand(0);
676      }
677    } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
678      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
679        Addr.setExtendType(AArch64_AM::SXTW);
680        Src = SE->getOperand(0);
681      }
682    }
683
684    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
685      if (AI->getOpcode() == Instruction::And) {
686        const Value *LHS = AI->getOperand(0);
687        const Value *RHS = AI->getOperand(1);
688
689        if (const auto *C = dyn_cast<ConstantInt>(LHS))
690          if (C->getValue() == 0xffffffff)
691            std::swap(LHS, RHS);
692
693        if (const auto *C = dyn_cast<ConstantInt>(RHS))
694          if (C->getValue() == 0xffffffff) {
695            Addr.setExtendType(AArch64_AM::UXTW);
696            unsigned Reg = getRegForValue(LHS);
697            if (!Reg)
698              return false;
699            bool RegIsKill = hasTrivialKill(LHS);
700            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
701                                             AArch64::sub_32);
702            Addr.setOffsetReg(Reg);
703            return true;
704          }
705      }
706
707    unsigned Reg = getRegForValue(Src);
708    if (!Reg)
709      return false;
710    Addr.setOffsetReg(Reg);
711    return true;
712  }
713  case Instruction::Mul: {
714    if (Addr.getOffsetReg())
715      break;
716
717    if (!isMulPowOf2(U))
718      break;
719
720    const Value *LHS = U->getOperand(0);
721    const Value *RHS = U->getOperand(1);
722
723    // Canonicalize power-of-2 value to the RHS.
724    if (const auto *C = dyn_cast<ConstantInt>(LHS))
725      if (C->getValue().isPowerOf2())
726        std::swap(LHS, RHS);
727
728    assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
729    const auto *C = cast<ConstantInt>(RHS);
730    unsigned Val = C->getValue().logBase2();
731    if (Val < 1 || Val > 3)
732      break;
733
734    uint64_t NumBytes = 0;
735    if (Ty && Ty->isSized()) {
736      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
737      NumBytes = NumBits / 8;
738      if (!isPowerOf2_64(NumBits))
739        NumBytes = 0;
740    }
741
742    if (NumBytes != (1ULL << Val))
743      break;
744
745    Addr.setShift(Val);
746    Addr.setExtendType(AArch64_AM::LSL);
747
748    const Value *Src = LHS;
749    if (const auto *I = dyn_cast<Instruction>(Src))
750      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
751        Src = I;
752
753
754    // Fold the zext or sext when it won't become a noop.
755    if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
756      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
757        Addr.setExtendType(AArch64_AM::UXTW);
758        Src = ZE->getOperand(0);
759      }
760    } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
761      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
762        Addr.setExtendType(AArch64_AM::SXTW);
763        Src = SE->getOperand(0);
764      }
765    }
766
767    unsigned Reg = getRegForValue(Src);
768    if (!Reg)
769      return false;
770    Addr.setOffsetReg(Reg);
771    return true;
772  }
773  case Instruction::And: {
774    if (Addr.getOffsetReg())
775      break;
776
777    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
778      break;
779
780    const Value *LHS = U->getOperand(0);
781    const Value *RHS = U->getOperand(1);
782
783    if (const auto *C = dyn_cast<ConstantInt>(LHS))
784      if (C->getValue() == 0xffffffff)
785        std::swap(LHS, RHS);
786
787    if (const auto *C = dyn_cast<ConstantInt>(RHS))
788      if (C->getValue() == 0xffffffff) {
789        Addr.setShift(0);
790        Addr.setExtendType(AArch64_AM::LSL);
791        Addr.setExtendType(AArch64_AM::UXTW);
792
793        unsigned Reg = getRegForValue(LHS);
794        if (!Reg)
795          return false;
796        bool RegIsKill = hasTrivialKill(LHS);
797        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
798                                         AArch64::sub_32);
799        Addr.setOffsetReg(Reg);
800        return true;
801      }
802    break;
803  }
804  case Instruction::SExt:
805  case Instruction::ZExt: {
806    if (!Addr.getReg() || Addr.getOffsetReg())
807      break;
808
809    const Value *Src = nullptr;
810    // Fold the zext or sext when it won't become a noop.
811    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
812      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
813        Addr.setExtendType(AArch64_AM::UXTW);
814        Src = ZE->getOperand(0);
815      }
816    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
817      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
818        Addr.setExtendType(AArch64_AM::SXTW);
819        Src = SE->getOperand(0);
820      }
821    }
822
823    if (!Src)
824      break;
825
826    Addr.setShift(0);
827    unsigned Reg = getRegForValue(Src);
828    if (!Reg)
829      return false;
830    Addr.setOffsetReg(Reg);
831    return true;
832  }
833  } // end switch
834
835  if (Addr.isRegBase() && !Addr.getReg()) {
836    unsigned Reg = getRegForValue(Obj);
837    if (!Reg)
838      return false;
839    Addr.setReg(Reg);
840    return true;
841  }
842
843  if (!Addr.getOffsetReg()) {
844    unsigned Reg = getRegForValue(Obj);
845    if (!Reg)
846      return false;
847    Addr.setOffsetReg(Reg);
848    return true;
849  }
850
851  return false;
852}
853
854bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
855  const User *U = nullptr;
856  unsigned Opcode = Instruction::UserOp1;
857  bool InMBB = true;
858
859  if (const auto *I = dyn_cast<Instruction>(V)) {
860    Opcode = I->getOpcode();
861    U = I;
862    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
863  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
864    Opcode = C->getOpcode();
865    U = C;
866  }
867
868  switch (Opcode) {
869  default: break;
870  case Instruction::BitCast:
871    // Look past bitcasts if its operand is in the same BB.
872    if (InMBB)
873      return computeCallAddress(U->getOperand(0), Addr);
874    break;
875  case Instruction::IntToPtr:
876    // Look past no-op inttoptrs if its operand is in the same BB.
877    if (InMBB &&
878        TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
879      return computeCallAddress(U->getOperand(0), Addr);
880    break;
881  case Instruction::PtrToInt:
882    // Look past no-op ptrtoints if its operand is in the same BB.
883    if (InMBB &&
884        TLI.getValueType(U->getType()) == TLI.getPointerTy())
885      return computeCallAddress(U->getOperand(0), Addr);
886    break;
887  }
888
889  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
890    Addr.setGlobalValue(GV);
891    return true;
892  }
893
894  // If all else fails, try to materialize the value in a register.
895  if (!Addr.getGlobalValue()) {
896    Addr.setReg(getRegForValue(V));
897    return Addr.getReg() != 0;
898  }
899
900  return false;
901}
902
903
904bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
905  EVT evt = TLI.getValueType(Ty, true);
906
907  // Only handle simple types.
908  if (evt == MVT::Other || !evt.isSimple())
909    return false;
910  VT = evt.getSimpleVT();
911
912  // This is a legal type, but it's not something we handle in fast-isel.
913  if (VT == MVT::f128)
914    return false;
915
916  // Handle all other legal types, i.e. a register that will directly hold this
917  // value.
918  return TLI.isTypeLegal(VT);
919}
920
921/// \brief Determine if the value type is supported by FastISel.
922///
923/// FastISel for AArch64 can handle more value types than are legal. This adds
924/// simple value type such as i1, i8, and i16.
925bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
926  if (Ty->isVectorTy() && !IsVectorAllowed)
927    return false;
928
929  if (isTypeLegal(Ty, VT))
930    return true;
931
932  // If this is a type than can be sign or zero-extended to a basic operation
933  // go ahead and accept it now.
934  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
935    return true;
936
937  return false;
938}
939
940bool AArch64FastISel::isValueAvailable(const Value *V) const {
941  if (!isa<Instruction>(V))
942    return true;
943
944  const auto *I = cast<Instruction>(V);
945  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
946    return true;
947
948  return false;
949}
950
951bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
952  unsigned ScaleFactor = getImplicitScaleFactor(VT);
953  if (!ScaleFactor)
954    return false;
955
956  bool ImmediateOffsetNeedsLowering = false;
957  bool RegisterOffsetNeedsLowering = false;
958  int64_t Offset = Addr.getOffset();
959  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
960    ImmediateOffsetNeedsLowering = true;
961  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
962           !isUInt<12>(Offset / ScaleFactor))
963    ImmediateOffsetNeedsLowering = true;
964
965  // Cannot encode an offset register and an immediate offset in the same
966  // instruction. Fold the immediate offset into the load/store instruction and
967  // emit an additonal add to take care of the offset register.
968  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
969    RegisterOffsetNeedsLowering = true;
970
971  // Cannot encode zero register as base.
972  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
973    RegisterOffsetNeedsLowering = true;
974
975  // If this is a stack pointer and the offset needs to be simplified then put
976  // the alloca address into a register, set the base type back to register and
977  // continue. This should almost never happen.
978  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
979  {
980    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
981    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
982            ResultReg)
983      .addFrameIndex(Addr.getFI())
984      .addImm(0)
985      .addImm(0);
986    Addr.setKind(Address::RegBase);
987    Addr.setReg(ResultReg);
988  }
989
990  if (RegisterOffsetNeedsLowering) {
991    unsigned ResultReg = 0;
992    if (Addr.getReg()) {
993      if (Addr.getExtendType() == AArch64_AM::SXTW ||
994          Addr.getExtendType() == AArch64_AM::UXTW   )
995        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
996                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
997                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
998                                  Addr.getShift());
999      else
1000        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1001                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1002                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1003                                  Addr.getShift());
1004    } else {
1005      if (Addr.getExtendType() == AArch64_AM::UXTW)
1006        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1007                               /*Op0IsKill=*/false, Addr.getShift(),
1008                               /*IsZExt=*/true);
1009      else if (Addr.getExtendType() == AArch64_AM::SXTW)
1010        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1011                               /*Op0IsKill=*/false, Addr.getShift(),
1012                               /*IsZExt=*/false);
1013      else
1014        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1015                               /*Op0IsKill=*/false, Addr.getShift());
1016    }
1017    if (!ResultReg)
1018      return false;
1019
1020    Addr.setReg(ResultReg);
1021    Addr.setOffsetReg(0);
1022    Addr.setShift(0);
1023    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1024  }
1025
1026  // Since the offset is too large for the load/store instruction get the
1027  // reg+offset into a register.
1028  if (ImmediateOffsetNeedsLowering) {
1029    unsigned ResultReg;
1030    if (Addr.getReg())
1031      // Try to fold the immediate into the add instruction.
1032      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1033    else
1034      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1035
1036    if (!ResultReg)
1037      return false;
1038    Addr.setReg(ResultReg);
1039    Addr.setOffset(0);
1040  }
1041  return true;
1042}
1043
1044void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1045                                           const MachineInstrBuilder &MIB,
1046                                           unsigned Flags,
1047                                           unsigned ScaleFactor,
1048                                           MachineMemOperand *MMO) {
1049  int64_t Offset = Addr.getOffset() / ScaleFactor;
1050  // Frame base works a bit differently. Handle it separately.
1051  if (Addr.isFIBase()) {
1052    int FI = Addr.getFI();
1053    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1054    // and alignment should be based on the VT.
1055    MMO = FuncInfo.MF->getMachineMemOperand(
1056      MachinePointerInfo::getFixedStack(FI, Offset), Flags,
1057      MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1058    // Now add the rest of the operands.
1059    MIB.addFrameIndex(FI).addImm(Offset);
1060  } else {
1061    assert(Addr.isRegBase() && "Unexpected address kind.");
1062    const MCInstrDesc &II = MIB->getDesc();
1063    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1064    Addr.setReg(
1065      constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1066    Addr.setOffsetReg(
1067      constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1068    if (Addr.getOffsetReg()) {
1069      assert(Addr.getOffset() == 0 && "Unexpected offset");
1070      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1071                      Addr.getExtendType() == AArch64_AM::SXTX;
1072      MIB.addReg(Addr.getReg());
1073      MIB.addReg(Addr.getOffsetReg());
1074      MIB.addImm(IsSigned);
1075      MIB.addImm(Addr.getShift() != 0);
1076    } else
1077      MIB.addReg(Addr.getReg()).addImm(Offset);
1078  }
1079
1080  if (MMO)
1081    MIB.addMemOperand(MMO);
1082}
1083
1084unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1085                                     const Value *RHS, bool SetFlags,
1086                                     bool WantResult,  bool IsZExt) {
1087  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1088  bool NeedExtend = false;
1089  switch (RetVT.SimpleTy) {
1090  default:
1091    return 0;
1092  case MVT::i1:
1093    NeedExtend = true;
1094    break;
1095  case MVT::i8:
1096    NeedExtend = true;
1097    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1098    break;
1099  case MVT::i16:
1100    NeedExtend = true;
1101    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1102    break;
1103  case MVT::i32:  // fall-through
1104  case MVT::i64:
1105    break;
1106  }
1107  MVT SrcVT = RetVT;
1108  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1109
1110  // Canonicalize immediates to the RHS first.
1111  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1112    std::swap(LHS, RHS);
1113
1114  // Canonicalize mul by power of 2 to the RHS.
1115  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1116    if (isMulPowOf2(LHS))
1117      std::swap(LHS, RHS);
1118
1119  // Canonicalize shift immediate to the RHS.
1120  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1121    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1122      if (isa<ConstantInt>(SI->getOperand(1)))
1123        if (SI->getOpcode() == Instruction::Shl  ||
1124            SI->getOpcode() == Instruction::LShr ||
1125            SI->getOpcode() == Instruction::AShr   )
1126          std::swap(LHS, RHS);
1127
1128  unsigned LHSReg = getRegForValue(LHS);
1129  if (!LHSReg)
1130    return 0;
1131  bool LHSIsKill = hasTrivialKill(LHS);
1132
1133  if (NeedExtend)
1134    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1135
1136  unsigned ResultReg = 0;
1137  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1138    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1139    if (C->isNegative())
1140      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1141                                SetFlags, WantResult);
1142    else
1143      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1144                                WantResult);
1145  } else if (const auto *C = dyn_cast<Constant>(RHS))
1146    if (C->isNullValue())
1147      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1148                                WantResult);
1149
1150  if (ResultReg)
1151    return ResultReg;
1152
1153  // Only extend the RHS within the instruction if there is a valid extend type.
1154  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1155      isValueAvailable(RHS)) {
1156    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1157      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1158        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1159          unsigned RHSReg = getRegForValue(SI->getOperand(0));
1160          if (!RHSReg)
1161            return 0;
1162          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1163          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1164                               RHSIsKill, ExtendType, C->getZExtValue(),
1165                               SetFlags, WantResult);
1166        }
1167    unsigned RHSReg = getRegForValue(RHS);
1168    if (!RHSReg)
1169      return 0;
1170    bool RHSIsKill = hasTrivialKill(RHS);
1171    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1172                         ExtendType, 0, SetFlags, WantResult);
1173  }
1174
1175  // Check if the mul can be folded into the instruction.
1176  if (RHS->hasOneUse() && isValueAvailable(RHS))
1177    if (isMulPowOf2(RHS)) {
1178      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1179      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1180
1181      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1182        if (C->getValue().isPowerOf2())
1183          std::swap(MulLHS, MulRHS);
1184
1185      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1186      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1187      unsigned RHSReg = getRegForValue(MulLHS);
1188      if (!RHSReg)
1189        return 0;
1190      bool RHSIsKill = hasTrivialKill(MulLHS);
1191      return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1192                           AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
1193    }
1194
1195  // Check if the shift can be folded into the instruction.
1196  if (RHS->hasOneUse() && isValueAvailable(RHS))
1197    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1198      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1199        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1200        switch (SI->getOpcode()) {
1201        default: break;
1202        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1203        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1204        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1205        }
1206        uint64_t ShiftVal = C->getZExtValue();
1207        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1208          unsigned RHSReg = getRegForValue(SI->getOperand(0));
1209          if (!RHSReg)
1210            return 0;
1211          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1212          return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1213                               RHSIsKill, ShiftType, ShiftVal, SetFlags,
1214                               WantResult);
1215        }
1216      }
1217    }
1218
1219  unsigned RHSReg = getRegForValue(RHS);
1220  if (!RHSReg)
1221    return 0;
1222  bool RHSIsKill = hasTrivialKill(RHS);
1223
1224  if (NeedExtend)
1225    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1226
1227  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1228                       SetFlags, WantResult);
1229}
1230
1231unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1232                                        bool LHSIsKill, unsigned RHSReg,
1233                                        bool RHSIsKill, bool SetFlags,
1234                                        bool WantResult) {
1235  assert(LHSReg && RHSReg && "Invalid register number.");
1236
1237  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1238    return 0;
1239
1240  static const unsigned OpcTable[2][2][2] = {
1241    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1242      { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1243    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1244      { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1245  };
1246  bool Is64Bit = RetVT == MVT::i64;
1247  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1248  const TargetRegisterClass *RC =
1249      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1250  unsigned ResultReg;
1251  if (WantResult)
1252    ResultReg = createResultReg(RC);
1253  else
1254    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1255
1256  const MCInstrDesc &II = TII.get(Opc);
1257  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1258  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1259  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1260      .addReg(LHSReg, getKillRegState(LHSIsKill))
1261      .addReg(RHSReg, getKillRegState(RHSIsKill));
1262  return ResultReg;
1263}
1264
1265unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1266                                        bool LHSIsKill, uint64_t Imm,
1267                                        bool SetFlags, bool WantResult) {
1268  assert(LHSReg && "Invalid register number.");
1269
1270  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1271    return 0;
1272
1273  unsigned ShiftImm;
1274  if (isUInt<12>(Imm))
1275    ShiftImm = 0;
1276  else if ((Imm & 0xfff000) == Imm) {
1277    ShiftImm = 12;
1278    Imm >>= 12;
1279  } else
1280    return 0;
1281
1282  static const unsigned OpcTable[2][2][2] = {
1283    { { AArch64::SUBWri,  AArch64::SUBXri  },
1284      { AArch64::ADDWri,  AArch64::ADDXri  }  },
1285    { { AArch64::SUBSWri, AArch64::SUBSXri },
1286      { AArch64::ADDSWri, AArch64::ADDSXri }  }
1287  };
1288  bool Is64Bit = RetVT == MVT::i64;
1289  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1290  const TargetRegisterClass *RC;
1291  if (SetFlags)
1292    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1293  else
1294    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1295  unsigned ResultReg;
1296  if (WantResult)
1297    ResultReg = createResultReg(RC);
1298  else
1299    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1300
1301  const MCInstrDesc &II = TII.get(Opc);
1302  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1303  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1304      .addReg(LHSReg, getKillRegState(LHSIsKill))
1305      .addImm(Imm)
1306      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1307  return ResultReg;
1308}
1309
1310unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1311                                        bool LHSIsKill, unsigned RHSReg,
1312                                        bool RHSIsKill,
1313                                        AArch64_AM::ShiftExtendType ShiftType,
1314                                        uint64_t ShiftImm, bool SetFlags,
1315                                        bool WantResult) {
1316  assert(LHSReg && RHSReg && "Invalid register number.");
1317
1318  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1319    return 0;
1320
1321  static const unsigned OpcTable[2][2][2] = {
1322    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1323      { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1324    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1325      { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1326  };
1327  bool Is64Bit = RetVT == MVT::i64;
1328  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1329  const TargetRegisterClass *RC =
1330      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1331  unsigned ResultReg;
1332  if (WantResult)
1333    ResultReg = createResultReg(RC);
1334  else
1335    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1336
1337  const MCInstrDesc &II = TII.get(Opc);
1338  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1339  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1340  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1341      .addReg(LHSReg, getKillRegState(LHSIsKill))
1342      .addReg(RHSReg, getKillRegState(RHSIsKill))
1343      .addImm(getShifterImm(ShiftType, ShiftImm));
1344  return ResultReg;
1345}
1346
1347unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1348                                        bool LHSIsKill, unsigned RHSReg,
1349                                        bool RHSIsKill,
1350                                        AArch64_AM::ShiftExtendType ExtType,
1351                                        uint64_t ShiftImm, bool SetFlags,
1352                                        bool WantResult) {
1353  assert(LHSReg && RHSReg && "Invalid register number.");
1354
1355  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1356    return 0;
1357
1358  static const unsigned OpcTable[2][2][2] = {
1359    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1360      { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1361    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1362      { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1363  };
1364  bool Is64Bit = RetVT == MVT::i64;
1365  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1366  const TargetRegisterClass *RC = nullptr;
1367  if (SetFlags)
1368    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1369  else
1370    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1371  unsigned ResultReg;
1372  if (WantResult)
1373    ResultReg = createResultReg(RC);
1374  else
1375    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1376
1377  const MCInstrDesc &II = TII.get(Opc);
1378  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1379  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1380  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1381      .addReg(LHSReg, getKillRegState(LHSIsKill))
1382      .addReg(RHSReg, getKillRegState(RHSIsKill))
1383      .addImm(getArithExtendImm(ExtType, ShiftImm));
1384  return ResultReg;
1385}
1386
1387bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1388  Type *Ty = LHS->getType();
1389  EVT EVT = TLI.getValueType(Ty, true);
1390  if (!EVT.isSimple())
1391    return false;
1392  MVT VT = EVT.getSimpleVT();
1393
1394  switch (VT.SimpleTy) {
1395  default:
1396    return false;
1397  case MVT::i1:
1398  case MVT::i8:
1399  case MVT::i16:
1400  case MVT::i32:
1401  case MVT::i64:
1402    return emitICmp(VT, LHS, RHS, IsZExt);
1403  case MVT::f32:
1404  case MVT::f64:
1405    return emitFCmp(VT, LHS, RHS);
1406  }
1407}
1408
1409bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1410                               bool IsZExt) {
1411  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1412                 IsZExt) != 0;
1413}
1414
1415bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1416                                  uint64_t Imm) {
1417  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1418                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
1419}
1420
1421bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1422  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1423    return false;
1424
1425  // Check to see if the 2nd operand is a constant that we can encode directly
1426  // in the compare.
1427  bool UseImm = false;
1428  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1429    if (CFP->isZero() && !CFP->isNegative())
1430      UseImm = true;
1431
1432  unsigned LHSReg = getRegForValue(LHS);
1433  if (!LHSReg)
1434    return false;
1435  bool LHSIsKill = hasTrivialKill(LHS);
1436
1437  if (UseImm) {
1438    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1439    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1440        .addReg(LHSReg, getKillRegState(LHSIsKill));
1441    return true;
1442  }
1443
1444  unsigned RHSReg = getRegForValue(RHS);
1445  if (!RHSReg)
1446    return false;
1447  bool RHSIsKill = hasTrivialKill(RHS);
1448
1449  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1450  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1451      .addReg(LHSReg, getKillRegState(LHSIsKill))
1452      .addReg(RHSReg, getKillRegState(RHSIsKill));
1453  return true;
1454}
1455
1456unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1457                                  bool SetFlags, bool WantResult, bool IsZExt) {
1458  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1459                    IsZExt);
1460}
1461
1462/// \brief This method is a wrapper to simplify add emission.
1463///
1464/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1465/// that fails, then try to materialize the immediate into a register and use
1466/// emitAddSub_rr instead.
1467unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1468                                      int64_t Imm) {
1469  unsigned ResultReg;
1470  if (Imm < 0)
1471    ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1472  else
1473    ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1474
1475  if (ResultReg)
1476    return ResultReg;
1477
1478  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1479  if (!CReg)
1480    return 0;
1481
1482  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1483  return ResultReg;
1484}
1485
1486unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1487                                  bool SetFlags, bool WantResult, bool IsZExt) {
1488  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1489                    IsZExt);
1490}
1491
1492unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1493                                      bool LHSIsKill, unsigned RHSReg,
1494                                      bool RHSIsKill, bool WantResult) {
1495  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1496                       RHSIsKill, /*SetFlags=*/true, WantResult);
1497}
1498
1499unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1500                                      bool LHSIsKill, unsigned RHSReg,
1501                                      bool RHSIsKill,
1502                                      AArch64_AM::ShiftExtendType ShiftType,
1503                                      uint64_t ShiftImm, bool WantResult) {
1504  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1505                       RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1506                       WantResult);
1507}
1508
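/// \brief Emit a logical operation (AND/OR/XOR) on integer values.
///
/// Immediates, multiplies by a power of two, and shifts by a constant amount
/// are canonicalized to the RHS so they can be folded into the immediate or
/// shifted-register form of the instruction.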
1509unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1510                                        const Value *LHS, const Value *RHS) {
1511  // Canonicalize immediates to the RHS first.
1512  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1513    std::swap(LHS, RHS);
1514
1515  // Canonicalize mul by power-of-2 to the RHS.
1516  if (LHS->hasOneUse() && isValueAvailable(LHS))
1517    if (isMulPowOf2(LHS))
1518      std::swap(LHS, RHS);
1519
1520  // Canonicalize shift immediate to the RHS.
1521  if (LHS->hasOneUse() && isValueAvailable(LHS))
1522    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1523      if (isa<ConstantInt>(SI->getOperand(1)))
1524        std::swap(LHS, RHS);
1525
1526  unsigned LHSReg = getRegForValue(LHS);
1527  if (!LHSReg)
1528    return 0;
1529  bool LHSIsKill = hasTrivialKill(LHS);
1530
1531  unsigned ResultReg = 0;
1532  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1533    uint64_t Imm = C->getZExtValue();
1534    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1535  }
1536  if (ResultReg)
1537    return ResultReg;
1538
1539  // Check if the mul can be folded into the instruction.
1540  if (RHS->hasOneUse() && isValueAvailable(RHS))
1541    if (isMulPowOf2(RHS)) {
1542      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1543      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1544
1545      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1546        if (C->getValue().isPowerOf2())
1547          std::swap(MulLHS, MulRHS);
1548
1549      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1550      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1551
1552      unsigned RHSReg = getRegForValue(MulLHS);
1553      if (!RHSReg)
1554        return 0;
1555      bool RHSIsKill = hasTrivialKill(MulLHS);
1556      return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1557                              RHSIsKill, ShiftVal);
1558    }
1559
1560  // Check if the shift can be folded into the instruction.
1561  if (RHS->hasOneUse() && isValueAvailable(RHS))
1562    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1563      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1564        uint64_t ShiftVal = C->getZExtValue();
1565        unsigned RHSReg = getRegForValue(SI->getOperand(0));
1566        if (!RHSReg)
1567          return 0;
1568        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1569        return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1570                                RHSIsKill, ShiftVal);
1571      }
1572
1573  unsigned RHSReg = getRegForValue(RHS);
1574  if (!RHSReg)
1575    return 0;
1576  bool RHSIsKill = hasTrivialKill(RHS);
1577
1578  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1579  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1580  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1581    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1582    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1583  }
1584  return ResultReg;
1585}
1586
1587unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1588                                           unsigned LHSReg, bool LHSIsKill,
1589                                           uint64_t Imm) {
1590  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1591         "ISD nodes are not consecutive!");
1592  static const unsigned OpcTable[3][2] = {
1593    { AArch64::ANDWri, AArch64::ANDXri },
1594    { AArch64::ORRWri, AArch64::ORRXri },
1595    { AArch64::EORWri, AArch64::EORXri }
1596  };
1597  const TargetRegisterClass *RC;
1598  unsigned Opc;
1599  unsigned RegSize;
1600  switch (RetVT.SimpleTy) {
1601  default:
1602    return 0;
1603  case MVT::i1:
1604  case MVT::i8:
1605  case MVT::i16:
1606  case MVT::i32: {
1607    unsigned Idx = ISDOpc - ISD::AND;
1608    Opc = OpcTable[Idx][0];
1609    RC = &AArch64::GPR32spRegClass;
1610    RegSize = 32;
1611    break;
1612  }
1613  case MVT::i64:
1614    Opc = OpcTable[ISDOpc - ISD::AND][1];
1615    RC = &AArch64::GPR64spRegClass;
1616    RegSize = 64;
1617    break;
1618  }
1619
1620  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1621    return 0;
1622
1623  unsigned ResultReg =
1624      fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1625                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1626  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1627    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1628    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1629  }
1630  return ResultReg;
1631}
1632
1633unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1634                                           unsigned LHSReg, bool LHSIsKill,
1635                                           unsigned RHSReg, bool RHSIsKill,
1636                                           uint64_t ShiftImm) {
1637  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1638         "ISD nodes are not consecutive!");
1639  static const unsigned OpcTable[3][2] = {
1640    { AArch64::ANDWrs, AArch64::ANDXrs },
1641    { AArch64::ORRWrs, AArch64::ORRXrs },
1642    { AArch64::EORWrs, AArch64::EORXrs }
1643  };
1644  const TargetRegisterClass *RC;
1645  unsigned Opc;
1646  switch (RetVT.SimpleTy) {
1647  default:
1648    return 0;
1649  case MVT::i1:
1650  case MVT::i8:
1651  case MVT::i16:
1652  case MVT::i32:
1653    Opc = OpcTable[ISDOpc - ISD::AND][0];
1654    RC = &AArch64::GPR32RegClass;
1655    break;
1656  case MVT::i64:
1657    Opc = OpcTable[ISDOpc - ISD::AND][1];
1658    RC = &AArch64::GPR64RegClass;
1659    break;
1660  }
1661  unsigned ResultReg =
1662      fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1663                       AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1664  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1665    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1666    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1667  }
1668  return ResultReg;
1669}
1670
1671unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1672                                     uint64_t Imm) {
1673  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1674}
1675
1676unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1677                                   bool WantZExt, MachineMemOperand *MMO) {
1678  // Simplify this down to something we can handle.
1679  if (!simplifyAddress(Addr, VT))
1680    return 0;
1681
1682  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1683  if (!ScaleFactor)
1684    llvm_unreachable("Unexpected value type.");
1685
1686  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1687  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1688  bool UseScaled = true;
1689  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1690    UseScaled = false;
1691    ScaleFactor = 1;
1692  }
1693
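  // Within each sign-/zero-extend half of the table below, rows come in pairs
  // (32-bit and 64-bit destination) for the four addressing modes: unscaled
  // immediate, scaled immediate, register offset (X), and extended register
  // offset (W). Columns select the access size (8/16/32/64 bit).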
1694  static const unsigned GPOpcTable[2][8][4] = {
1695    // Sign-extend.
1696    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1697        AArch64::LDURXi  },
1698      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1699        AArch64::LDURXi  },
1700      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1701        AArch64::LDRXui  },
1702      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1703        AArch64::LDRXui  },
1704      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1705        AArch64::LDRXroX },
1706      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1707        AArch64::LDRXroX },
1708      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1709        AArch64::LDRXroW },
1710      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1711        AArch64::LDRXroW }
1712    },
1713    // Zero-extend.
1714    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1715        AArch64::LDURXi  },
1716      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1717        AArch64::LDURXi  },
1718      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1719        AArch64::LDRXui  },
1720      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1721        AArch64::LDRXui  },
1722      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1723        AArch64::LDRXroX },
1724      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1725        AArch64::LDRXroX },
1726      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1727        AArch64::LDRXroW },
1728      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1729        AArch64::LDRXroW }
1730    }
1731  };
1732
1733  static const unsigned FPOpcTable[4][2] = {
1734    { AArch64::LDURSi,  AArch64::LDURDi  },
1735    { AArch64::LDRSui,  AArch64::LDRDui  },
1736    { AArch64::LDRSroX, AArch64::LDRDroX },
1737    { AArch64::LDRSroW, AArch64::LDRDroW }
1738  };
1739
1740  unsigned Opc;
1741  const TargetRegisterClass *RC;
1742  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1743                      Addr.getOffsetReg();
1744  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1745  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1746      Addr.getExtendType() == AArch64_AM::SXTW)
1747    Idx++;
1748
1749  bool IsRet64Bit = RetVT == MVT::i64;
1750  switch (VT.SimpleTy) {
1751  default:
1752    llvm_unreachable("Unexpected value type.");
1753  case MVT::i1: // Intentional fall-through.
1754  case MVT::i8:
1755    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1756    RC = (IsRet64Bit && !WantZExt) ?
1757             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1758    break;
1759  case MVT::i16:
1760    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1761    RC = (IsRet64Bit && !WantZExt) ?
1762             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1763    break;
1764  case MVT::i32:
1765    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1766    RC = (IsRet64Bit && !WantZExt) ?
1767             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1768    break;
1769  case MVT::i64:
1770    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1771    RC = &AArch64::GPR64RegClass;
1772    break;
1773  case MVT::f32:
1774    Opc = FPOpcTable[Idx][0];
1775    RC = &AArch64::FPR32RegClass;
1776    break;
1777  case MVT::f64:
1778    Opc = FPOpcTable[Idx][1];
1779    RC = &AArch64::FPR64RegClass;
1780    break;
1781  }
1782
1783  // Create the base instruction, then add the operands.
1784  unsigned ResultReg = createResultReg(RC);
1785  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1786                                    TII.get(Opc), ResultReg);
1787  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1788
1789  // Loading an i1 requires special handling.
1790  if (VT == MVT::i1) {
1791    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1792    assert(ANDReg && "Unexpected AND instruction emission failure.");
1793    ResultReg = ANDReg;
1794  }
1795
1796  // For a zero-extending load to 64 bits we emit a 32-bit load and then
1797  // convert the 32-bit register to a 64-bit register.
1798  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1799    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1800    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1801            TII.get(AArch64::SUBREG_TO_REG), Reg64)
1802        .addImm(0)
1803        .addReg(ResultReg, getKillRegState(true))
1804        .addImm(AArch64::sub_32);
1805    ResultReg = Reg64;
1806  }
1807  return ResultReg;
1808}
1809
1810bool AArch64FastISel::selectAddSub(const Instruction *I) {
1811  MVT VT;
1812  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1813    return false;
1814
1815  if (VT.isVector())
1816    return selectOperator(I, I->getOpcode());
1817
1818  unsigned ResultReg;
1819  switch (I->getOpcode()) {
1820  default:
1821    llvm_unreachable("Unexpected instruction.");
1822  case Instruction::Add:
1823    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1824    break;
1825  case Instruction::Sub:
1826    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1827    break;
1828  }
1829  if (!ResultReg)
1830    return false;
1831
1832  updateValueMap(I, ResultReg);
1833  return true;
1834}
1835
1836bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1837  MVT VT;
1838  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1839    return false;
1840
1841  if (VT.isVector())
1842    return selectOperator(I, I->getOpcode());
1843
1844  unsigned ResultReg;
1845  switch (I->getOpcode()) {
1846  default:
1847    llvm_unreachable("Unexpected instruction.");
1848  case Instruction::And:
1849    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1850    break;
1851  case Instruction::Or:
1852    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1853    break;
1854  case Instruction::Xor:
1855    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1856    break;
1857  }
1858  if (!ResultReg)
1859    return false;
1860
1861  updateValueMap(I, ResultReg);
1862  return true;
1863}
1864
1865bool AArch64FastISel::selectLoad(const Instruction *I) {
1866  MVT VT;
1867  // Verify we have a legal type before going any further.  Currently, we handle
1868  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1869  // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
1870  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1871      cast<LoadInst>(I)->isAtomic())
1872    return false;
1873
1874  // See if we can handle this address.
1875  Address Addr;
1876  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1877    return false;
1878
1879  // Fold the following sign-/zero-extend into the load instruction.
1880  bool WantZExt = true;
1881  MVT RetVT = VT;
1882  const Value *IntExtVal = nullptr;
1883  if (I->hasOneUse()) {
1884    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1885      if (isTypeSupported(ZE->getType(), RetVT))
1886        IntExtVal = ZE;
1887      else
1888        RetVT = VT;
1889    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1890      if (isTypeSupported(SE->getType(), RetVT))
1891        IntExtVal = SE;
1892      else
1893        RetVT = VT;
1894      WantZExt = false;
1895    }
1896  }
1897
1898  unsigned ResultReg =
1899      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1900  if (!ResultReg)
1901    return false;
1902
1903  // There are a few different cases we have to handle, because the load or the
1904  // sign-/zero-extend might not be selected by FastISel if we fall back to
1905  // SelectionDAG. There is also an ordering issue when both instructions are in
1906  // different basic blocks.
1907  // 1.) The load instruction is selected by FastISel, but the integer extend
1908  //     is not. This usually happens when the integer extend is in a different
1909  //     basic block and SelectionDAG took over for that basic block.
1910  // 2.) The load instruction is selected before the integer extend. This only
1911  //     happens when the integer extend is in a different basic block.
1912  // 3.) The load instruction is selected by SelectionDAG and the integer extend
1913  //     by FastISel. This happens if there are instructions between the load
1914  //     and the integer extend that couldn't be selected by FastISel.
1915  if (IntExtVal) {
1916    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1917    // could select it. Emit a copy to subreg if necessary. FastISel will remove
1918    // it when it selects the integer extend.
1919    unsigned Reg = lookUpRegForValue(IntExtVal);
1920    auto *MI = MRI.getUniqueVRegDef(Reg);
1921    if (!MI) {
1922      if (RetVT == MVT::i64 && VT <= MVT::i32) {
1923        if (WantZExt) {
1924          // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
1925          std::prev(FuncInfo.InsertPt)->eraseFromParent();
1926          ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
1927        } else
1928          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
1929                                                 /*IsKill=*/true,
1930                                                 AArch64::sub_32);
1931      }
1932      updateValueMap(I, ResultReg);
1933      return true;
1934    }
1935
1936    // The integer extend has already been emitted - delete all the instructions
1937    // that have been emitted by the integer extend lowering code and use the
1938    // result from the load instruction directly.
1939    while (MI) {
1940      Reg = 0;
1941      for (auto &Opnd : MI->uses()) {
1942        if (Opnd.isReg()) {
1943          Reg = Opnd.getReg();
1944          break;
1945        }
1946      }
1947      MI->eraseFromParent();
1948      MI = nullptr;
1949      if (Reg)
1950        MI = MRI.getUniqueVRegDef(Reg);
1951    }
1952    updateValueMap(IntExtVal, ResultReg);
1953    return true;
1954  }
1955
1956  updateValueMap(I, ResultReg);
1957  return true;
1958}
1959
1960bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
1961                                MachineMemOperand *MMO) {
1962  // Simplify this down to something we can handle.
1963  if (!simplifyAddress(Addr, VT))
1964    return false;
1965
1966  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1967  if (!ScaleFactor)
1968    llvm_unreachable("Unexpected value type.");
1969
1970  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1971  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1972  bool UseScaled = true;
1973  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1974    UseScaled = false;
1975    ScaleFactor = 1;
1976  }
1977
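  // Rows select the addressing mode (unscaled immediate, scaled immediate,
  // register offset (X), extended register offset (W)); columns select the
  // stored type (i8, i16, i32, i64, f32, f64).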
1978  static const unsigned OpcTable[4][6] = {
1979    { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
1980      AArch64::STURSi,   AArch64::STURDi },
1981    { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
1982      AArch64::STRSui,   AArch64::STRDui },
1983    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1984      AArch64::STRSroX,  AArch64::STRDroX },
1985    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1986      AArch64::STRSroW,  AArch64::STRDroW }
1987  };
1988
1989  unsigned Opc;
1990  bool VTIsi1 = false;
1991  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1992                      Addr.getOffsetReg();
1993  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1994  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1995      Addr.getExtendType() == AArch64_AM::SXTW)
1996    Idx++;
1997
1998  switch (VT.SimpleTy) {
1999  default: llvm_unreachable("Unexpected value type.");
2000  case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
2001  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2002  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2003  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2004  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2005  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2006  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2007  }
2008
2009  // Storing an i1 requires special handling.
2010  if (VTIsi1 && SrcReg != AArch64::WZR) {
2011    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2012    assert(ANDReg && "Unexpected AND instruction emission failure.");
2013    SrcReg = ANDReg;
2014  }
2015  // Create the base instruction, then add the operands.
2016  const MCInstrDesc &II = TII.get(Opc);
2017  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2018  MachineInstrBuilder MIB =
2019      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2020  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2021
2022  return true;
2023}
2024
2025bool AArch64FastISel::selectStore(const Instruction *I) {
2026  MVT VT;
2027  const Value *Op0 = I->getOperand(0);
2028  // Verify we have a legal type before going any further.  Currently, we handle
2029  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2030  // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
2031  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
2032      cast<StoreInst>(I)->isAtomic())
2033    return false;
2034
2035  // Get the value to be stored into a register. Use the zero register directly
2036  // when possible to avoid an unnecessary copy and a wasted register.
2037  unsigned SrcReg = 0;
2038  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2039    if (CI->isZero())
2040      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2041  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2042    if (CF->isZero() && !CF->isNegative()) {
2043      VT = MVT::getIntegerVT(VT.getSizeInBits());
2044      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2045    }
2046  }
2047
2048  if (!SrcReg)
2049    SrcReg = getRegForValue(Op0);
2050
2051  if (!SrcReg)
2052    return false;
2053
2054  // See if we can handle this address.
2055  Address Addr;
2056  if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
2057    return false;
2058
2059  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2060    return false;
2061  return true;
2062}
2063
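/// \brief Map an IR integer or floating-point predicate to an AArch64
/// condition code. FCMP_ONE and FCMP_UEQ cannot be expressed as a single
/// condition code and map to AL; callers handle those with an extra
/// instruction.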
2064static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2065  switch (Pred) {
2066  case CmpInst::FCMP_ONE:
2067  case CmpInst::FCMP_UEQ:
2068  default:
2069    // AL is our "false" for now. The other two need more compares.
2070    return AArch64CC::AL;
2071  case CmpInst::ICMP_EQ:
2072  case CmpInst::FCMP_OEQ:
2073    return AArch64CC::EQ;
2074  case CmpInst::ICMP_SGT:
2075  case CmpInst::FCMP_OGT:
2076    return AArch64CC::GT;
2077  case CmpInst::ICMP_SGE:
2078  case CmpInst::FCMP_OGE:
2079    return AArch64CC::GE;
2080  case CmpInst::ICMP_UGT:
2081  case CmpInst::FCMP_UGT:
2082    return AArch64CC::HI;
2083  case CmpInst::FCMP_OLT:
2084    return AArch64CC::MI;
2085  case CmpInst::ICMP_ULE:
2086  case CmpInst::FCMP_OLE:
2087    return AArch64CC::LS;
2088  case CmpInst::FCMP_ORD:
2089    return AArch64CC::VC;
2090  case CmpInst::FCMP_UNO:
2091    return AArch64CC::VS;
2092  case CmpInst::FCMP_UGE:
2093    return AArch64CC::PL;
2094  case CmpInst::ICMP_SLT:
2095  case CmpInst::FCMP_ULT:
2096    return AArch64CC::LT;
2097  case CmpInst::ICMP_SLE:
2098  case CmpInst::FCMP_ULE:
2099    return AArch64CC::LE;
2100  case CmpInst::FCMP_UNE:
2101  case CmpInst::ICMP_NE:
2102    return AArch64CC::NE;
2103  case CmpInst::ICMP_UGE:
2104    return AArch64CC::HS;
2105  case CmpInst::ICMP_ULT:
2106    return AArch64CC::LO;
2107  }
2108}
2109
2110/// \brief Try to emit a combined compare-and-branch instruction.
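///
/// For example, (icmp ne X, 0) maps to CBNZ, (icmp ne (and X, 2^N), 0) maps to
/// TBNZ with bit index N, and (icmp slt X, 0) maps to a TBNZ of the sign bit.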
2111bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2112  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2113  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2114  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2115
2116  const Value *LHS = CI->getOperand(0);
2117  const Value *RHS = CI->getOperand(1);
2118
2119  MVT VT;
2120  if (!isTypeSupported(LHS->getType(), VT))
2121    return false;
2122
2123  unsigned BW = VT.getSizeInBits();
2124  if (BW > 64)
2125    return false;
2126
2127  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2128  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2129
2130  // Try to take advantage of fallthrough opportunities.
2131  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2132    std::swap(TBB, FBB);
2133    Predicate = CmpInst::getInversePredicate(Predicate);
2134  }
2135
2136  int TestBit = -1;
2137  bool IsCmpNE;
2138  switch (Predicate) {
2139  default:
2140    return false;
2141  case CmpInst::ICMP_EQ:
2142  case CmpInst::ICMP_NE:
2143    if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2144      std::swap(LHS, RHS);
2145
2146    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2147      return false;
2148
2149    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2150      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2151        const Value *AndLHS = AI->getOperand(0);
2152        const Value *AndRHS = AI->getOperand(1);
2153
2154        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2155          if (C->getValue().isPowerOf2())
2156            std::swap(AndLHS, AndRHS);
2157
2158        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2159          if (C->getValue().isPowerOf2()) {
2160            TestBit = C->getValue().logBase2();
2161            LHS = AndLHS;
2162          }
2163      }
2164
2165    if (VT == MVT::i1)
2166      TestBit = 0;
2167
2168    IsCmpNE = Predicate == CmpInst::ICMP_NE;
2169    break;
2170  case CmpInst::ICMP_SLT:
2171  case CmpInst::ICMP_SGE:
2172    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2173      return false;
2174
2175    TestBit = BW - 1;
2176    IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2177    break;
2178  case CmpInst::ICMP_SGT:
2179  case CmpInst::ICMP_SLE:
2180    if (!isa<ConstantInt>(RHS))
2181      return false;
2182
2183    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2184      return false;
2185
2186    TestBit = BW - 1;
2187    IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2188    break;
2189  } // end switch
2190
2191  static const unsigned OpcTable[2][2][2] = {
2192    { {AArch64::CBZW,  AArch64::CBZX },
2193      {AArch64::CBNZW, AArch64::CBNZX} },
2194    { {AArch64::TBZW,  AArch64::TBZX },
2195      {AArch64::TBNZW, AArch64::TBNZX} }
2196  };
2197
2198  bool IsBitTest = TestBit != -1;
2199  bool Is64Bit = BW == 64;
2200  if (TestBit < 32 && TestBit >= 0)
2201    Is64Bit = false;
2202
2203  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2204  const MCInstrDesc &II = TII.get(Opc);
2205
2206  unsigned SrcReg = getRegForValue(LHS);
2207  if (!SrcReg)
2208    return false;
2209  bool SrcIsKill = hasTrivialKill(LHS);
2210
2211  if (BW == 64 && !Is64Bit)
2212    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2213                                        AArch64::sub_32);
2214
2215  if ((BW < 32) && !IsBitTest)
2216    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2217
2218  // Emit the combined compare and branch instruction.
2219  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2220  MachineInstrBuilder MIB =
2221      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2222          .addReg(SrcReg, getKillRegState(SrcIsKill));
2223  if (IsBitTest)
2224    MIB.addImm(TestBit);
2225  MIB.addMBB(TBB);
2226
2227  // Obtain the branch weight and add the TrueBB to the successor list.
2228  uint32_t BranchWeight = 0;
2229  if (FuncInfo.BPI)
2230    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2231                                               TBB->getBasicBlock());
2232  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2233  fastEmitBranch(FBB, DbgLoc);
2234
2235  return true;
2236}
2237
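/// \brief Lower a conditional or unconditional branch.
///
/// Conditions fed by a compare, a truncate, a constant, or an overflow
/// intrinsic get dedicated lowerings; anything else falls back to testing the
/// i1 value that was left in a virtual register.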
2238bool AArch64FastISel::selectBranch(const Instruction *I) {
2239  const BranchInst *BI = cast<BranchInst>(I);
2240  if (BI->isUnconditional()) {
2241    MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2242    fastEmitBranch(MSucc, BI->getDebugLoc());
2243    return true;
2244  }
2245
2246  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2247  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2248
2249  AArch64CC::CondCode CC = AArch64CC::NE;
2250  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2251    if (CI->hasOneUse() && isValueAvailable(CI)) {
2252      // Try to optimize or fold the cmp.
2253      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2254      switch (Predicate) {
2255      default:
2256        break;
2257      case CmpInst::FCMP_FALSE:
2258        fastEmitBranch(FBB, DbgLoc);
2259        return true;
2260      case CmpInst::FCMP_TRUE:
2261        fastEmitBranch(TBB, DbgLoc);
2262        return true;
2263      }
2264
2265      // Try to emit a combined compare-and-branch first.
2266      if (emitCompareAndBranch(BI))
2267        return true;
2268
2269      // Try to take advantage of fallthrough opportunities.
2270      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2271        std::swap(TBB, FBB);
2272        Predicate = CmpInst::getInversePredicate(Predicate);
2273      }
2274
2275      // Emit the cmp.
2276      if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2277        return false;
2278
2279      // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2280      // instruction.
2281      CC = getCompareCC(Predicate);
2282      AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2283      switch (Predicate) {
2284      default:
2285        break;
2286      case CmpInst::FCMP_UEQ:
2287        ExtraCC = AArch64CC::EQ;
2288        CC = AArch64CC::VS;
2289        break;
2290      case CmpInst::FCMP_ONE:
2291        ExtraCC = AArch64CC::MI;
2292        CC = AArch64CC::GT;
2293        break;
2294      }
2295      assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2296
2297      // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2298      if (ExtraCC != AArch64CC::AL) {
2299        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2300            .addImm(ExtraCC)
2301            .addMBB(TBB);
2302      }
2303
2304      // Emit the branch.
2305      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2306          .addImm(CC)
2307          .addMBB(TBB);
2308
2309      // Obtain the branch weight and add the TrueBB to the successor list.
2310      uint32_t BranchWeight = 0;
2311      if (FuncInfo.BPI)
2312        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2313                                                  TBB->getBasicBlock());
2314      FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2315
2316      fastEmitBranch(FBB, DbgLoc);
2317      return true;
2318    }
2319  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
2320    MVT SrcVT;
2321    if (TI->hasOneUse() && isValueAvailable(TI) &&
2322        isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
2323      unsigned CondReg = getRegForValue(TI->getOperand(0));
2324      if (!CondReg)
2325        return false;
2326      bool CondIsKill = hasTrivialKill(TI->getOperand(0));
2327
2328      // Issue an extract_subreg to get the lower 32 bits.
2329      if (SrcVT == MVT::i64) {
2330        CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
2331                                             AArch64::sub_32);
2332        CondIsKill = true;
2333      }
2334
2335      unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2336      assert(ANDReg && "Unexpected AND instruction emission failure.");
2337      emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2338
2339      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2340        std::swap(TBB, FBB);
2341        CC = AArch64CC::EQ;
2342      }
2343      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2344          .addImm(CC)
2345          .addMBB(TBB);
2346
2347      // Obtain the branch weight and add the TrueBB to the successor list.
2348      uint32_t BranchWeight = 0;
2349      if (FuncInfo.BPI)
2350        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2351                                                  TBB->getBasicBlock());
2352      FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2353
2354      fastEmitBranch(FBB, DbgLoc);
2355      return true;
2356    }
2357  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2358    uint64_t Imm = CI->getZExtValue();
2359    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2360    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2361        .addMBB(Target);
2362
2363    // Obtain the branch weight and add the target to the successor list.
2364    uint32_t BranchWeight = 0;
2365    if (FuncInfo.BPI)
2366      BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2367                                                 Target->getBasicBlock());
2368    FuncInfo.MBB->addSuccessor(Target, BranchWeight);
2369    return true;
2370  } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2371    // Fake-request the condition; otherwise the intrinsic might be completely
2372    // optimized away.
2373    unsigned CondReg = getRegForValue(BI->getCondition());
2374    if (!CondReg)
2375      return false;
2376
2377    // Emit the branch.
2378    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2379      .addImm(CC)
2380      .addMBB(TBB);
2381
2382    // Obtain the branch weight and add the TrueBB to the successor list.
2383    uint32_t BranchWeight = 0;
2384    if (FuncInfo.BPI)
2385      BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2386                                                 TBB->getBasicBlock());
2387    FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2388
2389    fastEmitBranch(FBB, DbgLoc);
2390    return true;
2391  }
2392
2393  unsigned CondReg = getRegForValue(BI->getCondition());
2394  if (CondReg == 0)
2395    return false;
2396  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2397
2398  // We've been divorced from our compare!  Our block was split, and
2399  // now our compare lives in a predecessor block.  We mustn't
2400  // re-compare here, as the children of the compare aren't guaranteed
2401  // live across the block boundary (we *could* check for this).
2402  // Regardless, the compare has been done in the predecessor block,
2403  // and it left a value for us in a virtual register.  Ergo, we test
2404  // the one-bit value left in the virtual register.
2405  emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
2406
2407  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2408    std::swap(TBB, FBB);
2409    CC = AArch64CC::EQ;
2410  }
2411
2412  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2413      .addImm(CC)
2414      .addMBB(TBB);
2415
2416  // Obtain the branch weight and add the TrueBB to the successor list.
2417  uint32_t BranchWeight = 0;
2418  if (FuncInfo.BPI)
2419    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2420                                               TBB->getBasicBlock());
2421  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2422
2423  fastEmitBranch(FBB, DbgLoc);
2424  return true;
2425}
2426
2427bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2428  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2429  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2430  if (AddrReg == 0)
2431    return false;
2432
2433  // Emit the indirect branch.
2434  const MCInstrDesc &II = TII.get(AArch64::BR);
2435  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2436  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2437
2438  // Make sure the CFG is up-to-date.
2439  for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
2440    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
2441
2442  return true;
2443}
2444
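/// \brief Lower a compare instruction by emitting the compare and then
/// materializing the boolean result into a GPR with CSINC.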
2445bool AArch64FastISel::selectCmp(const Instruction *I) {
2446  const CmpInst *CI = cast<CmpInst>(I);
2447
2448  // Try to optimize or fold the cmp.
2449  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2450  unsigned ResultReg = 0;
2451  switch (Predicate) {
2452  default:
2453    break;
2454  case CmpInst::FCMP_FALSE:
2455    ResultReg = createResultReg(&AArch64::GPR32RegClass);
2456    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2457            TII.get(TargetOpcode::COPY), ResultReg)
2458        .addReg(AArch64::WZR, getKillRegState(true));
2459    break;
2460  case CmpInst::FCMP_TRUE:
2461    ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2462    break;
2463  }
2464
2465  if (ResultReg) {
2466    updateValueMap(I, ResultReg);
2467    return true;
2468  }
2469
2470  // Emit the cmp.
2471  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2472    return false;
2473
2474  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2475
2476  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2477  // condition codes are inverted because they are used by CSINC.
2478  static unsigned CondCodeTable[2][2] = {
2479    { AArch64CC::NE, AArch64CC::VC },
2480    { AArch64CC::PL, AArch64CC::LE }
2481  };
2482  unsigned *CondCodes = nullptr;
2483  switch (Predicate) {
2484  default:
2485    break;
2486  case CmpInst::FCMP_UEQ:
2487    CondCodes = &CondCodeTable[0][0];
2488    break;
2489  case CmpInst::FCMP_ONE:
2490    CondCodes = &CondCodeTable[1][0];
2491    break;
2492  }
2493
2494  if (CondCodes) {
2495    unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2496    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2497            TmpReg1)
2498        .addReg(AArch64::WZR, getKillRegState(true))
2499        .addReg(AArch64::WZR, getKillRegState(true))
2500        .addImm(CondCodes[0]);
2501    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2502            ResultReg)
2503        .addReg(TmpReg1, getKillRegState(true))
2504        .addReg(AArch64::WZR, getKillRegState(true))
2505        .addImm(CondCodes[1]);
2506
2507    updateValueMap(I, ResultReg);
2508    return true;
2509  }
2510
2511  // Now set a register based on the comparison.
2512  AArch64CC::CondCode CC = getCompareCC(Predicate);
2513  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2514  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2515  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2516          ResultReg)
2517      .addReg(AArch64::WZR, getKillRegState(true))
2518      .addReg(AArch64::WZR, getKillRegState(true))
2519      .addImm(invertedCC);
2520
2521  updateValueMap(I, ResultReg);
2522  return true;
2523}
2524
2525/// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
2526/// value.
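///
/// Roughly: select(C, 1, X) becomes (orr C, X), select(C, 0, X) becomes
/// (bic X, C), select(C, X, 0) becomes (and C, X), and select(C, X, 1)
/// becomes (orr (eor C, 1), X).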
2527bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2528  if (!SI->getType()->isIntegerTy(1))
2529    return false;
2530
2531  const Value *Src1Val, *Src2Val;
2532  unsigned Opc = 0;
2533  bool NeedExtraOp = false;
2534  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2535    if (CI->isOne()) {
2536      Src1Val = SI->getCondition();
2537      Src2Val = SI->getFalseValue();
2538      Opc = AArch64::ORRWrr;
2539    } else {
2540      assert(CI->isZero());
2541      Src1Val = SI->getFalseValue();
2542      Src2Val = SI->getCondition();
2543      Opc = AArch64::BICWrr;
2544    }
2545  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2546    if (CI->isOne()) {
2547      Src1Val = SI->getCondition();
2548      Src2Val = SI->getTrueValue();
2549      Opc = AArch64::ORRWrr;
2550      NeedExtraOp = true;
2551    } else {
2552      assert(CI->isZero());
2553      Src1Val = SI->getCondition();
2554      Src2Val = SI->getTrueValue();
2555      Opc = AArch64::ANDWrr;
2556    }
2557  }
2558
2559  if (!Opc)
2560    return false;
2561
2562  unsigned Src1Reg = getRegForValue(Src1Val);
2563  if (!Src1Reg)
2564    return false;
2565  bool Src1IsKill = hasTrivialKill(Src1Val);
2566
2567  unsigned Src2Reg = getRegForValue(Src2Val);
2568  if (!Src2Reg)
2569    return false;
2570  bool Src2IsKill = hasTrivialKill(Src2Val);
2571
2572  if (NeedExtraOp) {
2573    Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2574    Src1IsKill = true;
2575  }
2576  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32spRegClass, Src1Reg,
2577                                       Src1IsKill, Src2Reg, Src2IsKill);
2578  updateValueMap(SI, ResultReg);
2579  return true;
2580}
2581
2582bool AArch64FastISel::selectSelect(const Instruction *I) {
2583  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2584  MVT VT;
2585  if (!isTypeSupported(I->getType(), VT))
2586    return false;
2587
2588  unsigned Opc;
2589  const TargetRegisterClass *RC;
2590  switch (VT.SimpleTy) {
2591  default:
2592    return false;
2593  case MVT::i1:
2594  case MVT::i8:
2595  case MVT::i16:
2596  case MVT::i32:
2597    Opc = AArch64::CSELWr;
2598    RC = &AArch64::GPR32RegClass;
2599    break;
2600  case MVT::i64:
2601    Opc = AArch64::CSELXr;
2602    RC = &AArch64::GPR64RegClass;
2603    break;
2604  case MVT::f32:
2605    Opc = AArch64::FCSELSrrr;
2606    RC = &AArch64::FPR32RegClass;
2607    break;
2608  case MVT::f64:
2609    Opc = AArch64::FCSELDrrr;
2610    RC = &AArch64::FPR64RegClass;
2611    break;
2612  }
2613
2614  const SelectInst *SI = cast<SelectInst>(I);
2615  const Value *Cond = SI->getCondition();
2616  AArch64CC::CondCode CC = AArch64CC::NE;
2617  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2618
2619  if (optimizeSelect(SI))
2620    return true;
2621
2622  // Try to pick up the flags, so we don't have to emit another compare.
2623  if (foldXALUIntrinsic(CC, I, Cond)) {
2624    // Fake request the condition to force emission of the XALU intrinsic.
2625    unsigned CondReg = getRegForValue(Cond);
2626    if (!CondReg)
2627      return false;
2628  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2629             isValueAvailable(Cond)) {
2630    const auto *Cmp = cast<CmpInst>(Cond);
2631    // Try to optimize or fold the cmp.
2632    CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2633    const Value *FoldSelect = nullptr;
2634    switch (Predicate) {
2635    default:
2636      break;
2637    case CmpInst::FCMP_FALSE:
2638      FoldSelect = SI->getFalseValue();
2639      break;
2640    case CmpInst::FCMP_TRUE:
2641      FoldSelect = SI->getTrueValue();
2642      break;
2643    }
2644
2645    if (FoldSelect) {
2646      unsigned SrcReg = getRegForValue(FoldSelect);
2647      if (!SrcReg)
2648        return false;
2649      unsigned UseReg = lookUpRegForValue(SI);
2650      if (UseReg)
2651        MRI.clearKillFlags(UseReg);
2652
2653      updateValueMap(I, SrcReg);
2654      return true;
2655    }
2656
2657    // Emit the cmp.
2658    if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2659      return false;
2660
2661    // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2662    CC = getCompareCC(Predicate);
2663    switch (Predicate) {
2664    default:
2665      break;
2666    case CmpInst::FCMP_UEQ:
2667      ExtraCC = AArch64CC::EQ;
2668      CC = AArch64CC::VS;
2669      break;
2670    case CmpInst::FCMP_ONE:
2671      ExtraCC = AArch64CC::MI;
2672      CC = AArch64CC::GT;
2673      break;
2674    }
2675    assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2676  } else {
2677    unsigned CondReg = getRegForValue(Cond);
2678    if (!CondReg)
2679      return false;
2680    bool CondIsKill = hasTrivialKill(Cond);
2681
2682    // Emit a TST instruction (ANDS wzr, reg, #imm).
2683    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri),
2684            AArch64::WZR)
2685        .addReg(CondReg, getKillRegState(CondIsKill))
2686        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2687  }
2688
2689  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2690  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2691
2692  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2693  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2694
2695  if (!Src1Reg || !Src2Reg)
2696    return false;
2697
2698  if (ExtraCC != AArch64CC::AL) {
2699    Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2700                               Src2IsKill, ExtraCC);
2701    Src2IsKill = true;
2702  }
2703  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2704                                        Src2IsKill, CC);
2705  updateValueMap(I, ResultReg);
2706  return true;
2707}
2708
2709bool AArch64FastISel::selectFPExt(const Instruction *I) {
2710  Value *V = I->getOperand(0);
2711  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2712    return false;
2713
2714  unsigned Op = getRegForValue(V);
2715  if (Op == 0)
2716    return false;
2717
2718  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2719  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2720          ResultReg).addReg(Op);
2721  updateValueMap(I, ResultReg);
2722  return true;
2723}
2724
2725bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2726  Value *V = I->getOperand(0);
2727  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2728    return false;
2729
2730  unsigned Op = getRegForValue(V);
2731  if (Op == 0)
2732    return false;
2733
2734  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2735  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2736          ResultReg).addReg(Op);
2737  updateValueMap(I, ResultReg);
2738  return true;
2739}
2740
2741// FPToUI and FPToSI
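// Convert a floating-point value to a 32- or 64-bit integer using FCVTZS or
// FCVTZU; f128 sources are left to SelectionDAG.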
2742bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2743  MVT DestVT;
2744  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2745    return false;
2746
2747  unsigned SrcReg = getRegForValue(I->getOperand(0));
2748  if (SrcReg == 0)
2749    return false;
2750
2751  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2752  if (SrcVT == MVT::f128)
2753    return false;
2754
2755  unsigned Opc;
2756  if (SrcVT == MVT::f64) {
2757    if (Signed)
2758      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2759    else
2760      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2761  } else {
2762    if (Signed)
2763      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2764    else
2765      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2766  }
2767  unsigned ResultReg = createResultReg(
2768      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2769  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2770      .addReg(SrcReg);
2771  updateValueMap(I, ResultReg);
2772  return true;
2773}
2774
2775bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2776  MVT DestVT;
2777  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2778    return false;
2779  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2780         "Unexpected value type.");
2781
2782  unsigned SrcReg = getRegForValue(I->getOperand(0));
2783  if (!SrcReg)
2784    return false;
2785  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2786
2787  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2788
2789  // Handle sign-extension.
2790  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2791    SrcReg =
2792        emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2793    if (!SrcReg)
2794      return false;
2795    SrcIsKill = true;
2796  }
2797
2798  unsigned Opc;
2799  if (SrcVT == MVT::i64) {
2800    if (Signed)
2801      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2802    else
2803      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2804  } else {
2805    if (Signed)
2806      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2807    else
2808      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2809  }
2810
2811  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2812                                      SrcIsKill);
2813  updateValueMap(I, ResultReg);
2814  return true;
2815}
2816
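/// \brief Lower incoming arguments directly into their assigned registers.
///
/// Only the C calling convention with at most eight GPR and eight FPR/vector
/// arguments is handled; varargs, byval, inreg, sret, nest, and aggregate
/// arguments are left to SelectionDAG.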
2817bool AArch64FastISel::fastLowerArguments() {
2818  if (!FuncInfo.CanLowerReturn)
2819    return false;
2820
2821  const Function *F = FuncInfo.Fn;
2822  if (F->isVarArg())
2823    return false;
2824
2825  CallingConv::ID CC = F->getCallingConv();
2826  if (CC != CallingConv::C)
2827    return false;
2828
2829  // Only handle simple cases: at most eight GPR and eight FPR arguments.
2830  unsigned GPRCnt = 0;
2831  unsigned FPRCnt = 0;
2832  unsigned Idx = 0;
2833  for (auto const &Arg : F->args()) {
2834    // Attribute indices are 1-based; the first argument is at index 1.
2835    ++Idx;
2836    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2837        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2838        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2839        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2840      return false;
2841
2842    Type *ArgTy = Arg.getType();
2843    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2844      return false;
2845
2846    EVT ArgVT = TLI.getValueType(ArgTy);
2847    if (!ArgVT.isSimple())
2848      return false;
2849
2850    MVT VT = ArgVT.getSimpleVT().SimpleTy;
2851    if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2852      return false;
2853
2854    if (VT.isVector() &&
2855        (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2856      return false;
2857
2858    if (VT >= MVT::i1 && VT <= MVT::i64)
2859      ++GPRCnt;
2860    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2861             VT.is128BitVector())
2862      ++FPRCnt;
2863    else
2864      return false;
2865
2866    if (GPRCnt > 8 || FPRCnt > 8)
2867      return false;
2868  }
2869
2870  static const MCPhysReg Registers[6][8] = {
2871    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2872      AArch64::W5, AArch64::W6, AArch64::W7 },
2873    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2874      AArch64::X5, AArch64::X6, AArch64::X7 },
2875    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2876      AArch64::H5, AArch64::H6, AArch64::H7 },
2877    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2878      AArch64::S5, AArch64::S6, AArch64::S7 },
2879    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2880      AArch64::D5, AArch64::D6, AArch64::D7 },
2881    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2882      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2883  };
2884
2885  unsigned GPRIdx = 0;
2886  unsigned FPRIdx = 0;
2887  for (auto const &Arg : F->args()) {
2888    MVT VT = TLI.getSimpleValueType(Arg.getType());
2889    unsigned SrcReg;
2890    const TargetRegisterClass *RC;
2891    if (VT >= MVT::i1 && VT <= MVT::i32) {
2892      SrcReg = Registers[0][GPRIdx++];
2893      RC = &AArch64::GPR32RegClass;
2894      VT = MVT::i32;
2895    } else if (VT == MVT::i64) {
2896      SrcReg = Registers[1][GPRIdx++];
2897      RC = &AArch64::GPR64RegClass;
2898    } else if (VT == MVT::f16) {
2899      SrcReg = Registers[2][FPRIdx++];
2900      RC = &AArch64::FPR16RegClass;
2901    } else if (VT == MVT::f32) {
2902      SrcReg = Registers[3][FPRIdx++];
2903      RC = &AArch64::FPR32RegClass;
2904    } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2905      SrcReg = Registers[4][FPRIdx++];
2906      RC = &AArch64::FPR64RegClass;
2907    } else if (VT.is128BitVector()) {
2908      SrcReg = Registers[5][FPRIdx++];
2909      RC = &AArch64::FPR128RegClass;
2910    } else
2911      llvm_unreachable("Unexpected value type.");
2912
2913    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2914    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2915    // Without this, EmitLiveInCopies may eliminate the livein if its only
2916    // use is a bitcast (which isn't turned into an instruction).
2917    unsigned ResultReg = createResultReg(RC);
2918    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2919            TII.get(TargetOpcode::COPY), ResultReg)
2920        .addReg(DstReg, getKillRegState(true));
2921    updateValueMap(&Arg, ResultReg);
2922  }
2923  return true;
2924}
2925
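/// \brief Lower outgoing call arguments and set up the call frame.
///
/// Each argument is assigned by the calling convention, promoted if required,
/// and then either copied into its assigned register or stored to the stack
/// relative to SP.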
2926bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2927                                      SmallVectorImpl<MVT> &OutVTs,
2928                                      unsigned &NumBytes) {
2929  CallingConv::ID CC = CLI.CallConv;
2930  SmallVector<CCValAssign, 16> ArgLocs;
2931  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2932  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2933
2934  // Get a count of how many bytes are to be pushed on the stack.
2935  NumBytes = CCInfo.getNextStackOffset();
2936
2937  // Issue CALLSEQ_START
2938  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2939  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2940    .addImm(NumBytes);
2941
2942  // Process the args.
2943  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2944    CCValAssign &VA = ArgLocs[i];
2945    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2946    MVT ArgVT = OutVTs[VA.getValNo()];
2947
2948    unsigned ArgReg = getRegForValue(ArgVal);
2949    if (!ArgReg)
2950      return false;
2951
2952    // Handle arg promotion: SExt, ZExt, AExt.
2953    switch (VA.getLocInfo()) {
2954    case CCValAssign::Full:
2955      break;
2956    case CCValAssign::SExt: {
2957      MVT DestVT = VA.getLocVT();
2958      MVT SrcVT = ArgVT;
2959      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2960      if (!ArgReg)
2961        return false;
2962      break;
2963    }
2964    case CCValAssign::AExt:
2965    // Intentional fall-through.
2966    case CCValAssign::ZExt: {
2967      MVT DestVT = VA.getLocVT();
2968      MVT SrcVT = ArgVT;
2969      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2970      if (!ArgReg)
2971        return false;
2972      break;
2973    }
2974    default:
2975      llvm_unreachable("Unknown arg promotion!");
2976    }
2977
2978    // Now copy/store arg to correct locations.
2979    if (VA.isRegLoc() && !VA.needsCustom()) {
2980      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2981              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2982      CLI.OutRegs.push_back(VA.getLocReg());
2983    } else if (VA.needsCustom()) {
2984      // FIXME: Handle custom args.
2985      return false;
2986    } else {
2987      assert(VA.isMemLoc() && "Assuming store on stack.");
2988
2989      // Don't emit stores for undef values.
2990      if (isa<UndefValue>(ArgVal))
2991        continue;
2992
2993      // Need to store on the stack.
2994      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
2995
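      // On big-endian targets, arguments smaller than 8 bytes are stored at
      // the high end of their 8-byte stack slot.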
2996      unsigned BEAlign = 0;
2997      if (ArgSize < 8 && !Subtarget->isLittleEndian())
2998        BEAlign = 8 - ArgSize;
2999
3000      Address Addr;
3001      Addr.setKind(Address::RegBase);
3002      Addr.setReg(AArch64::SP);
3003      Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3004
3005      unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3006      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3007        MachinePointerInfo::getStack(Addr.getOffset()),
3008        MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3009
3010      if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3011        return false;
3012    }
3013  }
3014  return true;
3015}
3016
3017bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3018                                 unsigned NumBytes) {
3019  CallingConv::ID CC = CLI.CallConv;
3020
3021  // Issue CALLSEQ_END
3022  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3023  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3024    .addImm(NumBytes).addImm(0);
3025
3026  // Now the return value.
3027  if (RetVT != MVT::isVoid) {
3028    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, /*IsVarArg=*/false, *FuncInfo.MF, RVLocs, *Context);
3030    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3031
3032    // Only handle a single return value.
3033    if (RVLocs.size() != 1)
3034      return false;
3035
3036    // Copy all of the result registers out of their specified physreg.
3037    MVT CopyVT = RVLocs[0].getValVT();
3038
3039    // TODO: Handle big-endian results
3040    if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3041      return false;
3042
3043    unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3044    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3045            TII.get(TargetOpcode::COPY), ResultReg)
3046        .addReg(RVLocs[0].getLocReg());
3047    CLI.InRegs.push_back(RVLocs[0].getLocReg());
3048
3049    CLI.ResultReg = ResultReg;
3050    CLI.NumResultRegs = 1;
3051  }
3052
3053  return true;
3054}
3055
3056bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3057  CallingConv::ID CC  = CLI.CallConv;
3058  bool IsTailCall     = CLI.IsTailCall;
3059  bool IsVarArg       = CLI.IsVarArg;
3060  const Value *Callee = CLI.Callee;
3061  const char *SymName = CLI.SymName;
3062
3063  if (!Callee && !SymName)
3064    return false;
3065
3066  // Allow SelectionDAG isel to handle tail calls.
3067  if (IsTailCall)
3068    return false;
3069
3070  CodeModel::Model CM = TM.getCodeModel();
  // Only support the small and large code models.
3072  if (CM != CodeModel::Small && CM != CodeModel::Large)
3073    return false;
3074
3075  // FIXME: Add large code model support for ELF.
3076  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3077    return false;
3078
3079  // Let SDISel handle vararg functions.
3080  if (IsVarArg)
3081    return false;
3082
3083  // FIXME: Only handle *simple* calls for now.
3084  MVT RetVT;
3085  if (CLI.RetTy->isVoidTy())
3086    RetVT = MVT::isVoid;
3087  else if (!isTypeLegal(CLI.RetTy, RetVT))
3088    return false;
3089
3090  for (auto Flag : CLI.OutFlags)
3091    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
3092      return false;
3093
3094  // Set up the argument vectors.
3095  SmallVector<MVT, 16> OutVTs;
3096  OutVTs.reserve(CLI.OutVals.size());
3097
3098  for (auto *Val : CLI.OutVals) {
3099    MVT VT;
3100    if (!isTypeLegal(Val->getType(), VT) &&
3101        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3102      return false;
3103
3104    // We don't handle vector parameters yet.
3105    if (VT.isVector() || VT.getSizeInBits() > 64)
3106      return false;
3107
3108    OutVTs.push_back(VT);
3109  }
3110
3111  Address Addr;
3112  if (Callee && !computeCallAddress(Callee, Addr))
3113    return false;
3114
3115  // Handle the arguments now that we've gotten them.
3116  unsigned NumBytes;
3117  if (!processCallArgs(CLI, OutVTs, NumBytes))
3118    return false;
3119
3120  // Issue the call.
3121  MachineInstrBuilder MIB;
3122  if (CM == CodeModel::Small) {
3123    const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3124    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3125    if (SymName)
3126      MIB.addExternalSymbol(SymName, 0);
3127    else if (Addr.getGlobalValue())
3128      MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3129    else if (Addr.getReg()) {
3130      unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3131      MIB.addReg(Reg);
3132    } else
3133      return false;
3134  } else {
3135    unsigned CallReg = 0;
3136    if (SymName) {
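      // In the large code model the callee address is loaded from the GOT:
      // ADRP to the symbol's GOT page, LDR the GOT entry, then BLR below.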
3137      unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3138      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3139              ADRPReg)
3140        .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3141
3142      CallReg = createResultReg(&AArch64::GPR64RegClass);
3143      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
3144              CallReg)
3145        .addReg(ADRPReg)
3146        .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
3147                           AArch64II::MO_NC);
3148    } else if (Addr.getGlobalValue())
3149      CallReg = materializeGV(Addr.getGlobalValue());
3150    else if (Addr.getReg())
3151      CallReg = Addr.getReg();
3152
3153    if (!CallReg)
3154      return false;
3155
3156    const MCInstrDesc &II = TII.get(AArch64::BLR);
3157    CallReg = constrainOperandRegClass(II, CallReg, 0);
3158    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3159  }
3160
3161  // Add implicit physical register uses to the call.
3162  for (auto Reg : CLI.OutRegs)
3163    MIB.addReg(Reg, RegState::Implicit);
3164
3165  // Add a register mask with the call-preserved registers.
3166  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3167  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3168
3169  CLI.Call = MIB;
3170
3171  // Finish off the call including any return values.
3172  return finishCall(CLI, RetVT, NumBytes);
3173}
3174
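/// \brief Return true if a memcpy of \p Len bytes with the given alignment is
/// small enough to be expanded inline instead of being lowered to a libcall.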
3175bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3176  if (Alignment)
3177    return Len / Alignment <= 4;
3178  else
3179    return Len < 32;
3180}
3181
3182bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3183                                         uint64_t Len, unsigned Alignment) {
3184  // Make sure we don't bloat code by inlining very large memcpy's.
3185  if (!isMemCpySmall(Len, Alignment))
3186    return false;
3187
3188  int64_t UnscaledOffset = 0;
3189  Address OrigDest = Dest;
3190  Address OrigSrc = Src;
3191
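  // Copy using the widest loads/stores that the remaining length and the
  // alignment allow, e.g. a 7-byte copy with unknown (or >= 8 byte) alignment
  // becomes an i32, an i16, and an i8 load/store pair.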
3192  while (Len) {
3193    MVT VT;
3194    if (!Alignment || Alignment >= 8) {
3195      if (Len >= 8)
3196        VT = MVT::i64;
3197      else if (Len >= 4)
3198        VT = MVT::i32;
3199      else if (Len >= 2)
3200        VT = MVT::i16;
3201      else {
3202        VT = MVT::i8;
3203      }
3204    } else {
3205      // Bound based on alignment.
3206      if (Len >= 4 && Alignment == 4)
3207        VT = MVT::i32;
3208      else if (Len >= 2 && Alignment == 2)
3209        VT = MVT::i16;
3210      else {
3211        VT = MVT::i8;
3212      }
3213    }
3214
3215    unsigned ResultReg = emitLoad(VT, VT, Src);
3216    if (!ResultReg)
3217      return false;
3218
3219    if (!emitStore(VT, ResultReg, Dest))
3220      return false;
3221
3222    int64_t Size = VT.getSizeInBits() / 8;
3223    Len -= Size;
3224    UnscaledOffset += Size;
3225
3226    // We need to recompute the unscaled offset for each iteration.
3227    Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3228    Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3229  }
3230
3231  return true;
3232}
3233
3234/// \brief Check if it is possible to fold the condition from the XALU intrinsic
3235/// into the user. The condition code will only be updated on success.
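/// For example, given
///   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %ov  = extractvalue { i32, i1 } %res, 1
/// a branch or select on %ov can consume the flags set by the ADDS emitted for
/// the intrinsic instead of materializing %ov in a register.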
3236bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3237                                        const Instruction *I,
3238                                        const Value *Cond) {
3239  if (!isa<ExtractValueInst>(Cond))
3240    return false;
3241
3242  const auto *EV = cast<ExtractValueInst>(Cond);
3243  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3244    return false;
3245
3246  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3247  MVT RetVT;
3248  const Function *Callee = II->getCalledFunction();
3249  Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3251  if (!isTypeLegal(RetTy, RetVT))
3252    return false;
3253
3254  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3255    return false;
3256
3257  const Value *LHS = II->getArgOperand(0);
3258  const Value *RHS = II->getArgOperand(1);
3259
3260  // Canonicalize immediate to the RHS.
3261  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3262      isCommutativeIntrinsic(II))
3263    std::swap(LHS, RHS);
3264
3265  // Simplify multiplies.
3266  unsigned IID = II->getIntrinsicID();
3267  switch (IID) {
3268  default:
3269    break;
3270  case Intrinsic::smul_with_overflow:
3271    if (const auto *C = dyn_cast<ConstantInt>(RHS))
3272      if (C->getValue() == 2)
3273        IID = Intrinsic::sadd_with_overflow;
3274    break;
3275  case Intrinsic::umul_with_overflow:
3276    if (const auto *C = dyn_cast<ConstantInt>(RHS))
3277      if (C->getValue() == 2)
3278        IID = Intrinsic::uadd_with_overflow;
3279    break;
3280  }
3281
3282  AArch64CC::CondCode TmpCC;
3283  switch (IID) {
3284  default:
3285    return false;
3286  case Intrinsic::sadd_with_overflow:
3287  case Intrinsic::ssub_with_overflow:
3288    TmpCC = AArch64CC::VS;
3289    break;
3290  case Intrinsic::uadd_with_overflow:
3291    TmpCC = AArch64CC::HS;
3292    break;
3293  case Intrinsic::usub_with_overflow:
3294    TmpCC = AArch64CC::LO;
3295    break;
3296  case Intrinsic::smul_with_overflow:
3297  case Intrinsic::umul_with_overflow:
3298    TmpCC = AArch64CC::NE;
3299    break;
3300  }
3301
3302  // Check if both instructions are in the same basic block.
3303  if (!isValueAvailable(II))
3304    return false;
3305
  // Make sure nothing is in the way that could clobber the flags.
3307  BasicBlock::const_iterator Start = I;
3308  BasicBlock::const_iterator End = II;
3309  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3310    // We only expect extractvalue instructions between the intrinsic and the
3311    // instruction to be selected.
3312    if (!isa<ExtractValueInst>(Itr))
3313      return false;
3314
3315    // Check that the extractvalue operand comes from the intrinsic.
3316    const auto *EVI = cast<ExtractValueInst>(Itr);
3317    if (EVI->getAggregateOperand() != II)
3318      return false;
3319  }
3320
3321  CC = TmpCC;
3322  return true;
3323}
3324
3325bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3326  // FIXME: Handle more intrinsics.
3327  switch (II->getIntrinsicID()) {
3328  default: return false;
3329  case Intrinsic::frameaddress: {
3330    MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
3331    MFI->setFrameAddressIsTaken(true);
3332
3333    const AArch64RegisterInfo *RegInfo =
3334        static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
3335    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3336    unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3337    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3338            TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3339    // Recursively load frame address
3340    // ldr x0, [fp]
3341    // ldr x0, [x0]
3342    // ldr x0, [x0]
3343    // ...
3344    unsigned DestReg;
3345    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3346    while (Depth--) {
3347      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3348                                SrcReg, /*IsKill=*/true, 0);
3349      assert(DestReg && "Unexpected LDR instruction emission failure.");
3350      SrcReg = DestReg;
3351    }
3352
3353    updateValueMap(II, SrcReg);
3354    return true;
3355  }
3356  case Intrinsic::memcpy:
3357  case Intrinsic::memmove: {
3358    const auto *MTI = cast<MemTransferInst>(II);
3359    // Don't handle volatile.
3360    if (MTI->isVolatile())
3361      return false;
3362
    // Disable inlining for memmove before calls to computeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
3365    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3366    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
      // Small memcpys are common enough that we want to do them without a
      // call if possible.
3369      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3370      unsigned Alignment = MTI->getAlignment();
3371      if (isMemCpySmall(Len, Alignment)) {
3372        Address Dest, Src;
3373        if (!computeAddress(MTI->getRawDest(), Dest) ||
3374            !computeAddress(MTI->getRawSource(), Src))
3375          return false;
3376        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3377          return true;
3378      }
3379    }
3380
3381    if (!MTI->getLength()->getType()->isIntegerTy(64))
3382      return false;
3383
3384    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3385      // Fast instruction selection doesn't support the special
3386      // address spaces.
3387      return false;
3388
3389    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3390    return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3391  }
3392  case Intrinsic::memset: {
3393    const MemSetInst *MSI = cast<MemSetInst>(II);
3394    // Don't handle volatile.
3395    if (MSI->isVolatile())
3396      return false;
3397
3398    if (!MSI->getLength()->getType()->isIntegerTy(64))
3399      return false;
3400
3401    if (MSI->getDestAddressSpace() > 255)
3402      // Fast instruction selection doesn't support the special
3403      // address spaces.
3404      return false;
3405
3406    return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3407  }
3408  case Intrinsic::sin:
3409  case Intrinsic::cos:
3410  case Intrinsic::pow: {
3411    MVT RetVT;
3412    if (!isTypeLegal(II->getType(), RetVT))
3413      return false;
3414
3415    if (RetVT != MVT::f32 && RetVT != MVT::f64)
3416      return false;
3417
3418    static const RTLIB::Libcall LibCallTable[3][2] = {
3419      { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3420      { RTLIB::COS_F32, RTLIB::COS_F64 },
3421      { RTLIB::POW_F32, RTLIB::POW_F64 }
3422    };
3423    RTLIB::Libcall LC;
3424    bool Is64Bit = RetVT == MVT::f64;
3425    switch (II->getIntrinsicID()) {
3426    default:
3427      llvm_unreachable("Unexpected intrinsic.");
3428    case Intrinsic::sin:
3429      LC = LibCallTable[0][Is64Bit];
3430      break;
3431    case Intrinsic::cos:
3432      LC = LibCallTable[1][Is64Bit];
3433      break;
3434    case Intrinsic::pow:
3435      LC = LibCallTable[2][Is64Bit];
3436      break;
3437    }
3438
3439    ArgListTy Args;
3440    Args.reserve(II->getNumArgOperands());
3441
3442    // Populate the argument list.
3443    for (auto &Arg : II->arg_operands()) {
3444      ArgListEntry Entry;
3445      Entry.Val = Arg;
3446      Entry.Ty = Arg->getType();
3447      Args.push_back(Entry);
3448    }
3449
3450    CallLoweringInfo CLI;
3451    CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
3452                  TLI.getLibcallName(LC), std::move(Args));
3453    if (!lowerCallTo(CLI))
3454      return false;
3455    updateValueMap(II, CLI.ResultReg);
3456    return true;
3457  }
3458  case Intrinsic::fabs: {
3459    MVT VT;
3460    if (!isTypeLegal(II->getType(), VT))
3461      return false;
3462
3463    unsigned Opc;
3464    switch (VT.SimpleTy) {
3465    default:
3466      return false;
3467    case MVT::f32:
3468      Opc = AArch64::FABSSr;
3469      break;
3470    case MVT::f64:
3471      Opc = AArch64::FABSDr;
3472      break;
3473    }
3474    unsigned SrcReg = getRegForValue(II->getOperand(0));
3475    if (!SrcReg)
3476      return false;
3477    bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3478    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3479    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3480      .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3481    updateValueMap(II, ResultReg);
3482    return true;
3483  }
3484  case Intrinsic::trap: {
3485    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3486        .addImm(1);
3487    return true;
3488  }
3489  case Intrinsic::sqrt: {
3490    Type *RetTy = II->getCalledFunction()->getReturnType();
3491
3492    MVT VT;
3493    if (!isTypeLegal(RetTy, VT))
3494      return false;
3495
3496    unsigned Op0Reg = getRegForValue(II->getOperand(0));
3497    if (!Op0Reg)
3498      return false;
3499    bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3500
3501    unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3502    if (!ResultReg)
3503      return false;
3504
3505    updateValueMap(II, ResultReg);
3506    return true;
3507  }
3508  case Intrinsic::sadd_with_overflow:
3509  case Intrinsic::uadd_with_overflow:
3510  case Intrinsic::ssub_with_overflow:
3511  case Intrinsic::usub_with_overflow:
3512  case Intrinsic::smul_with_overflow:
3513  case Intrinsic::umul_with_overflow: {
3514    // This implements the basic lowering of the xalu with overflow intrinsics.
3515    const Function *Callee = II->getCalledFunction();
3516    auto *Ty = cast<StructType>(Callee->getReturnType());
3517    Type *RetTy = Ty->getTypeAtIndex(0U);
3518
3519    MVT VT;
3520    if (!isTypeLegal(RetTy, VT))
3521      return false;
3522
3523    if (VT != MVT::i32 && VT != MVT::i64)
3524      return false;
3525
3526    const Value *LHS = II->getArgOperand(0);
3527    const Value *RHS = II->getArgOperand(1);
3528    // Canonicalize immediate to the RHS.
3529    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3530        isCommutativeIntrinsic(II))
3531      std::swap(LHS, RHS);
3532
3533    // Simplify multiplies.
3534    unsigned IID = II->getIntrinsicID();
3535    switch (IID) {
3536    default:
3537      break;
3538    case Intrinsic::smul_with_overflow:
3539      if (const auto *C = dyn_cast<ConstantInt>(RHS))
3540        if (C->getValue() == 2) {
3541          IID = Intrinsic::sadd_with_overflow;
3542          RHS = LHS;
3543        }
3544      break;
3545    case Intrinsic::umul_with_overflow:
3546      if (const auto *C = dyn_cast<ConstantInt>(RHS))
3547        if (C->getValue() == 2) {
3548          IID = Intrinsic::uadd_with_overflow;
3549          RHS = LHS;
3550        }
3551      break;
3552    }
3553
3554    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3555    AArch64CC::CondCode CC = AArch64CC::Invalid;
3556    switch (IID) {
3557    default: llvm_unreachable("Unexpected intrinsic!");
3558    case Intrinsic::sadd_with_overflow:
3559      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3560      CC = AArch64CC::VS;
3561      break;
3562    case Intrinsic::uadd_with_overflow:
3563      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3564      CC = AArch64CC::HS;
3565      break;
3566    case Intrinsic::ssub_with_overflow:
3567      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3568      CC = AArch64CC::VS;
3569      break;
3570    case Intrinsic::usub_with_overflow:
3571      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3572      CC = AArch64CC::LO;
3573      break;
3574    case Intrinsic::smul_with_overflow: {
3575      CC = AArch64CC::NE;
3576      unsigned LHSReg = getRegForValue(LHS);
3577      if (!LHSReg)
3578        return false;
3579      bool LHSIsKill = hasTrivialKill(LHS);
3580
3581      unsigned RHSReg = getRegForValue(RHS);
3582      if (!RHSReg)
3583        return false;
3584      bool RHSIsKill = hasTrivialKill(RHS);
3585
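      // Overflow is detected by comparing the high half of the full product
      // against the sign-extension of the low half, e.g. for i32:
      //   smull x8, w0, w1
      //   lsr   x9, x8, #32
      //   cmp   w9, w8, asr #31
      // and for i64 via MUL + SMULH compared with an ASR #63 of the low half.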
3586      if (VT == MVT::i32) {
3587        MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3588        unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3589                                       /*IsKill=*/false, 32);
3590        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3591                                            AArch64::sub_32);
3592        ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3593                                              AArch64::sub_32);
3594        emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3595                    AArch64_AM::ASR, 31, /*WantResult=*/false);
3596      } else {
3597        assert(VT == MVT::i64 && "Unexpected value type.");
3598        MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3599        unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3600                                        RHSReg, RHSIsKill);
3601        emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3602                    AArch64_AM::ASR, 63, /*WantResult=*/false);
3603      }
3604      break;
3605    }
3606    case Intrinsic::umul_with_overflow: {
3607      CC = AArch64CC::NE;
3608      unsigned LHSReg = getRegForValue(LHS);
3609      if (!LHSReg)
3610        return false;
3611      bool LHSIsKill = hasTrivialKill(LHS);
3612
3613      unsigned RHSReg = getRegForValue(RHS);
3614      if (!RHSReg)
3615        return false;
3616      bool RHSIsKill = hasTrivialKill(RHS);
3617
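      // The unsigned product overflows iff its high half is non-zero, e.g.
      // for i32:
      //   umull x8, w0, w1
      //   cmp   xzr, x8, lsr #32
      // and for i64 via MUL + UMULH compared against XZR.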
3618      if (VT == MVT::i32) {
3619        MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3620        emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3621                    /*IsKill=*/false, AArch64_AM::LSR, 32,
3622                    /*WantResult=*/false);
3623        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3624                                            AArch64::sub_32);
3625      } else {
3626        assert(VT == MVT::i64 && "Unexpected value type.");
3627        MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3628        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3629                                        RHSReg, RHSIsKill);
3630        emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3631                    /*IsKill=*/false, /*WantResult=*/false);
3632      }
3633      break;
3634    }
3635    }
3636
3637    if (MulReg) {
3638      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3639      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3640              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3641    }
3642
3643    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3644                                  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3645                                  /*IsKill=*/true, getInvertedCondCode(CC));
3646    (void)ResultReg2;
3647    assert((ResultReg1 + 1) == ResultReg2 &&
3648           "Nonconsecutive result registers.");
3649    updateValueMap(II, ResultReg1, 2);
3650    return true;
3651  }
3652  }
3653  return false;
3654}
3655
3656bool AArch64FastISel::selectRet(const Instruction *I) {
3657  const ReturnInst *Ret = cast<ReturnInst>(I);
3658  const Function &F = *I->getParent()->getParent();
3659
3660  if (!FuncInfo.CanLowerReturn)
3661    return false;
3662
3663  if (F.isVarArg())
3664    return false;
3665
3666  // Build a list of return value registers.
3667  SmallVector<unsigned, 4> RetRegs;
3668
3669  if (Ret->getNumOperands() > 0) {
3670    CallingConv::ID CC = F.getCallingConv();
3671    SmallVector<ISD::OutputArg, 4> Outs;
3672    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
3673
3674    // Analyze operands of the call, assigning locations to each operand.
3675    SmallVector<CCValAssign, 16> ValLocs;
3676    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3677    CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3678                                                     : RetCC_AArch64_AAPCS;
3679    CCInfo.AnalyzeReturn(Outs, RetCC);
3680
3681    // Only handle a single return value for now.
3682    if (ValLocs.size() != 1)
3683      return false;
3684
3685    CCValAssign &VA = ValLocs[0];
3686    const Value *RV = Ret->getOperand(0);
3687
3688    // Don't bother handling odd stuff for now.
3689    if ((VA.getLocInfo() != CCValAssign::Full) &&
3690        (VA.getLocInfo() != CCValAssign::BCvt))
3691      return false;
3692
3693    // Only handle register returns for now.
3694    if (!VA.isRegLoc())
3695      return false;
3696
3697    unsigned Reg = getRegForValue(RV);
3698    if (Reg == 0)
3699      return false;
3700
3701    unsigned SrcReg = Reg + VA.getValNo();
3702    unsigned DestReg = VA.getLocReg();
3703    // Avoid a cross-class copy. This is very unlikely.
3704    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3705      return false;
3706
3707    EVT RVEVT = TLI.getValueType(RV->getType());
3708    if (!RVEVT.isSimple())
3709      return false;
3710
3711    // Vectors (of > 1 lane) in big endian need tricky handling.
3712    if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3713        !Subtarget->isLittleEndian())
3714      return false;
3715
3716    MVT RVVT = RVEVT.getSimpleVT();
3717    if (RVVT == MVT::f128)
3718      return false;
3719
3720    MVT DestVT = VA.getValVT();
3721    // Special handling for extended integers.
3722    if (RVVT != DestVT) {
3723      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3724        return false;
3725
3726      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3727        return false;
3728
3729      bool IsZExt = Outs[0].Flags.isZExt();
3730      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3731      if (SrcReg == 0)
3732        return false;
3733    }
3734
3735    // Make the copy.
3736    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3737            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3738
3739    // Add register to return instruction.
3740    RetRegs.push_back(VA.getLocReg());
3741  }
3742
3743  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3744                                    TII.get(AArch64::RET_ReallyLR));
3745  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
3746    MIB.addReg(RetRegs[i], RegState::Implicit);
3747  return true;
3748}
3749
3750bool AArch64FastISel::selectTrunc(const Instruction *I) {
3751  Type *DestTy = I->getType();
3752  Value *Op = I->getOperand(0);
3753  Type *SrcTy = Op->getType();
3754
3755  EVT SrcEVT = TLI.getValueType(SrcTy, true);
3756  EVT DestEVT = TLI.getValueType(DestTy, true);
3757  if (!SrcEVT.isSimple())
3758    return false;
3759  if (!DestEVT.isSimple())
3760    return false;
3761
3762  MVT SrcVT = SrcEVT.getSimpleVT();
3763  MVT DestVT = DestEVT.getSimpleVT();
3764
3765  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3766      SrcVT != MVT::i8)
3767    return false;
3768  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3769      DestVT != MVT::i1)
3770    return false;
3771
3772  unsigned SrcReg = getRegForValue(Op);
3773  if (!SrcReg)
3774    return false;
3775  bool SrcIsKill = hasTrivialKill(Op);
3776
  // If we're truncating from i64 to a smaller non-legal type then generate an
  // AND. Otherwise, we know the high bits are undefined and a truncate only
  // generates a COPY. We cannot also mark the source register as the result
  // register, because this can incorrectly transfer the kill flag onto the
  // source register.
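  // For example, "trunc i64 %x to i8" becomes an extract of the sub_32
  // sub-register followed by an AND with 0xff.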
3782  unsigned ResultReg;
3783  if (SrcVT == MVT::i64) {
3784    uint64_t Mask = 0;
3785    switch (DestVT.SimpleTy) {
3786    default:
3787      // Trunc i64 to i32 is handled by the target-independent fast-isel.
3788      return false;
3789    case MVT::i1:
3790      Mask = 0x1;
3791      break;
3792    case MVT::i8:
3793      Mask = 0xff;
3794      break;
3795    case MVT::i16:
3796      Mask = 0xffff;
3797      break;
3798    }
3799    // Issue an extract_subreg to get the lower 32-bits.
3800    unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3801                                                AArch64::sub_32);
3802    // Create the AND instruction which performs the actual truncation.
3803    ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3804    assert(ResultReg && "Unexpected AND instruction emission failure.");
3805  } else {
3806    ResultReg = createResultReg(&AArch64::GPR32RegClass);
3807    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3808            TII.get(TargetOpcode::COPY), ResultReg)
3809        .addReg(SrcReg, getKillRegState(SrcIsKill));
3810  }
3811
3812  updateValueMap(I, ResultReg);
3813  return true;
3814}
3815
3816unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3817  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3818          DestVT == MVT::i64) &&
3819         "Unexpected value type.");
3820  // Handle i8 and i16 as i32.
3821  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3822    DestVT = MVT::i32;
3823
3824  if (IsZExt) {
3825    unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3826    assert(ResultReg && "Unexpected AND instruction emission failure.");
3827    if (DestVT == MVT::i64) {
      // We're zero-extending an i1 to i64. The "ANDWri Wd, Ws, #1" implicitly
      // clears the upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to
      // Xd.
3830      unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3831      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3832              TII.get(AArch64::SUBREG_TO_REG), Reg64)
3833          .addImm(0)
3834          .addReg(ResultReg)
3835          .addImm(AArch64::sub_32);
3836      ResultReg = Reg64;
3837    }
3838    return ResultReg;
3839  } else {
3840    if (DestVT == MVT::i64) {
      // FIXME: We're sign-extending an i1 to i64, which isn't handled yet.
3842      return 0;
3843    }
3844    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3845                            /*TODO:IsKill=*/false, 0, 0);
3846  }
3847}
3848
3849unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3850                                      unsigned Op1, bool Op1IsKill) {
3851  unsigned Opc, ZReg;
3852  switch (RetVT.SimpleTy) {
3853  default: return 0;
3854  case MVT::i8:
3855  case MVT::i16:
3856  case MVT::i32:
3857    RetVT = MVT::i32;
3858    Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3859  case MVT::i64:
3860    Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3861  }
3862
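  // MUL is an alias of MADD with the zero register as the addend, so the
  // multiply below is emitted as "madd Rd, Rn, Rm, {w|x}zr".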
3863  const TargetRegisterClass *RC =
3864      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3865  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
                          ZReg, /*IsKill=*/true);
3867}
3868
3869unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3870                                        unsigned Op1, bool Op1IsKill) {
3871  if (RetVT != MVT::i64)
3872    return 0;
3873
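  // SMULL is emitted as SMADDL with XZR as the accumulator.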
3874  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3875                          Op0, Op0IsKill, Op1, Op1IsKill,
3876                          AArch64::XZR, /*IsKill=*/true);
3877}
3878
3879unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3880                                        unsigned Op1, bool Op1IsKill) {
3881  if (RetVT != MVT::i64)
3882    return 0;
3883
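  // UMULL is emitted as UMADDL with XZR as the accumulator.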
3884  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3885                          Op0, Op0IsKill, Op1, Op1IsKill,
3886                          AArch64::XZR, /*IsKill=*/true);
3887}
3888
3889unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3890                                     unsigned Op1Reg, bool Op1IsKill) {
3891  unsigned Opc = 0;
3892  bool NeedTrunc = false;
3893  uint64_t Mask = 0;
3894  switch (RetVT.SimpleTy) {
3895  default: return 0;
3896  case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3897  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3898  case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
3899  case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
3900  }
3901
3902  const TargetRegisterClass *RC =
3903      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
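  // i8/i16 shifts are performed in a W register: zero-extend the shift amount
  // from the original type first and mask the result back down to it after.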
3904  if (NeedTrunc) {
3905    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3906    Op1IsKill = true;
3907  }
3908  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3909                                       Op1IsKill);
3910  if (NeedTrunc)
3911    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3912  return ResultReg;
3913}
3914
3915unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3916                                     bool Op0IsKill, uint64_t Shift,
3917                                     bool IsZExt) {
3918  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3919         "Unexpected source/return type pair.");
3920  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
3921          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
3922         "Unexpected source value type.");
3923  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3924          RetVT == MVT::i64) && "Unexpected return value type.");
3925
3926  bool Is64Bit = (RetVT == MVT::i64);
3927  unsigned RegSize = Is64Bit ? 64 : 32;
3928  unsigned DstBits = RetVT.getSizeInBits();
3929  unsigned SrcBits = SrcVT.getSizeInBits();
3930  const TargetRegisterClass *RC =
3931      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3932
3933  // Just emit a copy for "zero" shifts.
3934  if (Shift == 0) {
3935    if (RetVT == SrcVT) {
3936      unsigned ResultReg = createResultReg(RC);
3937      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3938              TII.get(TargetOpcode::COPY), ResultReg)
3939          .addReg(Op0, getKillRegState(Op0IsKill));
3940      return ResultReg;
3941    } else
3942      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
3943  }
3944
3945  // Don't deal with undefined shifts.
3946  if (Shift >= DstBits)
3947    return 0;
3948
3949  // For immediate shifts we can fold the zero-/sign-extension into the shift.
3950  // {S|U}BFM Wd, Wn, #r, #s
3951  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
3952
3953  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3954  // %2 = shl i16 %1, 4
3955  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3956  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3957  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3958  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3959
3960  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3961  // %2 = shl i16 %1, 8
3962  // Wd<32+7-24,32-24> = Wn<7:0>
3963  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3964  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3965  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3966
3967  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3968  // %2 = shl i16 %1, 12
3969  // Wd<32+3-20,32-20> = Wn<3:0>
3970  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3971  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3972  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3973
3974  unsigned ImmR = RegSize - Shift;
3975  // Limit the width to the length of the source type.
3976  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3977  static const unsigned OpcTable[2][2] = {
3978    {AArch64::SBFMWri, AArch64::SBFMXri},
3979    {AArch64::UBFMWri, AArch64::UBFMXri}
3980  };
3981  unsigned Opc = OpcTable[IsZExt][Is64Bit];
3982  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3983    unsigned TmpReg = MRI.createVirtualRegister(RC);
3984    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3985            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3986        .addImm(0)
3987        .addReg(Op0, getKillRegState(Op0IsKill))
3988        .addImm(AArch64::sub_32);
3989    Op0 = TmpReg;
3990    Op0IsKill = true;
3991  }
3992  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3993}
3994
3995unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3996                                     unsigned Op1Reg, bool Op1IsKill) {
3997  unsigned Opc = 0;
3998  bool NeedTrunc = false;
3999  uint64_t Mask = 0;
4000  switch (RetVT.SimpleTy) {
4001  default: return 0;
4002  case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4003  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4004  case MVT::i32: Opc = AArch64::LSRVWr; break;
4005  case MVT::i64: Opc = AArch64::LSRVXr; break;
4006  }
4007
4008  const TargetRegisterClass *RC =
4009      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4010  if (NeedTrunc) {
4011    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4012    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4013    Op0IsKill = Op1IsKill = true;
4014  }
4015  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4016                                       Op1IsKill);
4017  if (NeedTrunc)
4018    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4019  return ResultReg;
4020}
4021
4022unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4023                                     bool Op0IsKill, uint64_t Shift,
4024                                     bool IsZExt) {
4025  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4026         "Unexpected source/return type pair.");
4027  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4028          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4029         "Unexpected source value type.");
4030  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4031          RetVT == MVT::i64) && "Unexpected return value type.");
4032
4033  bool Is64Bit = (RetVT == MVT::i64);
4034  unsigned RegSize = Is64Bit ? 64 : 32;
4035  unsigned DstBits = RetVT.getSizeInBits();
4036  unsigned SrcBits = SrcVT.getSizeInBits();
4037  const TargetRegisterClass *RC =
4038      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4039
4040  // Just emit a copy for "zero" shifts.
4041  if (Shift == 0) {
4042    if (RetVT == SrcVT) {
4043      unsigned ResultReg = createResultReg(RC);
4044      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4045              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0, getKillRegState(Op0IsKill));
4047      return ResultReg;
4048    } else
4049      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4050  }
4051
4052  // Don't deal with undefined shifts.
4053  if (Shift >= DstBits)
4054    return 0;
4055
4056  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4057  // {S|U}BFM Wd, Wn, #r, #s
4058  // Wd<s-r:0> = Wn<s:r> when r <= s
4059
4060  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4061  // %2 = lshr i16 %1, 4
4062  // Wd<7-4:0> = Wn<7:4>
4063  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4064  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4065  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4066
4067  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4068  // %2 = lshr i16 %1, 8
4069  // Wd<7-7,0> = Wn<7:7>
4070  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4071  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4072  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4073
4074  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4075  // %2 = lshr i16 %1, 12
4076  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4077  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4078  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4079  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4080
4081  if (Shift >= SrcBits && IsZExt)
4082    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4083
4084  // It is not possible to fold a sign-extend into the LShr instruction. In this
4085  // case emit a sign-extend.
4086  if (!IsZExt) {
4087    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4088    if (!Op0)
4089      return 0;
4090    Op0IsKill = true;
4091    SrcVT = RetVT;
4092    SrcBits = SrcVT.getSizeInBits();
4093    IsZExt = true;
4094  }
4095
4096  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4097  unsigned ImmS = SrcBits - 1;
4098  static const unsigned OpcTable[2][2] = {
4099    {AArch64::SBFMWri, AArch64::SBFMXri},
4100    {AArch64::UBFMWri, AArch64::UBFMXri}
4101  };
4102  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4103  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4104    unsigned TmpReg = MRI.createVirtualRegister(RC);
4105    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4106            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4107        .addImm(0)
4108        .addReg(Op0, getKillRegState(Op0IsKill))
4109        .addImm(AArch64::sub_32);
4110    Op0 = TmpReg;
4111    Op0IsKill = true;
4112  }
4113  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4114}
4115
4116unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4117                                     unsigned Op1Reg, bool Op1IsKill) {
4118  unsigned Opc = 0;
4119  bool NeedTrunc = false;
4120  uint64_t Mask = 0;
4121  switch (RetVT.SimpleTy) {
4122  default: return 0;
4123  case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4124  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4125  case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4126  case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4127  }
4128
4129  const TargetRegisterClass *RC =
4130      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4131  if (NeedTrunc) {
4132    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4133    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4134    Op0IsKill = Op1IsKill = true;
4135  }
4136  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4137                                       Op1IsKill);
4138  if (NeedTrunc)
4139    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4140  return ResultReg;
4141}
4142
4143unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4144                                     bool Op0IsKill, uint64_t Shift,
4145                                     bool IsZExt) {
4146  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4147         "Unexpected source/return type pair.");
4148  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4149          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4150         "Unexpected source value type.");
4151  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4152          RetVT == MVT::i64) && "Unexpected return value type.");
4153
4154  bool Is64Bit = (RetVT == MVT::i64);
4155  unsigned RegSize = Is64Bit ? 64 : 32;
4156  unsigned DstBits = RetVT.getSizeInBits();
4157  unsigned SrcBits = SrcVT.getSizeInBits();
4158  const TargetRegisterClass *RC =
4159      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4160
4161  // Just emit a copy for "zero" shifts.
4162  if (Shift == 0) {
4163    if (RetVT == SrcVT) {
4164      unsigned ResultReg = createResultReg(RC);
4165      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4166              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0, getKillRegState(Op0IsKill));
4168      return ResultReg;
4169    } else
4170      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4171  }
4172
4173  // Don't deal with undefined shifts.
4174  if (Shift >= DstBits)
4175    return 0;
4176
4177  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4178  // {S|U}BFM Wd, Wn, #r, #s
4179  // Wd<s-r:0> = Wn<s:r> when r <= s
4180
4181  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4182  // %2 = ashr i16 %1, 4
4183  // Wd<7-4:0> = Wn<7:4>
4184  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4185  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4186  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4187
4188  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4189  // %2 = ashr i16 %1, 8
4190  // Wd<7-7,0> = Wn<7:7>
4191  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4192  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4193  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4194
4195  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4196  // %2 = ashr i16 %1, 12
4197  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4198  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4199  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4200  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4201
4202  if (Shift >= SrcBits && IsZExt)
4203    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4204
4205  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4206  unsigned ImmS = SrcBits - 1;
4207  static const unsigned OpcTable[2][2] = {
4208    {AArch64::SBFMWri, AArch64::SBFMXri},
4209    {AArch64::UBFMWri, AArch64::UBFMXri}
4210  };
4211  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4212  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4213    unsigned TmpReg = MRI.createVirtualRegister(RC);
4214    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4215            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4216        .addImm(0)
4217        .addReg(Op0, getKillRegState(Op0IsKill))
4218        .addImm(AArch64::sub_32);
4219    Op0 = TmpReg;
4220    Op0IsKill = true;
4221  }
4222  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4223}
4224
4225unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4226                                     bool IsZExt) {
4227  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4228
4229  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4230  // DestVT are odd things, so test to make sure that they are both types we can
4231  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4232  // bail out to SelectionDAG.
4233  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4234       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4235      ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4236       (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4237    return 0;
4238
4239  unsigned Opc;
4240  unsigned Imm = 0;
4241
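  // The extension is emitted as a bitfield move, {S|U}BFM Rd, Rn, #0, #Imm,
  // with Imm set to the source width minus one (e.g. sxtb/uxtb for i8).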
4242  switch (SrcVT.SimpleTy) {
4243  default:
4244    return 0;
4245  case MVT::i1:
4246    return emiti1Ext(SrcReg, DestVT, IsZExt);
4247  case MVT::i8:
4248    if (DestVT == MVT::i64)
4249      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4250    else
4251      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4252    Imm = 7;
4253    break;
4254  case MVT::i16:
4255    if (DestVT == MVT::i64)
4256      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4257    else
4258      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4259    Imm = 15;
4260    break;
4261  case MVT::i32:
4262    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4263    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4264    Imm = 31;
4265    break;
4266  }
4267
4268  // Handle i8 and i16 as i32.
4269  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4270    DestVT = MVT::i32;
4271  else if (DestVT == MVT::i64) {
4272    unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4273    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4274            TII.get(AArch64::SUBREG_TO_REG), Src64)
4275        .addImm(0)
4276        .addReg(SrcReg)
4277        .addImm(AArch64::sub_32);
4278    SrcReg = Src64;
4279  }
4280
4281  const TargetRegisterClass *RC =
4282      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4283  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4284}
4285
4286static bool isZExtLoad(const MachineInstr *LI) {
4287  switch (LI->getOpcode()) {
4288  default:
4289    return false;
4290  case AArch64::LDURBBi:
4291  case AArch64::LDURHHi:
4292  case AArch64::LDURWi:
4293  case AArch64::LDRBBui:
4294  case AArch64::LDRHHui:
4295  case AArch64::LDRWui:
4296  case AArch64::LDRBBroX:
4297  case AArch64::LDRHHroX:
4298  case AArch64::LDRWroX:
4299  case AArch64::LDRBBroW:
4300  case AArch64::LDRHHroW:
4301  case AArch64::LDRWroW:
4302    return true;
4303  }
4304}
4305
4306static bool isSExtLoad(const MachineInstr *LI) {
4307  switch (LI->getOpcode()) {
4308  default:
4309    return false;
4310  case AArch64::LDURSBWi:
4311  case AArch64::LDURSHWi:
4312  case AArch64::LDURSBXi:
4313  case AArch64::LDURSHXi:
4314  case AArch64::LDURSWi:
4315  case AArch64::LDRSBWui:
4316  case AArch64::LDRSHWui:
4317  case AArch64::LDRSBXui:
4318  case AArch64::LDRSHXui:
4319  case AArch64::LDRSWui:
4320  case AArch64::LDRSBWroX:
4321  case AArch64::LDRSHWroX:
4322  case AArch64::LDRSBXroX:
4323  case AArch64::LDRSHXroX:
4324  case AArch64::LDRSWroX:
4325  case AArch64::LDRSBWroW:
4326  case AArch64::LDRSHWroW:
4327  case AArch64::LDRSBXroW:
4328  case AArch64::LDRSHXroW:
4329  case AArch64::LDRSWroW:
4330    return true;
4331  }
4332}
4333
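/// \brief Try to fold a sign-/zero-extend into a preceding load that was
/// already selected with the matching extension. For example, a zero-extending
/// 32-bit load already clears the upper 32 bits, so extending its result to
/// i64 only needs a SUBREG_TO_REG instead of a UBFM.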
4334bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4335                                         MVT SrcVT) {
4336  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4337  if (!LI || !LI->hasOneUse())
4338    return false;
4339
4340  // Check if the load instruction has already been selected.
4341  unsigned Reg = lookUpRegForValue(LI);
4342  if (!Reg)
4343    return false;
4344
4345  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4346  if (!MI)
4347    return false;
4348
4349  // Check if the correct load instruction has been emitted - SelectionDAG might
4350  // have emitted a zero-extending load, but we need a sign-extending load.
4351  bool IsZExt = isa<ZExtInst>(I);
4352  const auto *LoadMI = MI;
4353  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4354      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4355    unsigned LoadReg = MI->getOperand(1).getReg();
4356    LoadMI = MRI.getUniqueVRegDef(LoadReg);
4357    assert(LoadMI && "Expected valid instruction");
4358  }
4359  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4360    return false;
4361
4362  // Nothing to be done.
4363  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4364    updateValueMap(I, Reg);
4365    return true;
4366  }
4367
4368  if (IsZExt) {
4369    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4370    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4371            TII.get(AArch64::SUBREG_TO_REG), Reg64)
4372        .addImm(0)
4373        .addReg(Reg, getKillRegState(true))
4374        .addImm(AArch64::sub_32);
4375    Reg = Reg64;
4376  } else {
4377    assert((MI->getOpcode() == TargetOpcode::COPY &&
4378            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4379           "Expected copy instruction");
4380    Reg = MI->getOperand(1).getReg();
4381    MI->eraseFromParent();
4382  }
4383  updateValueMap(I, Reg);
4384  return true;
4385}
4386
4387bool AArch64FastISel::selectIntExt(const Instruction *I) {
4388  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4389         "Unexpected integer extend instruction.");
4390  MVT RetVT;
4391  MVT SrcVT;
4392  if (!isTypeSupported(I->getType(), RetVT))
4393    return false;
4394
4395  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4396    return false;
4397
4398  // Try to optimize already sign-/zero-extended values from load instructions.
4399  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4400    return true;
4401
4402  unsigned SrcReg = getRegForValue(I->getOperand(0));
4403  if (!SrcReg)
4404    return false;
4405  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4406
4407  // Try to optimize already sign-/zero-extended values from function arguments.
4408  bool IsZExt = isa<ZExtInst>(I);
4409  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4410    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4411      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4412        unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4413        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4414                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4415            .addImm(0)
4416            .addReg(SrcReg, getKillRegState(SrcIsKill))
4417            .addImm(AArch64::sub_32);
4418        SrcReg = ResultReg;
4419      }
4420      // Conservatively clear all kill flags from all uses, because we are
4421      // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4422      // level. The result of the instruction at IR level might have been
      // trivially dead, which is now no longer true.
4424      unsigned UseReg = lookUpRegForValue(I);
4425      if (UseReg)
4426        MRI.clearKillFlags(UseReg);
4427
4428      updateValueMap(I, SrcReg);
4429      return true;
4430    }
4431  }
4432
4433  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4434  if (!ResultReg)
4435    return false;
4436
4437  updateValueMap(I, ResultReg);
4438  return true;
4439}
4440
4441bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4442  EVT DestEVT = TLI.getValueType(I->getType(), true);
4443  if (!DestEVT.isSimple())
4444    return false;
4445
4446  MVT DestVT = DestEVT.getSimpleVT();
4447  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4448    return false;
4449
4450  unsigned DivOpc;
4451  bool Is64bit = (DestVT == MVT::i64);
4452  switch (ISDOpcode) {
4453  default:
4454    return false;
4455  case ISD::SREM:
4456    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4457    break;
4458  case ISD::UREM:
4459    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4460    break;
4461  }
4462  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4463  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4464  if (!Src0Reg)
4465    return false;
4466  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4467
4468  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4469  if (!Src1Reg)
4470    return false;
4471  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4472
4473  const TargetRegisterClass *RC =
4474      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4475  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4476                                     Src1Reg, /*IsKill=*/false);
4477  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4478  // The remainder is computed as numerator - (quotient * denominator) using the
4479  // MSUB instruction.
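  // For a 32-bit srem this expands to, e.g.:
  //   sdiv w8, w0, w1
  //   msub w0, w8, w1, w0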
4480  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4481                                        Src1Reg, Src1IsKill, Src0Reg,
4482                                        Src0IsKill);
4483  updateValueMap(I, ResultReg);
4484  return true;
4485}
4486
4487bool AArch64FastISel::selectMul(const Instruction *I) {
4488  MVT VT;
4489  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4490    return false;
4491
4492  if (VT.isVector())
4493    return selectBinaryOp(I, ISD::MUL);
4494
4495  const Value *Src0 = I->getOperand(0);
4496  const Value *Src1 = I->getOperand(1);
4497  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4498    if (C->getValue().isPowerOf2())
4499      std::swap(Src0, Src1);
4500
4501  // Try to simplify to a shift instruction.
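  // For example, a multiply by 8 becomes a left shift by 3; a free zero-/sign-
  // extension of the operand can be folded into that shift.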
4502  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4503    if (C->getValue().isPowerOf2()) {
4504      uint64_t ShiftVal = C->getValue().logBase2();
4505      MVT SrcVT = VT;
4506      bool IsZExt = true;
      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
        if (!isIntExtFree(ZExt)) {
          MVT VT;
          if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = true;
            Src0 = ZExt->getOperand(0);
          }
        }
      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
        if (!isIntExtFree(SExt)) {
          MVT VT;
          if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = false;
            Src0 = SExt->getOperand(0);
          }
        }
      }

      unsigned Src0Reg = getRegForValue(Src0);
      if (!Src0Reg)
        return false;
      bool Src0IsKill = hasTrivialKill(Src0);

      unsigned ResultReg =
          emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);

      if (ResultReg) {
        updateValueMap(I, ResultReg);
        return true;
      }
    }

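  // Couldn't simplify to a shift; emit a plain multiply.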
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;
  bool Src1IsKill = hasTrivialKill(I->getOperand(1));

  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    return false;

  if (RetVT.isVector())
    return selectOperator(I, I->getOpcode());

  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
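    // Only AShr needs a sign-extended source; Shl and LShr can treat the
    // source as zero-extended.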
    bool IsZExt = I->getOpcode() != Instruction::AShr;
    const Value *Op0 = I->getOperand(0);
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
      if (!isIntExtFree(ZExt)) {
        MVT TmpVT;
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = true;
          Op0 = ZExt->getOperand(0);
        }
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
      if (!isIntExtFree(SExt)) {
        MVT TmpVT;
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = false;
          Op0 = SExt->getOperand(0);
        }
      }
    }

    unsigned Op0Reg = getRegForValue(Op0);
    if (!Op0Reg)
      return false;
    bool Op0IsKill = hasTrivialKill(Op0);

    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction.");
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, ResultReg);
    return true;
  }

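  // The shift amount is not a constant; use the register-register shift
  // variants.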
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (!Op1Reg)
    return false;
  bool Op1IsKill = hasTrivialKill(I->getOperand(1));

  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  }

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectBitCast(const Instruction *I) {
  MVT RetVT, SrcVT;

  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    return false;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

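  // A scalar integer<->FP bitcast is just an FMOV between the GPR and FPR
  // register files.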
  unsigned Opc;
  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    Opc = AArch64::FMOVWSr;
  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    Opc = AArch64::FMOVXDr;
  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    Opc = AArch64::FMOVSWr;
  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    Opc = AArch64::FMOVDXr;
  else
    return false;

  const TargetRegisterClass *RC = nullptr;
  switch (RetVT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
  }
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectFRem(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  RTLIB::Libcall LC;
  switch (RetVT.SimpleTy) {
  default:
    return false;
  case MVT::f32:
    LC = RTLIB::REM_F32;
    break;
  case MVT::f64:
    LC = RTLIB::REM_F64;
    break;
  }

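  // AArch64 has no FP remainder instruction, so lower frem to a runtime call
  // (fmodf/fmod).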
  ArgListTy Args;
  Args.reserve(I->getNumOperands());

  // Populate the argument list.
  for (auto &Arg : I->operands()) {
    ArgListEntry Entry;
    Entry.Val = Arg;
    Entry.Ty = Arg->getType();
    Args.push_back(Entry);
  }

  CallLoweringInfo CLI;
  CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
                TLI.getLibcallName(LC), std::move(Args));
  if (!lowerCallTo(CLI))
    return false;
  updateValueMap(I, CLI.ResultReg);
  return true;
}

bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return selectBinaryOp(I, ISD::SDIV);

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || (-C).isPowerOf2()))
    return selectBinaryOp(I, ISD::SDIV);

  unsigned Lg2 = C.countTrailingZeros();
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  if (cast<BinaryOperator>(I)->isExact()) {
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

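  // Round the result toward zero: if the numerator is negative, add Pow2 - 1
  // before shifting. For example, -7 sdiv 4: (-7 + 3) >> 2 = -1, whereas a
  // plain arithmetic shift right would give -2.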
  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
  if (!AddReg)
    return false;

  // Select Src0 + (Pow2 - 1) when Src0 is negative, otherwise Src0, using a
  // compare against zero and a CSEL.
  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  unsigned SelectReg =
      fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
                       Src0IsKill, AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value, we must
  // also negate the result.
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg;
  if (C.isNegative())
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
                              SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
/// have to duplicate it for AArch64, because otherwise we would fail during the
/// sign-extend emission.
std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
  unsigned IdxN = getRegForValue(Idx);
  if (IdxN == 0)
    // Unhandled operand. Halt "fast" selection and bail.
    return std::pair<unsigned, bool>(0, false);

  bool IdxNIsKill = hasTrivialKill(Idx);

  // If the index is narrower than intptr_t, sign-extend it to pointer width;
  // indices wider than the pointer are not expected on AArch64.
  MVT PtrVT = TLI.getPointerTy();
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
    IdxNIsKill = true;
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
}

/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  unsigned N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;
  bool NIsKill = hasTrivialKill(I->getOperand(0));

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  Type *Ty = I->getOperand(0)->getType();
  MVT VT = TLI.getPointerTy();
  for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
    const Value *Idx = *OI;
    if (auto *StTy = dyn_cast<StructType>(Ty)) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
      Ty = StTy->getElementType(Field);
    } else {
      Ty = cast<SequentialType>(Ty)->getElementType();
      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs += DL.getTypeAllocSize(Ty) * CI->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
        if (!N)
          return false;
        NIsKill = true;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
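      // For example, indexing into an array of i32 scales the index by 4.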
      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
      unsigned IdxN = Pair.first;
      bool IdxNIsKill = Pair.second;
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
        if (!IdxN)
          return false;
        IdxNIsKill = true;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}

bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISD::UREM))
      return selectRem(I, ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  }

  // Fall back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
  // Silence warnings.
  (void)&CC_AArch64_DarwinPCS_VarArg;
}

namespace llvm {
llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                        const TargetLibraryInfo *LibInfo) {
  return new AArch64FastISel(FuncInfo, LibInfo);
}
}