//===-- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCSymbol.h"
using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    typedef enum {
      RegBase,
      FrameIndexBase
    } BaseKind;

  private:
    BaseKind Kind;
    AArch64_AM::ShiftExtendType ExtType;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg;
    unsigned Shift;
    int64_t Offset;
    const GlobalValue *GV;

  public:
    Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
                OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }
    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }
    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }
    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }
    unsigned getOffsetReg() const {
      return OffsetReg;
    }
    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }
    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            unsigned Flags, unsigned ScaleFactor,
                            MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         bool SetFlags = false, bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
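  // These helpers return the virtual register holding the result (or
  // true/false for the bool-returning variants) and 0/false when the operation
  // could not be selected quickly; in that case FastISel generally falls back
  // to SelectionDAG.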
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                      unsigned Op1, bool Op1IsKill);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget =
        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

#include "AArch64GenCallingConv.inc"

/// \brief Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}

/// \brief Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return 0;    // invalid
  case MVT::i1:  // fall-through
  case MVT::i8:
    return 1;
  case MVT::i16:
    return 2;
  case MVT::i32: // fall-through
  case MVT::f32:
    return 4;
  case MVT::i64: // fall-through
  case MVT::f64:
    return 8;
  }
}

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;
  if (CC == CallingConv::GHC)
    return CC_AArch64_GHC;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}

unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}

unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm =
        Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
    assert((Imm != -1) && "Cannot encode floating-point constant.");
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the MachO large code model materialize the FP constant in code.
  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    unsigned TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0)
    Align = DL.getTypeAllocSize(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}

unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
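  // (Illustration: under the small code model the GOT access selected below is
  // roughly "adrp xN, :got:sym" followed by "ldr xN, [xN, :got_lo12:sym]".)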
  if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
    return 0;

  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                          AArch64II::MO_NC);
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
        .addImm(0);
  }
  return ResultReg;
}

unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}

unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
}

/// \brief Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}

// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
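    // (Static allocas are the exception because they are addressed through a
    // frame index below rather than a virtual register.)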
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast: {
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);
  }
  case Instruction::IntToPtr: {
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;
  }
  case Instruction::PtrToInt: {
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;
  }
  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
         GTI != E; ++GTI) {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
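    // Unlike the Add case above, operands are not swapped, since subtraction
    // is not commutative; only a constant RHS is folded into the offset.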
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }
    break;
  }
  case Instruction::Shl: {
    if (Addr.getOffsetReg())
      break;

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (!CI)
      break;

    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            unsigned Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            bool RegIsKill = hasTrivialKill(LHS);
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                             AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::Mul: {
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        unsigned Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        bool RegIsKill = hasTrivialKill(LHS);
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                         AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (!Src)
      break;

    Addr.setShift(0);
    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  } // end switch

  if (Addr.isRegBase() && !Addr.getReg()) {
    unsigned Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  if (!Addr.getOffsetReg()) {
    unsigned Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  return false;
}

bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}


bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// \brief Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}

bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
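  // (E.g. the frame address is first materialized with "ADDXri <fi>, #0" and
  // the access then proceeds as a normal register-based load/store.)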
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}

void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           unsigned Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(
        constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
        constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}

unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr)
          std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
                                WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
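  // E.g. an i16 RHS can be folded as an extended-register form such as
  // "ADDWrx ..., sxth #N"; the folded left-shift amount is limited to 0-3 here.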
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
        }
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
                                WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
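  // E.g. "add x0, x1, x2, lsl #3" folds a constant left shift of the RHS;
  // LSR and ASR shifted operands are handled the same way below.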
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
                                    WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}

unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  } },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, uint64_t Imm,
                                        bool SetFlags, bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  } },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  } },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  if (ShiftImm >= 4)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
      { AArch64::ADDWrx,  AArch64::ADDXrx  } },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return ResultReg;
}

bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(DL, Ty, true);
  if (!EVT.isSimple())
    return false;
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return emitICmp(VT, LHS, RHS, IsZExt);
  case MVT::f32:
  case MVT::f64:
    return emitFCmp(VT, LHS, RHS);
  }
}

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
                 IsZExt) != 0;
}

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                                  uint64_t Imm) {
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
}

bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())
      UseImm = true;

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (UseImm) {
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(LHSReg, getKillRegState(LHSIsKill));
    return true;
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;
  bool RHSIsKill = hasTrivialKill(RHS);

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return true;
}

unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// \brief This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
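/// (For example, an immediate such as 0x123456 does not fit the 12-bit,
/// optionally shifted, add/sub immediate encoding, so it is materialized into
/// a register first.)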
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
                                      int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);

  if (ResultReg)
    return ResultReg;

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
  return ResultReg;
}

unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
                       WantResult);
}

unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                   RHSIsKill, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
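  // Only constant left shifts (ShlOperator) are folded here, using the
  // shifted-register forms (e.g. ANDWrs/ORRXrs) emitted by emitLogicalOp_rs.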
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                     RHSIsKill, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           uint64_t Imm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  }
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  }

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return 0;

  unsigned ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
                                           uint64_t ShiftImm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Don't deal with undefined shifts.
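  // (Note that the bound below uses the value type's width, e.g. 8 for i8,
  // even though the operation itself is performed in a 32-bit register.)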
1663 if (ShiftImm >= RetVT.getSizeInBits()) 1664 return 0; 1665 1666 const TargetRegisterClass *RC; 1667 unsigned Opc; 1668 switch (RetVT.SimpleTy) { 1669 default: 1670 return 0; 1671 case MVT::i1: 1672 case MVT::i8: 1673 case MVT::i16: 1674 case MVT::i32: 1675 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1676 RC = &AArch64::GPR32RegClass; 1677 break; 1678 case MVT::i64: 1679 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1680 RC = &AArch64::GPR64RegClass; 1681 break; 1682 } 1683 unsigned ResultReg = 1684 fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1685 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1686 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1687 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1688 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1689 } 1690 return ResultReg; 1691} 1692 1693unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 1694 uint64_t Imm) { 1695 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); 1696} 1697 1698unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1699 bool WantZExt, MachineMemOperand *MMO) { 1700 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1701 return 0; 1702 1703 // Simplify this down to something we can handle. 1704 if (!simplifyAddress(Addr, VT)) 1705 return 0; 1706 1707 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1708 if (!ScaleFactor) 1709 llvm_unreachable("Unexpected value type."); 1710 1711 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1712 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1713 bool UseScaled = true; 1714 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1715 UseScaled = false; 1716 ScaleFactor = 1; 1717 } 1718 1719 static const unsigned GPOpcTable[2][8][4] = { 1720 // Sign-extend. 1721 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1722 AArch64::LDURXi }, 1723 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1724 AArch64::LDURXi }, 1725 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1726 AArch64::LDRXui }, 1727 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1728 AArch64::LDRXui }, 1729 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1730 AArch64::LDRXroX }, 1731 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1732 AArch64::LDRXroX }, 1733 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1734 AArch64::LDRXroW }, 1735 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1736 AArch64::LDRXroW } 1737 }, 1738 // Zero-extend. 
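    // Table layout note (illustrative, inferred from the indexing below):
    // the first dimension selects sign- vs. zero-extending opcodes, the
    // second encodes the addressing form (unscaled, scaled, reg+X, reg+W,
    // each with a 32-bit and a 64-bit result variant), and the last the
    // access size (8, 16, 32 or 64 bits).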
1739 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1740 AArch64::LDURXi }, 1741 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1742 AArch64::LDURXi }, 1743 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1744 AArch64::LDRXui }, 1745 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1746 AArch64::LDRXui }, 1747 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1748 AArch64::LDRXroX }, 1749 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1750 AArch64::LDRXroX }, 1751 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1752 AArch64::LDRXroW }, 1753 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1754 AArch64::LDRXroW } 1755 } 1756 }; 1757 1758 static const unsigned FPOpcTable[4][2] = { 1759 { AArch64::LDURSi, AArch64::LDURDi }, 1760 { AArch64::LDRSui, AArch64::LDRDui }, 1761 { AArch64::LDRSroX, AArch64::LDRDroX }, 1762 { AArch64::LDRSroW, AArch64::LDRDroW } 1763 }; 1764 1765 unsigned Opc; 1766 const TargetRegisterClass *RC; 1767 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1768 Addr.getOffsetReg(); 1769 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1770 if (Addr.getExtendType() == AArch64_AM::UXTW || 1771 Addr.getExtendType() == AArch64_AM::SXTW) 1772 Idx++; 1773 1774 bool IsRet64Bit = RetVT == MVT::i64; 1775 switch (VT.SimpleTy) { 1776 default: 1777 llvm_unreachable("Unexpected value type."); 1778 case MVT::i1: // Intentional fall-through. 1779 case MVT::i8: 1780 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1781 RC = (IsRet64Bit && !WantZExt) ? 1782 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1783 break; 1784 case MVT::i16: 1785 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1786 RC = (IsRet64Bit && !WantZExt) ? 1787 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1788 break; 1789 case MVT::i32: 1790 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1791 RC = (IsRet64Bit && !WantZExt) ? 1792 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1793 break; 1794 case MVT::i64: 1795 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1796 RC = &AArch64::GPR64RegClass; 1797 break; 1798 case MVT::f32: 1799 Opc = FPOpcTable[Idx][0]; 1800 RC = &AArch64::FPR32RegClass; 1801 break; 1802 case MVT::f64: 1803 Opc = FPOpcTable[Idx][1]; 1804 RC = &AArch64::FPR64RegClass; 1805 break; 1806 } 1807 1808 // Create the base instruction, then add the operands. 1809 unsigned ResultReg = createResultReg(RC); 1810 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1811 TII.get(Opc), ResultReg); 1812 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1813 1814 // Loading an i1 requires special handling. 1815 if (VT == MVT::i1) { 1816 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); 1817 assert(ANDReg && "Unexpected AND instruction emission failure."); 1818 ResultReg = ANDReg; 1819 } 1820 1821 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1822 // the 32bit reg to a 64bit reg. 
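  // Illustrative sketch (assumption, not from the original comments):
  //   ldrb w8, [x0]                        ; 32-bit zero-extending load
  //   %x9 = SUBREG_TO_REG 0, %w8, sub_32   ; reinterpret as a 64-bit value
  // Writing w8 already zeroes the upper 32 bits, so no extra instruction is
  // executed at run time.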
1823 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1824 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); 1825 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1826 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1827 .addImm(0) 1828 .addReg(ResultReg, getKillRegState(true)) 1829 .addImm(AArch64::sub_32); 1830 ResultReg = Reg64; 1831 } 1832 return ResultReg; 1833} 1834 1835bool AArch64FastISel::selectAddSub(const Instruction *I) { 1836 MVT VT; 1837 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1838 return false; 1839 1840 if (VT.isVector()) 1841 return selectOperator(I, I->getOpcode()); 1842 1843 unsigned ResultReg; 1844 switch (I->getOpcode()) { 1845 default: 1846 llvm_unreachable("Unexpected instruction."); 1847 case Instruction::Add: 1848 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1849 break; 1850 case Instruction::Sub: 1851 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1852 break; 1853 } 1854 if (!ResultReg) 1855 return false; 1856 1857 updateValueMap(I, ResultReg); 1858 return true; 1859} 1860 1861bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1862 MVT VT; 1863 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1864 return false; 1865 1866 if (VT.isVector()) 1867 return selectOperator(I, I->getOpcode()); 1868 1869 unsigned ResultReg; 1870 switch (I->getOpcode()) { 1871 default: 1872 llvm_unreachable("Unexpected instruction."); 1873 case Instruction::And: 1874 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1875 break; 1876 case Instruction::Or: 1877 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1878 break; 1879 case Instruction::Xor: 1880 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1881 break; 1882 } 1883 if (!ResultReg) 1884 return false; 1885 1886 updateValueMap(I, ResultReg); 1887 return true; 1888} 1889 1890bool AArch64FastISel::selectLoad(const Instruction *I) { 1891 MVT VT; 1892 // Verify we have a legal type before going any further. Currently, we handle 1893 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1894 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1895 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1896 cast<LoadInst>(I)->isAtomic()) 1897 return false; 1898 1899 const Value *SV = I->getOperand(0); 1900 if (TLI.supportSwiftError()) { 1901 // Swifterror values can come from either a function parameter with 1902 // swifterror attribute or an alloca with swifterror attribute. 1903 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1904 if (Arg->hasSwiftErrorAttr()) 1905 return false; 1906 } 1907 1908 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1909 if (Alloca->isSwiftError()) 1910 return false; 1911 } 1912 } 1913 1914 // See if we can handle this address. 1915 Address Addr; 1916 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1917 return false; 1918 1919 // Fold the following sign-/zero-extend into the load instruction. 
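  // For example (illustrative):
  //   %v = load i8, i8* %p
  //   %e = zext i8 %v to i64
  // can be selected as a single zero-extending "ldrb", with the zext's
  // result mapped directly to the load's register.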
1920 bool WantZExt = true; 1921 MVT RetVT = VT; 1922 const Value *IntExtVal = nullptr; 1923 if (I->hasOneUse()) { 1924 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 1925 if (isTypeSupported(ZE->getType(), RetVT)) 1926 IntExtVal = ZE; 1927 else 1928 RetVT = VT; 1929 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 1930 if (isTypeSupported(SE->getType(), RetVT)) 1931 IntExtVal = SE; 1932 else 1933 RetVT = VT; 1934 WantZExt = false; 1935 } 1936 } 1937 1938 unsigned ResultReg = 1939 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 1940 if (!ResultReg) 1941 return false; 1942 1943 // There are a few different cases we have to handle, because the load or the 1944 // sign-/zero-extend might not be selected by FastISel if we fall-back to 1945 // SelectionDAG. There is also an ordering issue when both instructions are in 1946 // different basic blocks. 1947 // 1.) The load instruction is selected by FastISel, but the integer extend 1948 // not. This usually happens when the integer extend is in a different 1949 // basic block and SelectionDAG took over for that basic block. 1950 // 2.) The load instruction is selected before the integer extend. This only 1951 // happens when the integer extend is in a different basic block. 1952 // 3.) The load instruction is selected by SelectionDAG and the integer extend 1953 // by FastISel. This happens if there are instructions between the load 1954 // and the integer extend that couldn't be selected by FastISel. 1955 if (IntExtVal) { 1956 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 1957 // could select it. Emit a copy to subreg if necessary. FastISel will remove 1958 // it when it selects the integer extend. 1959 unsigned Reg = lookUpRegForValue(IntExtVal); 1960 auto *MI = MRI.getUniqueVRegDef(Reg); 1961 if (!MI) { 1962 if (RetVT == MVT::i64 && VT <= MVT::i32) { 1963 if (WantZExt) { 1964 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 1965 std::prev(FuncInfo.InsertPt)->eraseFromParent(); 1966 ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg(); 1967 } else 1968 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 1969 /*IsKill=*/true, 1970 AArch64::sub_32); 1971 } 1972 updateValueMap(I, ResultReg); 1973 return true; 1974 } 1975 1976 // The integer extend has already been emitted - delete all the instructions 1977 // that have been emitted by the integer extend lowering code and use the 1978 // result from the load instruction directly. 1979 while (MI) { 1980 Reg = 0; 1981 for (auto &Opnd : MI->uses()) { 1982 if (Opnd.isReg()) { 1983 Reg = Opnd.getReg(); 1984 break; 1985 } 1986 } 1987 MI->eraseFromParent(); 1988 MI = nullptr; 1989 if (Reg) 1990 MI = MRI.getUniqueVRegDef(Reg); 1991 } 1992 updateValueMap(IntExtVal, ResultReg); 1993 return true; 1994 } 1995 1996 updateValueMap(I, ResultReg); 1997 return true; 1998} 1999 2000bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 2001 MachineMemOperand *MMO) { 2002 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2003 return false; 2004 2005 // Simplify this down to something we can handle. 2006 if (!simplifyAddress(Addr, VT)) 2007 return false; 2008 2009 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2010 if (!ScaleFactor) 2011 llvm_unreachable("Unexpected value type."); 2012 2013 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2014 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 
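  // Illustrative example (assumption): a 32-bit store at offset -8 needs the
  // unscaled form "stur w0, [x1, #-8]", whereas offset +8 fits the scaled
  // form "str w0, [x1, #8]" (encoded as 8 / 4 = 2).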
2015 bool UseScaled = true; 2016 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2017 UseScaled = false; 2018 ScaleFactor = 1; 2019 } 2020 2021 static const unsigned OpcTable[4][6] = { 2022 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2023 AArch64::STURSi, AArch64::STURDi }, 2024 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2025 AArch64::STRSui, AArch64::STRDui }, 2026 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2027 AArch64::STRSroX, AArch64::STRDroX }, 2028 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2029 AArch64::STRSroW, AArch64::STRDroW } 2030 }; 2031 2032 unsigned Opc; 2033 bool VTIsi1 = false; 2034 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2035 Addr.getOffsetReg(); 2036 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2037 if (Addr.getExtendType() == AArch64_AM::UXTW || 2038 Addr.getExtendType() == AArch64_AM::SXTW) 2039 Idx++; 2040 2041 switch (VT.SimpleTy) { 2042 default: llvm_unreachable("Unexpected value type."); 2043 case MVT::i1: VTIsi1 = true; 2044 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2045 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2046 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2047 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2048 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2049 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2050 } 2051 2052 // Storing an i1 requires special handling. 2053 if (VTIsi1 && SrcReg != AArch64::WZR) { 2054 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 2055 assert(ANDReg && "Unexpected AND instruction emission failure."); 2056 SrcReg = ANDReg; 2057 } 2058 // Create the base instruction, then add the operands. 2059 const MCInstrDesc &II = TII.get(Opc); 2060 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2061 MachineInstrBuilder MIB = 2062 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); 2063 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2064 2065 return true; 2066} 2067 2068bool AArch64FastISel::selectStore(const Instruction *I) { 2069 MVT VT; 2070 const Value *Op0 = I->getOperand(0); 2071 // Verify we have a legal type before going any further. Currently, we handle 2072 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2073 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2074 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) || 2075 cast<StoreInst>(I)->isAtomic()) 2076 return false; 2077 2078 const Value *PtrV = I->getOperand(1); 2079 if (TLI.supportSwiftError()) { 2080 // Swifterror values can come from either a function parameter with 2081 // swifterror attribute or an alloca with swifterror attribute. 2082 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2083 if (Arg->hasSwiftErrorAttr()) 2084 return false; 2085 } 2086 2087 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2088 if (Alloca->isSwiftError()) 2089 return false; 2090 } 2091 } 2092 2093 // Get the value to be stored into a register. Use the zero register directly 2094 // when possible to avoid an unnecessary copy and a wasted register. 2095 unsigned SrcReg = 0; 2096 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2097 if (CI->isZero()) 2098 SrcReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR; 2099 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2100 if (CF->isZero() && !CF->isNegative()) { 2101 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2102 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2103 } 2104 } 2105 2106 if (!SrcReg) 2107 SrcReg = getRegForValue(Op0); 2108 2109 if (!SrcReg) 2110 return false; 2111 2112 // See if we can handle this address. 2113 Address Addr; 2114 if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType())) 2115 return false; 2116 2117 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2118 return false; 2119 return true; 2120} 2121 2122static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2123 switch (Pred) { 2124 case CmpInst::FCMP_ONE: 2125 case CmpInst::FCMP_UEQ: 2126 default: 2127 // AL is our "false" for now. The other two need more compares. 2128 return AArch64CC::AL; 2129 case CmpInst::ICMP_EQ: 2130 case CmpInst::FCMP_OEQ: 2131 return AArch64CC::EQ; 2132 case CmpInst::ICMP_SGT: 2133 case CmpInst::FCMP_OGT: 2134 return AArch64CC::GT; 2135 case CmpInst::ICMP_SGE: 2136 case CmpInst::FCMP_OGE: 2137 return AArch64CC::GE; 2138 case CmpInst::ICMP_UGT: 2139 case CmpInst::FCMP_UGT: 2140 return AArch64CC::HI; 2141 case CmpInst::FCMP_OLT: 2142 return AArch64CC::MI; 2143 case CmpInst::ICMP_ULE: 2144 case CmpInst::FCMP_OLE: 2145 return AArch64CC::LS; 2146 case CmpInst::FCMP_ORD: 2147 return AArch64CC::VC; 2148 case CmpInst::FCMP_UNO: 2149 return AArch64CC::VS; 2150 case CmpInst::FCMP_UGE: 2151 return AArch64CC::PL; 2152 case CmpInst::ICMP_SLT: 2153 case CmpInst::FCMP_ULT: 2154 return AArch64CC::LT; 2155 case CmpInst::ICMP_SLE: 2156 case CmpInst::FCMP_ULE: 2157 return AArch64CC::LE; 2158 case CmpInst::FCMP_UNE: 2159 case CmpInst::ICMP_NE: 2160 return AArch64CC::NE; 2161 case CmpInst::ICMP_UGE: 2162 return AArch64CC::HS; 2163 case CmpInst::ICMP_ULT: 2164 return AArch64CC::LO; 2165 } 2166} 2167 2168/// \brief Try to emit a combined compare-and-branch instruction. 2169bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2170 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2171 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2172 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2173 2174 const Value *LHS = CI->getOperand(0); 2175 const Value *RHS = CI->getOperand(1); 2176 2177 MVT VT; 2178 if (!isTypeSupported(LHS->getType(), VT)) 2179 return false; 2180 2181 unsigned BW = VT.getSizeInBits(); 2182 if (BW > 64) 2183 return false; 2184 2185 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2186 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2187 2188 // Try to take advantage of fallthrough opportunities. 
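  // Illustrative note: if the true block is the layout successor, the
  // condition is inverted and the conditional branch targets the false block
  // instead, so the common path falls through without an extra unconditional
  // branch.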
2189 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2190 std::swap(TBB, FBB); 2191 Predicate = CmpInst::getInversePredicate(Predicate); 2192 } 2193 2194 int TestBit = -1; 2195 bool IsCmpNE; 2196 switch (Predicate) { 2197 default: 2198 return false; 2199 case CmpInst::ICMP_EQ: 2200 case CmpInst::ICMP_NE: 2201 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2202 std::swap(LHS, RHS); 2203 2204 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2205 return false; 2206 2207 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2208 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2209 const Value *AndLHS = AI->getOperand(0); 2210 const Value *AndRHS = AI->getOperand(1); 2211 2212 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2213 if (C->getValue().isPowerOf2()) 2214 std::swap(AndLHS, AndRHS); 2215 2216 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2217 if (C->getValue().isPowerOf2()) { 2218 TestBit = C->getValue().logBase2(); 2219 LHS = AndLHS; 2220 } 2221 } 2222 2223 if (VT == MVT::i1) 2224 TestBit = 0; 2225 2226 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2227 break; 2228 case CmpInst::ICMP_SLT: 2229 case CmpInst::ICMP_SGE: 2230 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2231 return false; 2232 2233 TestBit = BW - 1; 2234 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2235 break; 2236 case CmpInst::ICMP_SGT: 2237 case CmpInst::ICMP_SLE: 2238 if (!isa<ConstantInt>(RHS)) 2239 return false; 2240 2241 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2242 return false; 2243 2244 TestBit = BW - 1; 2245 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2246 break; 2247 } // end switch 2248 2249 static const unsigned OpcTable[2][2][2] = { 2250 { {AArch64::CBZW, AArch64::CBZX }, 2251 {AArch64::CBNZW, AArch64::CBNZX} }, 2252 { {AArch64::TBZW, AArch64::TBZX }, 2253 {AArch64::TBNZW, AArch64::TBNZX} } 2254 }; 2255 2256 bool IsBitTest = TestBit != -1; 2257 bool Is64Bit = BW == 64; 2258 if (TestBit < 32 && TestBit >= 0) 2259 Is64Bit = false; 2260 2261 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2262 const MCInstrDesc &II = TII.get(Opc); 2263 2264 unsigned SrcReg = getRegForValue(LHS); 2265 if (!SrcReg) 2266 return false; 2267 bool SrcIsKill = hasTrivialKill(LHS); 2268 2269 if (BW == 64 && !Is64Bit) 2270 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, 2271 AArch64::sub_32); 2272 2273 if ((BW < 32) && !IsBitTest) 2274 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true); 2275 2276 // Emit the combined compare and branch instruction. 2277 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2278 MachineInstrBuilder MIB = 2279 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 2280 .addReg(SrcReg, getKillRegState(SrcIsKill)); 2281 if (IsBitTest) 2282 MIB.addImm(TestBit); 2283 MIB.addMBB(TBB); 2284 2285 finishCondBranch(BI->getParent(), TBB, FBB); 2286 return true; 2287} 2288 2289bool AArch64FastISel::selectBranch(const Instruction *I) { 2290 const BranchInst *BI = cast<BranchInst>(I); 2291 if (BI->isUnconditional()) { 2292 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2293 fastEmitBranch(MSucc, BI->getDebugLoc()); 2294 return true; 2295 } 2296 2297 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2298 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2299 2300 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2301 if (CI->hasOneUse() && isValueAvailable(CI)) { 2302 // Try to optimize or fold the cmp. 
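      // For example (illustrative), a compare that folds to "fcmp false"
      // never holds, so the code below branches unconditionally to the false
      // successor; "fcmp true" branches to the true successor.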
2303 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2304 switch (Predicate) { 2305 default: 2306 break; 2307 case CmpInst::FCMP_FALSE: 2308 fastEmitBranch(FBB, DbgLoc); 2309 return true; 2310 case CmpInst::FCMP_TRUE: 2311 fastEmitBranch(TBB, DbgLoc); 2312 return true; 2313 } 2314 2315 // Try to emit a combined compare-and-branch first. 2316 if (emitCompareAndBranch(BI)) 2317 return true; 2318 2319 // Try to take advantage of fallthrough opportunities. 2320 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2321 std::swap(TBB, FBB); 2322 Predicate = CmpInst::getInversePredicate(Predicate); 2323 } 2324 2325 // Emit the cmp. 2326 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2327 return false; 2328 2329 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2330 // instruction. 2331 AArch64CC::CondCode CC = getCompareCC(Predicate); 2332 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2333 switch (Predicate) { 2334 default: 2335 break; 2336 case CmpInst::FCMP_UEQ: 2337 ExtraCC = AArch64CC::EQ; 2338 CC = AArch64CC::VS; 2339 break; 2340 case CmpInst::FCMP_ONE: 2341 ExtraCC = AArch64CC::MI; 2342 CC = AArch64CC::GT; 2343 break; 2344 } 2345 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2346 2347 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2348 if (ExtraCC != AArch64CC::AL) { 2349 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2350 .addImm(ExtraCC) 2351 .addMBB(TBB); 2352 } 2353 2354 // Emit the branch. 2355 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2356 .addImm(CC) 2357 .addMBB(TBB); 2358 2359 finishCondBranch(BI->getParent(), TBB, FBB); 2360 return true; 2361 } 2362 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2363 uint64_t Imm = CI->getZExtValue(); 2364 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2365 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) 2366 .addMBB(Target); 2367 2368 // Obtain the branch probability and add the target to the successor list. 2369 if (FuncInfo.BPI) { 2370 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2371 BI->getParent(), Target->getBasicBlock()); 2372 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2373 } else 2374 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2375 return true; 2376 } else { 2377 AArch64CC::CondCode CC = AArch64CC::NE; 2378 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2379 // Fake request the condition, otherwise the intrinsic might be completely 2380 // optimized away. 2381 unsigned CondReg = getRegForValue(BI->getCondition()); 2382 if (!CondReg) 2383 return false; 2384 2385 // Emit the branch. 2386 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2387 .addImm(CC) 2388 .addMBB(TBB); 2389 2390 finishCondBranch(BI->getParent(), TBB, FBB); 2391 return true; 2392 } 2393 } 2394 2395 unsigned CondReg = getRegForValue(BI->getCondition()); 2396 if (CondReg == 0) 2397 return false; 2398 bool CondRegIsKill = hasTrivialKill(BI->getCondition()); 2399 2400 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
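  // Illustrative example (assumption): with the condition in w8 this becomes
  //   tbnz w8, #0, <true-bb>
  // (or "tbz" after swapping successors below); only bit 0 is meaningful, so
  // the upper bits of the i32 are ignored.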
2401 unsigned Opcode = AArch64::TBNZW; 2402 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2403 std::swap(TBB, FBB); 2404 Opcode = AArch64::TBZW; 2405 } 2406 2407 const MCInstrDesc &II = TII.get(Opcode); 2408 unsigned ConstrainedCondReg 2409 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2410 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2411 .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) 2412 .addImm(0) 2413 .addMBB(TBB); 2414 2415 finishCondBranch(BI->getParent(), TBB, FBB); 2416 return true; 2417} 2418 2419bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2420 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2421 unsigned AddrReg = getRegForValue(BI->getOperand(0)); 2422 if (AddrReg == 0) 2423 return false; 2424 2425 // Emit the indirect branch. 2426 const MCInstrDesc &II = TII.get(AArch64::BR); 2427 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2428 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); 2429 2430 // Make sure the CFG is up-to-date. 2431 for (auto *Succ : BI->successors()) 2432 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2433 2434 return true; 2435} 2436 2437bool AArch64FastISel::selectCmp(const Instruction *I) { 2438 const CmpInst *CI = cast<CmpInst>(I); 2439 2440 // Vectors of i1 are weird: bail out. 2441 if (CI->getType()->isVectorTy()) 2442 return false; 2443 2444 // Try to optimize or fold the cmp. 2445 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2446 unsigned ResultReg = 0; 2447 switch (Predicate) { 2448 default: 2449 break; 2450 case CmpInst::FCMP_FALSE: 2451 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2452 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2453 TII.get(TargetOpcode::COPY), ResultReg) 2454 .addReg(AArch64::WZR, getKillRegState(true)); 2455 break; 2456 case CmpInst::FCMP_TRUE: 2457 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2458 break; 2459 } 2460 2461 if (ResultReg) { 2462 updateValueMap(I, ResultReg); 2463 return true; 2464 } 2465 2466 // Emit the cmp. 2467 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2468 return false; 2469 2470 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2471 2472 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2473 // condition codes are inverted, because they are used by CSINC. 2474 static unsigned CondCodeTable[2][2] = { 2475 { AArch64CC::NE, AArch64CC::VC }, 2476 { AArch64CC::PL, AArch64CC::LE } 2477 }; 2478 unsigned *CondCodes = nullptr; 2479 switch (Predicate) { 2480 default: 2481 break; 2482 case CmpInst::FCMP_UEQ: 2483 CondCodes = &CondCodeTable[0][0]; 2484 break; 2485 case CmpInst::FCMP_ONE: 2486 CondCodes = &CondCodeTable[1][0]; 2487 break; 2488 } 2489 2490 if (CondCodes) { 2491 unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2492 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2493 TmpReg1) 2494 .addReg(AArch64::WZR, getKillRegState(true)) 2495 .addReg(AArch64::WZR, getKillRegState(true)) 2496 .addImm(CondCodes[0]); 2497 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2498 ResultReg) 2499 .addReg(TmpReg1, getKillRegState(true)) 2500 .addReg(AArch64::WZR, getKillRegState(true)) 2501 .addImm(CondCodes[1]); 2502 2503 updateValueMap(I, ResultReg); 2504 return true; 2505 } 2506 2507 // Now set a register based on the comparison. 
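  // Illustrative note: "csinc wN, wzr, wzr, <cc>" produces 0 when <cc> holds
  // and 1 otherwise, so passing the inverted condition code below yields 1
  // exactly when the original predicate is true.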
2508 AArch64CC::CondCode CC = getCompareCC(Predicate); 2509 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2510 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2511 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2512 ResultReg) 2513 .addReg(AArch64::WZR, getKillRegState(true)) 2514 .addReg(AArch64::WZR, getKillRegState(true)) 2515 .addImm(invertedCC); 2516 2517 updateValueMap(I, ResultReg); 2518 return true; 2519} 2520 2521/// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false' 2522/// value. 2523bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2524 if (!SI->getType()->isIntegerTy(1)) 2525 return false; 2526 2527 const Value *Src1Val, *Src2Val; 2528 unsigned Opc = 0; 2529 bool NeedExtraOp = false; 2530 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2531 if (CI->isOne()) { 2532 Src1Val = SI->getCondition(); 2533 Src2Val = SI->getFalseValue(); 2534 Opc = AArch64::ORRWrr; 2535 } else { 2536 assert(CI->isZero()); 2537 Src1Val = SI->getFalseValue(); 2538 Src2Val = SI->getCondition(); 2539 Opc = AArch64::BICWrr; 2540 } 2541 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2542 if (CI->isOne()) { 2543 Src1Val = SI->getCondition(); 2544 Src2Val = SI->getTrueValue(); 2545 Opc = AArch64::ORRWrr; 2546 NeedExtraOp = true; 2547 } else { 2548 assert(CI->isZero()); 2549 Src1Val = SI->getCondition(); 2550 Src2Val = SI->getTrueValue(); 2551 Opc = AArch64::ANDWrr; 2552 } 2553 } 2554 2555 if (!Opc) 2556 return false; 2557 2558 unsigned Src1Reg = getRegForValue(Src1Val); 2559 if (!Src1Reg) 2560 return false; 2561 bool Src1IsKill = hasTrivialKill(Src1Val); 2562 2563 unsigned Src2Reg = getRegForValue(Src2Val); 2564 if (!Src2Reg) 2565 return false; 2566 bool Src2IsKill = hasTrivialKill(Src2Val); 2567 2568 if (NeedExtraOp) { 2569 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1); 2570 Src1IsKill = true; 2571 } 2572 unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2573 Src1IsKill, Src2Reg, Src2IsKill); 2574 updateValueMap(SI, ResultReg); 2575 return true; 2576} 2577 2578bool AArch64FastISel::selectSelect(const Instruction *I) { 2579 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2580 MVT VT; 2581 if (!isTypeSupported(I->getType(), VT)) 2582 return false; 2583 2584 unsigned Opc; 2585 const TargetRegisterClass *RC; 2586 switch (VT.SimpleTy) { 2587 default: 2588 return false; 2589 case MVT::i1: 2590 case MVT::i8: 2591 case MVT::i16: 2592 case MVT::i32: 2593 Opc = AArch64::CSELWr; 2594 RC = &AArch64::GPR32RegClass; 2595 break; 2596 case MVT::i64: 2597 Opc = AArch64::CSELXr; 2598 RC = &AArch64::GPR64RegClass; 2599 break; 2600 case MVT::f32: 2601 Opc = AArch64::FCSELSrrr; 2602 RC = &AArch64::FPR32RegClass; 2603 break; 2604 case MVT::f64: 2605 Opc = AArch64::FCSELDrrr; 2606 RC = &AArch64::FPR64RegClass; 2607 break; 2608 } 2609 2610 const SelectInst *SI = cast<SelectInst>(I); 2611 const Value *Cond = SI->getCondition(); 2612 AArch64CC::CondCode CC = AArch64CC::NE; 2613 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2614 2615 if (optimizeSelect(SI)) 2616 return true; 2617 2618 // Try to pickup the flags, so we don't have to emit another compare. 2619 if (foldXALUIntrinsic(CC, I, Cond)) { 2620 // Fake request the condition to force emission of the XALU intrinsic. 
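    // Illustrative note: requesting a register here keeps the overflow
    // intrinsic from being dropped as dead, so the instruction that sets the
    // overflow flags is still emitted and the conditional select below can
    // consume NZCV without a separate compare.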
2621 unsigned CondReg = getRegForValue(Cond); 2622 if (!CondReg) 2623 return false; 2624 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2625 isValueAvailable(Cond)) { 2626 const auto *Cmp = cast<CmpInst>(Cond); 2627 // Try to optimize or fold the cmp. 2628 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2629 const Value *FoldSelect = nullptr; 2630 switch (Predicate) { 2631 default: 2632 break; 2633 case CmpInst::FCMP_FALSE: 2634 FoldSelect = SI->getFalseValue(); 2635 break; 2636 case CmpInst::FCMP_TRUE: 2637 FoldSelect = SI->getTrueValue(); 2638 break; 2639 } 2640 2641 if (FoldSelect) { 2642 unsigned SrcReg = getRegForValue(FoldSelect); 2643 if (!SrcReg) 2644 return false; 2645 unsigned UseReg = lookUpRegForValue(SI); 2646 if (UseReg) 2647 MRI.clearKillFlags(UseReg); 2648 2649 updateValueMap(I, SrcReg); 2650 return true; 2651 } 2652 2653 // Emit the cmp. 2654 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2655 return false; 2656 2657 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2658 CC = getCompareCC(Predicate); 2659 switch (Predicate) { 2660 default: 2661 break; 2662 case CmpInst::FCMP_UEQ: 2663 ExtraCC = AArch64CC::EQ; 2664 CC = AArch64CC::VS; 2665 break; 2666 case CmpInst::FCMP_ONE: 2667 ExtraCC = AArch64CC::MI; 2668 CC = AArch64CC::GT; 2669 break; 2670 } 2671 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2672 } else { 2673 unsigned CondReg = getRegForValue(Cond); 2674 if (!CondReg) 2675 return false; 2676 bool CondIsKill = hasTrivialKill(Cond); 2677 2678 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2679 CondReg = constrainOperandRegClass(II, CondReg, 1); 2680 2681 // Emit a TST instruction (ANDS wzr, reg, #imm). 2682 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, 2683 AArch64::WZR) 2684 .addReg(CondReg, getKillRegState(CondIsKill)) 2685 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2686 } 2687 2688 unsigned Src1Reg = getRegForValue(SI->getTrueValue()); 2689 bool Src1IsKill = hasTrivialKill(SI->getTrueValue()); 2690 2691 unsigned Src2Reg = getRegForValue(SI->getFalseValue()); 2692 bool Src2IsKill = hasTrivialKill(SI->getFalseValue()); 2693 2694 if (!Src1Reg || !Src2Reg) 2695 return false; 2696 2697 if (ExtraCC != AArch64CC::AL) { 2698 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2699 Src2IsKill, ExtraCC); 2700 Src2IsKill = true; 2701 } 2702 unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2703 Src2IsKill, CC); 2704 updateValueMap(I, ResultReg); 2705 return true; 2706} 2707 2708bool AArch64FastISel::selectFPExt(const Instruction *I) { 2709 Value *V = I->getOperand(0); 2710 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2711 return false; 2712 2713 unsigned Op = getRegForValue(V); 2714 if (Op == 0) 2715 return false; 2716 2717 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); 2718 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), 2719 ResultReg).addReg(Op); 2720 updateValueMap(I, ResultReg); 2721 return true; 2722} 2723 2724bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2725 Value *V = I->getOperand(0); 2726 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2727 return false; 2728 2729 unsigned Op = getRegForValue(V); 2730 if (Op == 0) 2731 return false; 2732 2733 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); 2734 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), 2735 ResultReg).addReg(Op); 
2736 updateValueMap(I, ResultReg); 2737 return true; 2738} 2739 2740// FPToUI and FPToSI 2741bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2742 MVT DestVT; 2743 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2744 return false; 2745 2746 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2747 if (SrcReg == 0) 2748 return false; 2749 2750 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2751 if (SrcVT == MVT::f128 || SrcVT == MVT::f16) 2752 return false; 2753 2754 unsigned Opc; 2755 if (SrcVT == MVT::f64) { 2756 if (Signed) 2757 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2758 else 2759 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2760 } else { 2761 if (Signed) 2762 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2763 else 2764 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2765 } 2766 unsigned ResultReg = createResultReg( 2767 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2768 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 2769 .addReg(SrcReg); 2770 updateValueMap(I, ResultReg); 2771 return true; 2772} 2773 2774bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2775 MVT DestVT; 2776 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2777 return false; 2778 // Let regular ISEL handle FP16 2779 if (DestVT == MVT::f16) 2780 return false; 2781 2782 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2783 "Unexpected value type."); 2784 2785 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2786 if (!SrcReg) 2787 return false; 2788 bool SrcIsKill = hasTrivialKill(I->getOperand(0)); 2789 2790 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2791 2792 // Handle sign-extension. 2793 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2794 SrcReg = 2795 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2796 if (!SrcReg) 2797 return false; 2798 SrcIsKill = true; 2799 } 2800 2801 unsigned Opc; 2802 if (SrcVT == MVT::i64) { 2803 if (Signed) 2804 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2805 else 2806 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2807 } else { 2808 if (Signed) 2809 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2810 else 2811 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2812 } 2813 2814 unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg, 2815 SrcIsKill); 2816 updateValueMap(I, ResultReg); 2817 return true; 2818} 2819 2820bool AArch64FastISel::fastLowerArguments() { 2821 if (!FuncInfo.CanLowerReturn) 2822 return false; 2823 2824 const Function *F = FuncInfo.Fn; 2825 if (F->isVarArg()) 2826 return false; 2827 2828 CallingConv::ID CC = F->getCallingConv(); 2829 if (CC != CallingConv::C) 2830 return false; 2831 2832 // Only handle simple cases of up to 8 GPR and FPR each. 2833 unsigned GPRCnt = 0; 2834 unsigned FPRCnt = 0; 2835 unsigned Idx = 0; 2836 for (auto const &Arg : F->args()) { 2837 // The first argument is at index 1. 
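    // Illustrative note (assumption): attribute index 0 refers to the return
    // value, so parameter attributes are queried starting at index 1, which
    // is why Idx is incremented before the hasAttribute checks below.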
2838 ++Idx; 2839 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || 2840 F->getAttributes().hasAttribute(Idx, Attribute::InReg) || 2841 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || 2842 F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) || 2843 F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) || 2844 F->getAttributes().hasAttribute(Idx, Attribute::Nest)) 2845 return false; 2846 2847 Type *ArgTy = Arg.getType(); 2848 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2849 return false; 2850 2851 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2852 if (!ArgVT.isSimple()) 2853 return false; 2854 2855 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2856 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2857 return false; 2858 2859 if (VT.isVector() && 2860 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2861 return false; 2862 2863 if (VT >= MVT::i1 && VT <= MVT::i64) 2864 ++GPRCnt; 2865 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2866 VT.is128BitVector()) 2867 ++FPRCnt; 2868 else 2869 return false; 2870 2871 if (GPRCnt > 8 || FPRCnt > 8) 2872 return false; 2873 } 2874 2875 static const MCPhysReg Registers[6][8] = { 2876 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2877 AArch64::W5, AArch64::W6, AArch64::W7 }, 2878 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2879 AArch64::X5, AArch64::X6, AArch64::X7 }, 2880 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2881 AArch64::H5, AArch64::H6, AArch64::H7 }, 2882 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2883 AArch64::S5, AArch64::S6, AArch64::S7 }, 2884 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2885 AArch64::D5, AArch64::D6, AArch64::D7 }, 2886 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2887 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2888 }; 2889 2890 unsigned GPRIdx = 0; 2891 unsigned FPRIdx = 0; 2892 for (auto const &Arg : F->args()) { 2893 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2894 unsigned SrcReg; 2895 const TargetRegisterClass *RC; 2896 if (VT >= MVT::i1 && VT <= MVT::i32) { 2897 SrcReg = Registers[0][GPRIdx++]; 2898 RC = &AArch64::GPR32RegClass; 2899 VT = MVT::i32; 2900 } else if (VT == MVT::i64) { 2901 SrcReg = Registers[1][GPRIdx++]; 2902 RC = &AArch64::GPR64RegClass; 2903 } else if (VT == MVT::f16) { 2904 SrcReg = Registers[2][FPRIdx++]; 2905 RC = &AArch64::FPR16RegClass; 2906 } else if (VT == MVT::f32) { 2907 SrcReg = Registers[3][FPRIdx++]; 2908 RC = &AArch64::FPR32RegClass; 2909 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2910 SrcReg = Registers[4][FPRIdx++]; 2911 RC = &AArch64::FPR64RegClass; 2912 } else if (VT.is128BitVector()) { 2913 SrcReg = Registers[5][FPRIdx++]; 2914 RC = &AArch64::FPR128RegClass; 2915 } else 2916 llvm_unreachable("Unexpected value type."); 2917 2918 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 2919 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 2920 // Without this, EmitLiveInCopies may eliminate the livein if its only 2921 // use is a bitcast (which isn't turned into an instruction). 
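    // Illustrative sketch (assumption): for an i32 argument arriving in w0,
    //   %vreg0 = COPY %w0      ; live-in copy for the register added above
    //   %vregN = COPY %vreg0   ; extra copy emitted below
    // gives the live-in copy a real use even when the argument's only IR use
    // is a bitcast.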
2922 unsigned ResultReg = createResultReg(RC); 2923 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2924 TII.get(TargetOpcode::COPY), ResultReg) 2925 .addReg(DstReg, getKillRegState(true)); 2926 updateValueMap(&Arg, ResultReg); 2927 } 2928 return true; 2929} 2930 2931bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 2932 SmallVectorImpl<MVT> &OutVTs, 2933 unsigned &NumBytes) { 2934 CallingConv::ID CC = CLI.CallConv; 2935 SmallVector<CCValAssign, 16> ArgLocs; 2936 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 2937 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 2938 2939 // Get a count of how many bytes are to be pushed on the stack. 2940 NumBytes = CCInfo.getNextStackOffset(); 2941 2942 // Issue CALLSEQ_START 2943 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 2944 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 2945 .addImm(NumBytes); 2946 2947 // Process the args. 2948 for (CCValAssign &VA : ArgLocs) { 2949 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 2950 MVT ArgVT = OutVTs[VA.getValNo()]; 2951 2952 unsigned ArgReg = getRegForValue(ArgVal); 2953 if (!ArgReg) 2954 return false; 2955 2956 // Handle arg promotion: SExt, ZExt, AExt. 2957 switch (VA.getLocInfo()) { 2958 case CCValAssign::Full: 2959 break; 2960 case CCValAssign::SExt: { 2961 MVT DestVT = VA.getLocVT(); 2962 MVT SrcVT = ArgVT; 2963 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 2964 if (!ArgReg) 2965 return false; 2966 break; 2967 } 2968 case CCValAssign::AExt: 2969 // Intentional fall-through. 2970 case CCValAssign::ZExt: { 2971 MVT DestVT = VA.getLocVT(); 2972 MVT SrcVT = ArgVT; 2973 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 2974 if (!ArgReg) 2975 return false; 2976 break; 2977 } 2978 default: 2979 llvm_unreachable("Unknown arg promotion!"); 2980 } 2981 2982 // Now copy/store arg to correct locations. 2983 if (VA.isRegLoc() && !VA.needsCustom()) { 2984 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2985 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 2986 CLI.OutRegs.push_back(VA.getLocReg()); 2987 } else if (VA.needsCustom()) { 2988 // FIXME: Handle custom args. 2989 return false; 2990 } else { 2991 assert(VA.isMemLoc() && "Assuming store on stack."); 2992 2993 // Don't emit stores for undef values. 2994 if (isa<UndefValue>(ArgVal)) 2995 continue; 2996 2997 // Need to store on the stack. 2998 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 2999 3000 unsigned BEAlign = 0; 3001 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3002 BEAlign = 8 - ArgSize; 3003 3004 Address Addr; 3005 Addr.setKind(Address::RegBase); 3006 Addr.setReg(AArch64::SP); 3007 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3008 3009 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); 3010 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3011 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3012 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3013 3014 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3015 return false; 3016 } 3017 } 3018 return true; 3019} 3020 3021bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, 3022 unsigned NumBytes) { 3023 CallingConv::ID CC = CLI.CallConv; 3024 3025 // Issue CALLSEQ_END 3026 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 3028 .addImm(NumBytes).addImm(0); 3029 3030 // Now the return value. 
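  // Illustrative note: for a call returning i32, the result arrives in w0;
  // the code below copies it out of that physical register into a fresh
  // virtual register and records the register in CLI.InRegs.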
3031 if (RetVT != MVT::isVoid) { 3032 SmallVector<CCValAssign, 16> RVLocs; 3033 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3034 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); 3035 3036 // Only handle a single return value. 3037 if (RVLocs.size() != 1) 3038 return false; 3039 3040 // Copy all of the result registers out of their specified physreg. 3041 MVT CopyVT = RVLocs[0].getValVT(); 3042 3043 // TODO: Handle big-endian results 3044 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3045 return false; 3046 3047 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); 3048 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3049 TII.get(TargetOpcode::COPY), ResultReg) 3050 .addReg(RVLocs[0].getLocReg()); 3051 CLI.InRegs.push_back(RVLocs[0].getLocReg()); 3052 3053 CLI.ResultReg = ResultReg; 3054 CLI.NumResultRegs = 1; 3055 } 3056 3057 return true; 3058} 3059 3060bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3061 CallingConv::ID CC = CLI.CallConv; 3062 bool IsTailCall = CLI.IsTailCall; 3063 bool IsVarArg = CLI.IsVarArg; 3064 const Value *Callee = CLI.Callee; 3065 MCSymbol *Symbol = CLI.Symbol; 3066 3067 if (!Callee && !Symbol) 3068 return false; 3069 3070 // Allow SelectionDAG isel to handle tail calls. 3071 if (IsTailCall) 3072 return false; 3073 3074 CodeModel::Model CM = TM.getCodeModel(); 3075 // Only support the small and large code model. 3076 if (CM != CodeModel::Small && CM != CodeModel::Large) 3077 return false; 3078 3079 // FIXME: Add large code model support for ELF. 3080 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3081 return false; 3082 3083 // Let SDISel handle vararg functions. 3084 if (IsVarArg) 3085 return false; 3086 3087 // FIXME: Only handle *simple* calls for now. 3088 MVT RetVT; 3089 if (CLI.RetTy->isVoidTy()) 3090 RetVT = MVT::isVoid; 3091 else if (!isTypeLegal(CLI.RetTy, RetVT)) 3092 return false; 3093 3094 for (auto Flag : CLI.OutFlags) 3095 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3096 Flag.isSwiftSelf() || Flag.isSwiftError()) 3097 return false; 3098 3099 // Set up the argument vectors. 3100 SmallVector<MVT, 16> OutVTs; 3101 OutVTs.reserve(CLI.OutVals.size()); 3102 3103 for (auto *Val : CLI.OutVals) { 3104 MVT VT; 3105 if (!isTypeLegal(Val->getType(), VT) && 3106 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3107 return false; 3108 3109 // We don't handle vector parameters yet. 3110 if (VT.isVector() || VT.getSizeInBits() > 64) 3111 return false; 3112 3113 OutVTs.push_back(VT); 3114 } 3115 3116 Address Addr; 3117 if (Callee && !computeCallAddress(Callee, Addr)) 3118 return false; 3119 3120 // Handle the arguments now that we've gotten them. 3121 unsigned NumBytes; 3122 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3123 return false; 3124 3125 // Issue the call. 3126 MachineInstrBuilder MIB; 3127 if (CM == CodeModel::Small) { 3128 const MCInstrDesc &II = TII.get(Addr.getReg() ? 
AArch64::BLR : AArch64::BL); 3129 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); 3130 if (Symbol) 3131 MIB.addSym(Symbol, 0); 3132 else if (Addr.getGlobalValue()) 3133 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3134 else if (Addr.getReg()) { 3135 unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3136 MIB.addReg(Reg); 3137 } else 3138 return false; 3139 } else { 3140 unsigned CallReg = 0; 3141 if (Symbol) { 3142 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3143 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 3144 ADRPReg) 3145 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3146 3147 CallReg = createResultReg(&AArch64::GPR64RegClass); 3148 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3149 TII.get(AArch64::LDRXui), CallReg) 3150 .addReg(ADRPReg) 3151 .addSym(Symbol, 3152 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3153 } else if (Addr.getGlobalValue()) 3154 CallReg = materializeGV(Addr.getGlobalValue()); 3155 else if (Addr.getReg()) 3156 CallReg = Addr.getReg(); 3157 3158 if (!CallReg) 3159 return false; 3160 3161 const MCInstrDesc &II = TII.get(AArch64::BLR); 3162 CallReg = constrainOperandRegClass(II, CallReg, 0); 3163 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); 3164 } 3165 3166 // Add implicit physical register uses to the call. 3167 for (auto Reg : CLI.OutRegs) 3168 MIB.addReg(Reg, RegState::Implicit); 3169 3170 // Add a register mask with the call-preserved registers. 3171 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3172 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3173 3174 CLI.Call = MIB; 3175 3176 // Finish off the call including any return values. 3177 return finishCall(CLI, RetVT, NumBytes); 3178} 3179 3180bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { 3181 if (Alignment) 3182 return Len / Alignment <= 4; 3183 else 3184 return Len < 32; 3185} 3186 3187bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3188 uint64_t Len, unsigned Alignment) { 3189 // Make sure we don't bloat code by inlining very large memcpy's. 3190 if (!isMemCpySmall(Len, Alignment)) 3191 return false; 3192 3193 int64_t UnscaledOffset = 0; 3194 Address OrigDest = Dest; 3195 Address OrigSrc = Src; 3196 3197 while (Len) { 3198 MVT VT; 3199 if (!Alignment || Alignment >= 8) { 3200 if (Len >= 8) 3201 VT = MVT::i64; 3202 else if (Len >= 4) 3203 VT = MVT::i32; 3204 else if (Len >= 2) 3205 VT = MVT::i16; 3206 else { 3207 VT = MVT::i8; 3208 } 3209 } else { 3210 // Bound based on alignment. 3211 if (Len >= 4 && Alignment == 4) 3212 VT = MVT::i32; 3213 else if (Len >= 2 && Alignment == 2) 3214 VT = MVT::i16; 3215 else { 3216 VT = MVT::i8; 3217 } 3218 } 3219 3220 unsigned ResultReg = emitLoad(VT, VT, Src); 3221 if (!ResultReg) 3222 return false; 3223 3224 if (!emitStore(VT, ResultReg, Dest)) 3225 return false; 3226 3227 int64_t Size = VT.getSizeInBits() / 8; 3228 Len -= Size; 3229 UnscaledOffset += Size; 3230 3231 // We need to recompute the unscaled offset for each iteration. 3232 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3233 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3234 } 3235 3236 return true; 3237} 3238 3239/// \brief Check if it is possible to fold the condition from the XALU intrinsic 3240/// into the user. The condition code will only be updated on success. 
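/// For example (illustrative):
///   %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %o = extractvalue { i32, i1 } %r, 1
///   br i1 %o, label %ovf, label %cont
/// Here the branch can reuse the V flag set by the flag-setting add emitted
/// for the intrinsic (condition code VS) instead of materializing %o and
/// testing it.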
3241bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3242 const Instruction *I, 3243 const Value *Cond) { 3244 if (!isa<ExtractValueInst>(Cond)) 3245 return false; 3246 3247 const auto *EV = cast<ExtractValueInst>(Cond); 3248 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3249 return false; 3250 3251 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3252 MVT RetVT; 3253 const Function *Callee = II->getCalledFunction(); 3254 Type *RetTy = 3255 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3256 if (!isTypeLegal(RetTy, RetVT)) 3257 return false; 3258 3259 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3260 return false; 3261 3262 const Value *LHS = II->getArgOperand(0); 3263 const Value *RHS = II->getArgOperand(1); 3264 3265 // Canonicalize immediate to the RHS. 3266 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3267 isCommutativeIntrinsic(II)) 3268 std::swap(LHS, RHS); 3269 3270 // Simplify multiplies. 3271 Intrinsic::ID IID = II->getIntrinsicID(); 3272 switch (IID) { 3273 default: 3274 break; 3275 case Intrinsic::smul_with_overflow: 3276 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3277 if (C->getValue() == 2) 3278 IID = Intrinsic::sadd_with_overflow; 3279 break; 3280 case Intrinsic::umul_with_overflow: 3281 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3282 if (C->getValue() == 2) 3283 IID = Intrinsic::uadd_with_overflow; 3284 break; 3285 } 3286 3287 AArch64CC::CondCode TmpCC; 3288 switch (IID) { 3289 default: 3290 return false; 3291 case Intrinsic::sadd_with_overflow: 3292 case Intrinsic::ssub_with_overflow: 3293 TmpCC = AArch64CC::VS; 3294 break; 3295 case Intrinsic::uadd_with_overflow: 3296 TmpCC = AArch64CC::HS; 3297 break; 3298 case Intrinsic::usub_with_overflow: 3299 TmpCC = AArch64CC::LO; 3300 break; 3301 case Intrinsic::smul_with_overflow: 3302 case Intrinsic::umul_with_overflow: 3303 TmpCC = AArch64CC::NE; 3304 break; 3305 } 3306 3307 // Check if both instructions are in the same basic block. 3308 if (!isValueAvailable(II)) 3309 return false; 3310 3311 // Make sure nothing is in the way 3312 BasicBlock::const_iterator Start(I); 3313 BasicBlock::const_iterator End(II); 3314 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3315 // We only expect extractvalue instructions between the intrinsic and the 3316 // instruction to be selected. 3317 if (!isa<ExtractValueInst>(Itr)) 3318 return false; 3319 3320 // Check that the extractvalue operand comes from the intrinsic. 3321 const auto *EVI = cast<ExtractValueInst>(Itr); 3322 if (EVI->getAggregateOperand() != II) 3323 return false; 3324 } 3325 3326 CC = TmpCC; 3327 return true; 3328} 3329 3330bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3331 // FIXME: Handle more intrinsics. 3332 switch (II->getIntrinsicID()) { 3333 default: return false; 3334 case Intrinsic::frameaddress: { 3335 MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); 3336 MFI->setFrameAddressIsTaken(true); 3337 3338 const AArch64RegisterInfo *RegInfo = 3339 static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo()); 3340 unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3341 unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3342 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3343 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3344 // Recursively load frame address 3345 // ldr x0, [fp] 3346 // ldr x0, [x0] 3347 // ldr x0, [x0] 3348 // ... 
3349 unsigned DestReg; 3350 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3351 while (Depth--) { 3352 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3353 SrcReg, /*IsKill=*/true, 0); 3354 assert(DestReg && "Unexpected LDR instruction emission failure."); 3355 SrcReg = DestReg; 3356 } 3357 3358 updateValueMap(II, SrcReg); 3359 return true; 3360 } 3361 case Intrinsic::memcpy: 3362 case Intrinsic::memmove: { 3363 const auto *MTI = cast<MemTransferInst>(II); 3364 // Don't handle volatile. 3365 if (MTI->isVolatile()) 3366 return false; 3367 3368 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3369 // we would emit dead code because we don't currently handle memmoves. 3370 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3371 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3372 // Small memcpy's are common enough that we want to do them without a call 3373 // if possible. 3374 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3375 unsigned Alignment = MTI->getAlignment(); 3376 if (isMemCpySmall(Len, Alignment)) { 3377 Address Dest, Src; 3378 if (!computeAddress(MTI->getRawDest(), Dest) || 3379 !computeAddress(MTI->getRawSource(), Src)) 3380 return false; 3381 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3382 return true; 3383 } 3384 } 3385 3386 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3387 return false; 3388 3389 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3390 // Fast instruction selection doesn't support the special 3391 // address spaces. 3392 return false; 3393 3394 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3395 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); 3396 } 3397 case Intrinsic::memset: { 3398 const MemSetInst *MSI = cast<MemSetInst>(II); 3399 // Don't handle volatile. 3400 if (MSI->isVolatile()) 3401 return false; 3402 3403 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3404 return false; 3405 3406 if (MSI->getDestAddressSpace() > 255) 3407 // Fast instruction selection doesn't support the special 3408 // address spaces. 3409 return false; 3410 3411 return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); 3412 } 3413 case Intrinsic::sin: 3414 case Intrinsic::cos: 3415 case Intrinsic::pow: { 3416 MVT RetVT; 3417 if (!isTypeLegal(II->getType(), RetVT)) 3418 return false; 3419 3420 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3421 return false; 3422 3423 static const RTLIB::Libcall LibCallTable[3][2] = { 3424 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3425 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3426 { RTLIB::POW_F32, RTLIB::POW_F64 } 3427 }; 3428 RTLIB::Libcall LC; 3429 bool Is64Bit = RetVT == MVT::f64; 3430 switch (II->getIntrinsicID()) { 3431 default: 3432 llvm_unreachable("Unexpected intrinsic."); 3433 case Intrinsic::sin: 3434 LC = LibCallTable[0][Is64Bit]; 3435 break; 3436 case Intrinsic::cos: 3437 LC = LibCallTable[1][Is64Bit]; 3438 break; 3439 case Intrinsic::pow: 3440 LC = LibCallTable[2][Is64Bit]; 3441 break; 3442 } 3443 3444 ArgListTy Args; 3445 Args.reserve(II->getNumArgOperands()); 3446 3447 // Populate the argument list. 
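    // Illustrative note: e.g. "call double @llvm.sin.f64(double %x)" is
    // lowered here as an ordinary call to the "sin" library routine, with %x
    // passed through the argument list built below.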
3448 for (auto &Arg : II->arg_operands()) { 3449 ArgListEntry Entry; 3450 Entry.Val = Arg; 3451 Entry.Ty = Arg->getType(); 3452 Args.push_back(Entry); 3453 } 3454 3455 CallLoweringInfo CLI; 3456 MCContext &Ctx = MF->getContext(); 3457 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3458 TLI.getLibcallName(LC), std::move(Args)); 3459 if (!lowerCallTo(CLI)) 3460 return false; 3461 updateValueMap(II, CLI.ResultReg); 3462 return true; 3463 } 3464 case Intrinsic::fabs: { 3465 MVT VT; 3466 if (!isTypeLegal(II->getType(), VT)) 3467 return false; 3468 3469 unsigned Opc; 3470 switch (VT.SimpleTy) { 3471 default: 3472 return false; 3473 case MVT::f32: 3474 Opc = AArch64::FABSSr; 3475 break; 3476 case MVT::f64: 3477 Opc = AArch64::FABSDr; 3478 break; 3479 } 3480 unsigned SrcReg = getRegForValue(II->getOperand(0)); 3481 if (!SrcReg) 3482 return false; 3483 bool SrcRegIsKill = hasTrivialKill(II->getOperand(0)); 3484 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3485 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 3486 .addReg(SrcReg, getKillRegState(SrcRegIsKill)); 3487 updateValueMap(II, ResultReg); 3488 return true; 3489 } 3490 case Intrinsic::trap: { 3491 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3492 .addImm(1); 3493 return true; 3494 } 3495 case Intrinsic::sqrt: { 3496 Type *RetTy = II->getCalledFunction()->getReturnType(); 3497 3498 MVT VT; 3499 if (!isTypeLegal(RetTy, VT)) 3500 return false; 3501 3502 unsigned Op0Reg = getRegForValue(II->getOperand(0)); 3503 if (!Op0Reg) 3504 return false; 3505 bool Op0IsKill = hasTrivialKill(II->getOperand(0)); 3506 3507 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill); 3508 if (!ResultReg) 3509 return false; 3510 3511 updateValueMap(II, ResultReg); 3512 return true; 3513 } 3514 case Intrinsic::sadd_with_overflow: 3515 case Intrinsic::uadd_with_overflow: 3516 case Intrinsic::ssub_with_overflow: 3517 case Intrinsic::usub_with_overflow: 3518 case Intrinsic::smul_with_overflow: 3519 case Intrinsic::umul_with_overflow: { 3520 // This implements the basic lowering of the xalu with overflow intrinsics. 3521 const Function *Callee = II->getCalledFunction(); 3522 auto *Ty = cast<StructType>(Callee->getReturnType()); 3523 Type *RetTy = Ty->getTypeAtIndex(0U); 3524 3525 MVT VT; 3526 if (!isTypeLegal(RetTy, VT)) 3527 return false; 3528 3529 if (VT != MVT::i32 && VT != MVT::i64) 3530 return false; 3531 3532 const Value *LHS = II->getArgOperand(0); 3533 const Value *RHS = II->getArgOperand(1); 3534 // Canonicalize immediate to the RHS. 3535 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3536 isCommutativeIntrinsic(II)) 3537 std::swap(LHS, RHS); 3538 3539 // Simplify multiplies. 
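    // e.g. @llvm.smul.with.overflow(%x, 2) overflows exactly when
    // @llvm.sadd.with.overflow(%x, %x) does, so the multiply-by-two form can
    // reuse the cheaper add-with-overflow lowering below.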
3540 Intrinsic::ID IID = II->getIntrinsicID(); 3541 switch (IID) { 3542 default: 3543 break; 3544 case Intrinsic::smul_with_overflow: 3545 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3546 if (C->getValue() == 2) { 3547 IID = Intrinsic::sadd_with_overflow; 3548 RHS = LHS; 3549 } 3550 break; 3551 case Intrinsic::umul_with_overflow: 3552 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3553 if (C->getValue() == 2) { 3554 IID = Intrinsic::uadd_with_overflow; 3555 RHS = LHS; 3556 } 3557 break; 3558 } 3559 3560 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3561 AArch64CC::CondCode CC = AArch64CC::Invalid; 3562 switch (IID) { 3563 default: llvm_unreachable("Unexpected intrinsic!"); 3564 case Intrinsic::sadd_with_overflow: 3565 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3566 CC = AArch64CC::VS; 3567 break; 3568 case Intrinsic::uadd_with_overflow: 3569 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3570 CC = AArch64CC::HS; 3571 break; 3572 case Intrinsic::ssub_with_overflow: 3573 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3574 CC = AArch64CC::VS; 3575 break; 3576 case Intrinsic::usub_with_overflow: 3577 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3578 CC = AArch64CC::LO; 3579 break; 3580 case Intrinsic::smul_with_overflow: { 3581 CC = AArch64CC::NE; 3582 unsigned LHSReg = getRegForValue(LHS); 3583 if (!LHSReg) 3584 return false; 3585 bool LHSIsKill = hasTrivialKill(LHS); 3586 3587 unsigned RHSReg = getRegForValue(RHS); 3588 if (!RHSReg) 3589 return false; 3590 bool RHSIsKill = hasTrivialKill(RHS); 3591 3592 if (VT == MVT::i32) { 3593 MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3594 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, 3595 /*IsKill=*/false, 32); 3596 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3597 AArch64::sub_32); 3598 ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, 3599 AArch64::sub_32); 3600 emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3601 AArch64_AM::ASR, 31, /*WantResult=*/false); 3602 } else { 3603 assert(VT == MVT::i64 && "Unexpected value type."); 3604 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3605 // reused in the next instruction. 3606 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3607 /*IsKill=*/false); 3608 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, 3609 RHSReg, RHSIsKill); 3610 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3611 AArch64_AM::ASR, 63, /*WantResult=*/false); 3612 } 3613 break; 3614 } 3615 case Intrinsic::umul_with_overflow: { 3616 CC = AArch64CC::NE; 3617 unsigned LHSReg = getRegForValue(LHS); 3618 if (!LHSReg) 3619 return false; 3620 bool LHSIsKill = hasTrivialKill(LHS); 3621 3622 unsigned RHSReg = getRegForValue(RHS); 3623 if (!RHSReg) 3624 return false; 3625 bool RHSIsKill = hasTrivialKill(RHS); 3626 3627 if (VT == MVT::i32) { 3628 MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3629 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg, 3630 /*IsKill=*/false, AArch64_AM::LSR, 32, 3631 /*WantResult=*/false); 3632 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3633 AArch64::sub_32); 3634 } else { 3635 assert(VT == MVT::i64 && "Unexpected value type."); 3636 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3637 // reused in the next instruction. 
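        // Roughly (with illustrative registers), the 64-bit unsigned overflow
        // check below is:
        //   mul   x8, xLHS, xRHS    ; low 64 bits of the product
        //   umulh x9, xLHS, xRHS    ; high 64 bits of the product
        //   cmp   xzr, x9           ; NE <=> high half nonzero <=> overflow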
3638 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3639 /*IsKill=*/false); 3640 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, 3641 RHSReg, RHSIsKill); 3642 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, 3643 /*IsKill=*/false, /*WantResult=*/false); 3644 } 3645 break; 3646 } 3647 } 3648 3649 if (MulReg) { 3650 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3651 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3652 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3653 } 3654 3655 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3656 AArch64::WZR, /*IsKill=*/true, AArch64::WZR, 3657 /*IsKill=*/true, getInvertedCondCode(CC)); 3658 (void)ResultReg2; 3659 assert((ResultReg1 + 1) == ResultReg2 && 3660 "Nonconsecutive result registers."); 3661 updateValueMap(II, ResultReg1, 2); 3662 return true; 3663 } 3664 } 3665 return false; 3666} 3667 3668bool AArch64FastISel::selectRet(const Instruction *I) { 3669 const ReturnInst *Ret = cast<ReturnInst>(I); 3670 const Function &F = *I->getParent()->getParent(); 3671 3672 if (!FuncInfo.CanLowerReturn) 3673 return false; 3674 3675 if (F.isVarArg()) 3676 return false; 3677 3678 if (TLI.supportSwiftError() && 3679 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3680 return false; 3681 3682 if (TLI.supportSplitCSR(FuncInfo.MF)) 3683 return false; 3684 3685 // Build a list of return value registers. 3686 SmallVector<unsigned, 4> RetRegs; 3687 3688 if (Ret->getNumOperands() > 0) { 3689 CallingConv::ID CC = F.getCallingConv(); 3690 SmallVector<ISD::OutputArg, 4> Outs; 3691 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3692 3693 // Analyze operands of the call, assigning locations to each operand. 3694 SmallVector<CCValAssign, 16> ValLocs; 3695 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3696 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS 3697 : RetCC_AArch64_AAPCS; 3698 CCInfo.AnalyzeReturn(Outs, RetCC); 3699 3700 // Only handle a single return value for now. 3701 if (ValLocs.size() != 1) 3702 return false; 3703 3704 CCValAssign &VA = ValLocs[0]; 3705 const Value *RV = Ret->getOperand(0); 3706 3707 // Don't bother handling odd stuff for now. 3708 if ((VA.getLocInfo() != CCValAssign::Full) && 3709 (VA.getLocInfo() != CCValAssign::BCvt)) 3710 return false; 3711 3712 // Only handle register returns for now. 3713 if (!VA.isRegLoc()) 3714 return false; 3715 3716 unsigned Reg = getRegForValue(RV); 3717 if (Reg == 0) 3718 return false; 3719 3720 unsigned SrcReg = Reg + VA.getValNo(); 3721 unsigned DestReg = VA.getLocReg(); 3722 // Avoid a cross-class copy. This is very unlikely. 3723 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3724 return false; 3725 3726 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3727 if (!RVEVT.isSimple()) 3728 return false; 3729 3730 // Vectors (of > 1 lane) in big endian need tricky handling. 3731 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 && 3732 !Subtarget->isLittleEndian()) 3733 return false; 3734 3735 MVT RVVT = RVEVT.getSimpleVT(); 3736 if (RVVT == MVT::f128) 3737 return false; 3738 3739 MVT DestVT = VA.getValVT(); 3740 // Special handling for extended integers. 
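    // e.g. a function returning "zeroext i8" must have its result widened via
    // emitIntExt below before the final COPY into the return register; only
    // i1/i8/i16 sources carrying an explicit zeroext/sext attribute are
    // handled here.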
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      bool IsZExt = Outs[0].Flags.isZExt();
      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
      if (SrcReg == 0)
        return false;
    }

    // Make the copy.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(AArch64::RET_ReallyLR));
  for (unsigned RetReg : RetRegs)
    MIB.addReg(RetReg, RegState::Implicit);
  return true;
}

bool AArch64FastISel::selectTrunc(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Op = I->getOperand(0);
  Type *SrcTy = Op->getType();

  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
      DestVT != MVT::i1)
    return false;

  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(Op);

  // If we're truncating from i64 to a smaller non-legal type then generate an
  // AND. Otherwise, we know the high bits are undefined and a truncate only
  // generates a COPY. We cannot mark the source register as the result
  // register as well, because this can incorrectly transfer the kill flag onto
  // the source register.
  unsigned ResultReg;
  if (SrcVT == MVT::i64) {
    uint64_t Mask = 0;
    switch (DestVT.SimpleTy) {
    default:
      // Trunc i64 to i32 is handled by the target-independent fast-isel.
      return false;
    case MVT::i1:
      Mask = 0x1;
      break;
    case MVT::i8:
      Mask = 0xff;
      break;
    case MVT::i16:
      Mask = 0xffff;
      break;
    }
    // Issue an extract_subreg to get the lower 32 bits.
    unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
                                                AArch64::sub_32);
    // Create the AND instruction which performs the actual truncation.
    ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
  } else {
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(SrcReg, getKillRegState(SrcIsKill));
  }

  updateValueMap(I, ResultReg);
  return true;
}

unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
          DestVT == MVT::i64) &&
         "Unexpected value type.");
  // Handle i8 and i16 as i32.
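  // (AArch64 has no 8- or 16-bit general-purpose registers, so narrow results
  // simply live in the low bits of a W register.)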
3840 if (DestVT == MVT::i8 || DestVT == MVT::i16) 3841 DestVT = MVT::i32; 3842 3843 if (IsZExt) { 3844 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 3845 assert(ResultReg && "Unexpected AND instruction emission failure."); 3846 if (DestVT == MVT::i64) { 3847 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 3848 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 3849 unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3850 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3851 TII.get(AArch64::SUBREG_TO_REG), Reg64) 3852 .addImm(0) 3853 .addReg(ResultReg) 3854 .addImm(AArch64::sub_32); 3855 ResultReg = Reg64; 3856 } 3857 return ResultReg; 3858 } else { 3859 if (DestVT == MVT::i64) { 3860 // FIXME: We're SExt i1 to i64. 3861 return 0; 3862 } 3863 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 3864 /*TODO:IsKill=*/false, 0, 0); 3865 } 3866} 3867 3868unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 3869 unsigned Op1, bool Op1IsKill) { 3870 unsigned Opc, ZReg; 3871 switch (RetVT.SimpleTy) { 3872 default: return 0; 3873 case MVT::i8: 3874 case MVT::i16: 3875 case MVT::i32: 3876 RetVT = MVT::i32; 3877 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 3878 case MVT::i64: 3879 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 3880 } 3881 3882 const TargetRegisterClass *RC = 3883 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 3884 return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill, 3885 /*IsKill=*/ZReg, true); 3886} 3887 3888unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 3889 unsigned Op1, bool Op1IsKill) { 3890 if (RetVT != MVT::i64) 3891 return 0; 3892 3893 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 3894 Op0, Op0IsKill, Op1, Op1IsKill, 3895 AArch64::XZR, /*IsKill=*/true); 3896} 3897 3898unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 3899 unsigned Op1, bool Op1IsKill) { 3900 if (RetVT != MVT::i64) 3901 return 0; 3902 3903 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 3904 Op0, Op0IsKill, Op1, Op1IsKill, 3905 AArch64::XZR, /*IsKill=*/true); 3906} 3907 3908unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 3909 unsigned Op1Reg, bool Op1IsKill) { 3910 unsigned Opc = 0; 3911 bool NeedTrunc = false; 3912 uint64_t Mask = 0; 3913 switch (RetVT.SimpleTy) { 3914 default: return 0; 3915 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 3916 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 3917 case MVT::i32: Opc = AArch64::LSLVWr; break; 3918 case MVT::i64: Opc = AArch64::LSLVXr; break; 3919 } 3920 3921 const TargetRegisterClass *RC = 3922 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 3923 if (NeedTrunc) { 3924 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 3925 Op1IsKill = true; 3926 } 3927 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 3928 Op1IsKill); 3929 if (NeedTrunc) 3930 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 3931 return ResultReg; 3932} 3933 3934unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 3935 bool Op0IsKill, uint64_t Shift, 3936 bool IsZExt) { 3937 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 3938 "Unexpected source/return type pair."); 3939 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 3940 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 3941 "Unexpected source value type."); 3942 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 3943 RetVT == MVT::i64) && "Unexpected return value type."); 3944 3945 bool Is64Bit = (RetVT == MVT::i64); 3946 unsigned RegSize = Is64Bit ? 64 : 32; 3947 unsigned DstBits = RetVT.getSizeInBits(); 3948 unsigned SrcBits = SrcVT.getSizeInBits(); 3949 const TargetRegisterClass *RC = 3950 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 3951 3952 // Just emit a copy for "zero" shifts. 3953 if (Shift == 0) { 3954 if (RetVT == SrcVT) { 3955 unsigned ResultReg = createResultReg(RC); 3956 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3957 TII.get(TargetOpcode::COPY), ResultReg) 3958 .addReg(Op0, getKillRegState(Op0IsKill)); 3959 return ResultReg; 3960 } else 3961 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 3962 } 3963 3964 // Don't deal with undefined shifts. 3965 if (Shift >= DstBits) 3966 return 0; 3967 3968 // For immediate shifts we can fold the zero-/sign-extension into the shift. 3969 // {S|U}BFM Wd, Wn, #r, #s 3970 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 3971 3972 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 3973 // %2 = shl i16 %1, 4 3974 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 3975 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 3976 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 3977 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 3978 3979 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 3980 // %2 = shl i16 %1, 8 3981 // Wd<32+7-24,32-24> = Wn<7:0> 3982 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 3983 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 3984 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 3985 3986 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 3987 // %2 = shl i16 %1, 12 3988 // Wd<32+3-20,32-20> = Wn<3:0> 3989 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 3990 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 3991 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 3992 3993 unsigned ImmR = RegSize - Shift; 3994 // Limit the width to the length of the source type. 
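  // e.g. for a zero-extended i16 shifted left by 4 into an i32 result:
  //   ImmR = 32 - 4 = 28, ImmS = min(16 - 1, 32 - 1 - 4) = 15,
  // i.e. (with illustrative registers) "ubfm w0, w1, #28, #15", the
  // "ubfiz w0, w1, #4, #16" alias, which performs the extend and the shift in
  // a single instruction.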
3995 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 3996 static const unsigned OpcTable[2][2] = { 3997 {AArch64::SBFMWri, AArch64::SBFMXri}, 3998 {AArch64::UBFMWri, AArch64::UBFMXri} 3999 }; 4000 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4001 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4002 unsigned TmpReg = MRI.createVirtualRegister(RC); 4003 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4004 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4005 .addImm(0) 4006 .addReg(Op0, getKillRegState(Op0IsKill)) 4007 .addImm(AArch64::sub_32); 4008 Op0 = TmpReg; 4009 Op0IsKill = true; 4010 } 4011 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4012} 4013 4014unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4015 unsigned Op1Reg, bool Op1IsKill) { 4016 unsigned Opc = 0; 4017 bool NeedTrunc = false; 4018 uint64_t Mask = 0; 4019 switch (RetVT.SimpleTy) { 4020 default: return 0; 4021 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4022 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4023 case MVT::i32: Opc = AArch64::LSRVWr; break; 4024 case MVT::i64: Opc = AArch64::LSRVXr; break; 4025 } 4026 4027 const TargetRegisterClass *RC = 4028 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4029 if (NeedTrunc) { 4030 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask); 4031 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4032 Op0IsKill = Op1IsKill = true; 4033 } 4034 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4035 Op1IsKill); 4036 if (NeedTrunc) 4037 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4038 return ResultReg; 4039} 4040 4041unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4042 bool Op0IsKill, uint64_t Shift, 4043 bool IsZExt) { 4044 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4045 "Unexpected source/return type pair."); 4046 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4047 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4048 "Unexpected source value type."); 4049 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4050 RetVT == MVT::i64) && "Unexpected return value type."); 4051 4052 bool Is64Bit = (RetVT == MVT::i64); 4053 unsigned RegSize = Is64Bit ? 64 : 32; 4054 unsigned DstBits = RetVT.getSizeInBits(); 4055 unsigned SrcBits = SrcVT.getSizeInBits(); 4056 const TargetRegisterClass *RC = 4057 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4058 4059 // Just emit a copy for "zero" shifts. 4060 if (Shift == 0) { 4061 if (RetVT == SrcVT) { 4062 unsigned ResultReg = createResultReg(RC); 4063 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4064 TII.get(TargetOpcode::COPY), ResultReg) 4065 .addReg(Op0, getKillRegState(Op0IsKill)); 4066 return ResultReg; 4067 } else 4068 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4069 } 4070 4071 // Don't deal with undefined shifts. 4072 if (Shift >= DstBits) 4073 return 0; 4074 4075 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4076 // {S|U}BFM Wd, Wn, #r, #s 4077 // Wd<s-r:0> = Wn<s:r> when r <= s 4078 4079 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4080 // %2 = lshr i16 %1, 4 4081 // Wd<7-4:0> = Wn<7:4> 4082 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4083 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4084 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4085 4086 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4087 // %2 = lshr i16 %1, 8 4088 // Wd<7-7,0> = Wn<7:7> 4089 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4090 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4091 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4092 4093 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4094 // %2 = lshr i16 %1, 12 4095 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4096 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4097 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4098 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4099 4100 if (Shift >= SrcBits && IsZExt) 4101 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4102 4103 // It is not possible to fold a sign-extend into the LShr instruction. In this 4104 // case emit a sign-extend. 4105 if (!IsZExt) { 4106 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4107 if (!Op0) 4108 return 0; 4109 Op0IsKill = true; 4110 SrcVT = RetVT; 4111 SrcBits = SrcVT.getSizeInBits(); 4112 IsZExt = true; 4113 } 4114 4115 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4116 unsigned ImmS = SrcBits - 1; 4117 static const unsigned OpcTable[2][2] = { 4118 {AArch64::SBFMWri, AArch64::SBFMXri}, 4119 {AArch64::UBFMWri, AArch64::UBFMXri} 4120 }; 4121 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4122 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4123 unsigned TmpReg = MRI.createVirtualRegister(RC); 4124 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4125 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4126 .addImm(0) 4127 .addReg(Op0, getKillRegState(Op0IsKill)) 4128 .addImm(AArch64::sub_32); 4129 Op0 = TmpReg; 4130 Op0IsKill = true; 4131 } 4132 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4133} 4134 4135unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4136 unsigned Op1Reg, bool Op1IsKill) { 4137 unsigned Opc = 0; 4138 bool NeedTrunc = false; 4139 uint64_t Mask = 0; 4140 switch (RetVT.SimpleTy) { 4141 default: return 0; 4142 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4143 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4144 case MVT::i32: Opc = AArch64::ASRVWr; break; 4145 case MVT::i64: Opc = AArch64::ASRVXr; break; 4146 } 4147 4148 const TargetRegisterClass *RC = 4149 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4150 if (NeedTrunc) { 4151 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false); 4152 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4153 Op0IsKill = Op1IsKill = true; 4154 } 4155 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4156 Op1IsKill); 4157 if (NeedTrunc) 4158 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4159 return ResultReg; 4160} 4161 4162unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4163 bool Op0IsKill, uint64_t Shift, 4164 bool IsZExt) { 4165 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4166 "Unexpected source/return type pair."); 4167 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4168 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4169 "Unexpected source value type."); 4170 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4171 RetVT == MVT::i64) && "Unexpected return value type."); 4172 4173 bool Is64Bit = (RetVT == MVT::i64); 4174 unsigned RegSize = Is64Bit ? 64 : 32; 4175 unsigned DstBits = RetVT.getSizeInBits(); 4176 unsigned SrcBits = SrcVT.getSizeInBits(); 4177 const TargetRegisterClass *RC = 4178 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4179 4180 // Just emit a copy for "zero" shifts. 4181 if (Shift == 0) { 4182 if (RetVT == SrcVT) { 4183 unsigned ResultReg = createResultReg(RC); 4184 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4185 TII.get(TargetOpcode::COPY), ResultReg) 4186 .addReg(Op0, getKillRegState(Op0IsKill)); 4187 return ResultReg; 4188 } else 4189 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4190 } 4191 4192 // Don't deal with undefined shifts. 4193 if (Shift >= DstBits) 4194 return 0; 4195 4196 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4197 // {S|U}BFM Wd, Wn, #r, #s 4198 // Wd<s-r:0> = Wn<s:r> when r <= s 4199 4200 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4201 // %2 = ashr i16 %1, 4 4202 // Wd<7-4:0> = Wn<7:4> 4203 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4204 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4205 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4206 4207 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4208 // %2 = ashr i16 %1, 8 4209 // Wd<7-7,0> = Wn<7:7> 4210 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4211 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4212 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4213 4214 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4215 // %2 = ashr i16 %1, 12 4216 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4217 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4218 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4219 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4220 4221 if (Shift >= SrcBits && IsZExt) 4222 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4223 4224 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4225 unsigned ImmS = SrcBits - 1; 4226 static const unsigned OpcTable[2][2] = { 4227 {AArch64::SBFMWri, AArch64::SBFMXri}, 4228 {AArch64::UBFMWri, AArch64::UBFMXri} 4229 }; 4230 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4231 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4232 unsigned TmpReg = MRI.createVirtualRegister(RC); 4233 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4234 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4235 .addImm(0) 4236 .addReg(Op0, getKillRegState(Op0IsKill)) 4237 .addImm(AArch64::sub_32); 4238 Op0 = TmpReg; 4239 Op0IsKill = true; 4240 } 4241 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4242} 4243 4244unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4245 bool IsZExt) { 4246 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4247 4248 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4249 // DestVT are odd things, so test to make sure that they are both types we can 4250 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4251 // bail out to SelectionDAG. 4252 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4253 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4254 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4255 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4256 return 0; 4257 4258 unsigned Opc; 4259 unsigned Imm = 0; 4260 4261 switch (SrcVT.SimpleTy) { 4262 default: 4263 return 0; 4264 case MVT::i1: 4265 return emiti1Ext(SrcReg, DestVT, IsZExt); 4266 case MVT::i8: 4267 if (DestVT == MVT::i64) 4268 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4269 else 4270 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4271 Imm = 7; 4272 break; 4273 case MVT::i16: 4274 if (DestVT == MVT::i64) 4275 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4276 else 4277 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4278 Imm = 15; 4279 break; 4280 case MVT::i32: 4281 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4282 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4283 Imm = 31; 4284 break; 4285 } 4286 4287 // Handle i8 and i16 as i32. 
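  // The 64-bit SBFMXri/UBFMXri forms chosen above require a 64-bit source
  // operand, so when extending to i64 the 32-bit SrcReg is first wrapped in a
  // SUBREG_TO_REG below.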
4288 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4289 DestVT = MVT::i32; 4290 else if (DestVT == MVT::i64) { 4291 unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4292 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4293 TII.get(AArch64::SUBREG_TO_REG), Src64) 4294 .addImm(0) 4295 .addReg(SrcReg) 4296 .addImm(AArch64::sub_32); 4297 SrcReg = Src64; 4298 } 4299 4300 const TargetRegisterClass *RC = 4301 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4302 return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm); 4303} 4304 4305static bool isZExtLoad(const MachineInstr *LI) { 4306 switch (LI->getOpcode()) { 4307 default: 4308 return false; 4309 case AArch64::LDURBBi: 4310 case AArch64::LDURHHi: 4311 case AArch64::LDURWi: 4312 case AArch64::LDRBBui: 4313 case AArch64::LDRHHui: 4314 case AArch64::LDRWui: 4315 case AArch64::LDRBBroX: 4316 case AArch64::LDRHHroX: 4317 case AArch64::LDRWroX: 4318 case AArch64::LDRBBroW: 4319 case AArch64::LDRHHroW: 4320 case AArch64::LDRWroW: 4321 return true; 4322 } 4323} 4324 4325static bool isSExtLoad(const MachineInstr *LI) { 4326 switch (LI->getOpcode()) { 4327 default: 4328 return false; 4329 case AArch64::LDURSBWi: 4330 case AArch64::LDURSHWi: 4331 case AArch64::LDURSBXi: 4332 case AArch64::LDURSHXi: 4333 case AArch64::LDURSWi: 4334 case AArch64::LDRSBWui: 4335 case AArch64::LDRSHWui: 4336 case AArch64::LDRSBXui: 4337 case AArch64::LDRSHXui: 4338 case AArch64::LDRSWui: 4339 case AArch64::LDRSBWroX: 4340 case AArch64::LDRSHWroX: 4341 case AArch64::LDRSBXroX: 4342 case AArch64::LDRSHXroX: 4343 case AArch64::LDRSWroX: 4344 case AArch64::LDRSBWroW: 4345 case AArch64::LDRSHWroW: 4346 case AArch64::LDRSBXroW: 4347 case AArch64::LDRSHXroW: 4348 case AArch64::LDRSWroW: 4349 return true; 4350 } 4351} 4352 4353bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4354 MVT SrcVT) { 4355 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4356 if (!LI || !LI->hasOneUse()) 4357 return false; 4358 4359 // Check if the load instruction has already been selected. 4360 unsigned Reg = lookUpRegForValue(LI); 4361 if (!Reg) 4362 return false; 4363 4364 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4365 if (!MI) 4366 return false; 4367 4368 // Check if the correct load instruction has been emitted - SelectionDAG might 4369 // have emitted a zero-extending load, but we need a sign-extending load. 4370 bool IsZExt = isa<ZExtInst>(I); 4371 const auto *LoadMI = MI; 4372 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4373 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4374 unsigned LoadReg = MI->getOperand(1).getReg(); 4375 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4376 assert(LoadMI && "Expected valid instruction"); 4377 } 4378 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4379 return false; 4380 4381 // Nothing to be done. 
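  // The selected load already extends to the width we need, so its result
  // register can be reused directly; the only remaining work (below) is fixing
  // up an i32 result that is consumed as i64.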
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    updateValueMap(I, Reg);
    return true;
  }

  if (IsZExt) {
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(Reg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    Reg = Reg64;
  } else {
    assert((MI->getOpcode() == TargetOpcode::COPY &&
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
           "Expected copy instruction");
    Reg = MI->getOperand(1).getReg();
    MI->eraseFromParent();
  }
  updateValueMap(I, Reg);
  return true;
}

bool AArch64FastISel::selectIntExt(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  MVT RetVT;
  MVT SrcVT;
  if (!isTypeSupported(I->getType(), RetVT))
    return false;

  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    return false;

  // Try to optimize already sign-/zero-extended values from load instructions.
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
    return true;

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(I->getOperand(0));

  // Try to optimize already sign-/zero-extended values from function arguments.
  bool IsZExt = isa<ZExtInst>(I);
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
        unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
            .addImm(0)
            .addReg(SrcReg, getKillRegState(SrcIsKill))
            .addImm(AArch64::sub_32);
        SrcReg = ResultReg;
      }
      // Conservatively clear all kill flags from all uses, because we are
      // replacing a sign-/zero-extend instruction at IR level with a nop at MI
      // level. The result of the instruction at IR level might have been
      // trivially dead, which is no longer true.
      unsigned UseReg = lookUpRegForValue(I);
      if (UseReg)
        MRI.clearKillFlags(UseReg);

      updateValueMap(I, SrcReg);
      return true;
    }
  }

  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i64 && DestVT != MVT::i32)
    return false;

  unsigned DivOpc;
  bool Is64bit = (DestVT == MVT::i64);
  switch (ISDOpcode) {
  default:
    return false;
  case ISD::SREM:
    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    break;
  case ISD::UREM:
    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    break;
  }
  unsigned MSubOpc = Is64bit ?
AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4482 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4483 if (!Src0Reg) 4484 return false; 4485 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4486 4487 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4488 if (!Src1Reg) 4489 return false; 4490 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4491 4492 const TargetRegisterClass *RC = 4493 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4494 unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false, 4495 Src1Reg, /*IsKill=*/false); 4496 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4497 // The remainder is computed as numerator - (quotient * denominator) using the 4498 // MSUB instruction. 4499 unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true, 4500 Src1Reg, Src1IsKill, Src0Reg, 4501 Src0IsKill); 4502 updateValueMap(I, ResultReg); 4503 return true; 4504} 4505 4506bool AArch64FastISel::selectMul(const Instruction *I) { 4507 MVT VT; 4508 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4509 return false; 4510 4511 if (VT.isVector()) 4512 return selectBinaryOp(I, ISD::MUL); 4513 4514 const Value *Src0 = I->getOperand(0); 4515 const Value *Src1 = I->getOperand(1); 4516 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4517 if (C->getValue().isPowerOf2()) 4518 std::swap(Src0, Src1); 4519 4520 // Try to simplify to a shift instruction. 4521 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4522 if (C->getValue().isPowerOf2()) { 4523 uint64_t ShiftVal = C->getValue().logBase2(); 4524 MVT SrcVT = VT; 4525 bool IsZExt = true; 4526 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4527 if (!isIntExtFree(ZExt)) { 4528 MVT VT; 4529 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4530 SrcVT = VT; 4531 IsZExt = true; 4532 Src0 = ZExt->getOperand(0); 4533 } 4534 } 4535 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4536 if (!isIntExtFree(SExt)) { 4537 MVT VT; 4538 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4539 SrcVT = VT; 4540 IsZExt = false; 4541 Src0 = SExt->getOperand(0); 4542 } 4543 } 4544 } 4545 4546 unsigned Src0Reg = getRegForValue(Src0); 4547 if (!Src0Reg) 4548 return false; 4549 bool Src0IsKill = hasTrivialKill(Src0); 4550 4551 unsigned ResultReg = 4552 emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt); 4553 4554 if (ResultReg) { 4555 updateValueMap(I, ResultReg); 4556 return true; 4557 } 4558 } 4559 4560 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4561 if (!Src0Reg) 4562 return false; 4563 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4564 4565 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4566 if (!Src1Reg) 4567 return false; 4568 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4569 4570 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); 4571 4572 if (!ResultReg) 4573 return false; 4574 4575 updateValueMap(I, ResultReg); 4576 return true; 4577} 4578 4579bool AArch64FastISel::selectShift(const Instruction *I) { 4580 MVT RetVT; 4581 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4582 return false; 4583 4584 if (RetVT.isVector()) 4585 return selectOperator(I, I->getOpcode()); 4586 4587 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4588 unsigned ResultReg = 0; 4589 uint64_t ShiftVal = C->getZExtValue(); 4590 MVT SrcVT = RetVT; 4591 bool IsZExt = I->getOpcode() != Instruction::AShr; 4592 const Value *Op0 = I->getOperand(0); 4593 
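    // If the shifted operand comes from a zext/sext that is not otherwise
    // free, try to fold the extension into the immediate shift, e.g.
    //   %e = zext i8 %x to i32
    //   %s = shl i32 %e, 4
    // can often be selected as a single UBFIZ instead of an extend plus a
    // shift.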
if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4594 if (!isIntExtFree(ZExt)) { 4595 MVT TmpVT; 4596 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4597 SrcVT = TmpVT; 4598 IsZExt = true; 4599 Op0 = ZExt->getOperand(0); 4600 } 4601 } 4602 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4603 if (!isIntExtFree(SExt)) { 4604 MVT TmpVT; 4605 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4606 SrcVT = TmpVT; 4607 IsZExt = false; 4608 Op0 = SExt->getOperand(0); 4609 } 4610 } 4611 } 4612 4613 unsigned Op0Reg = getRegForValue(Op0); 4614 if (!Op0Reg) 4615 return false; 4616 bool Op0IsKill = hasTrivialKill(Op0); 4617 4618 switch (I->getOpcode()) { 4619 default: llvm_unreachable("Unexpected instruction."); 4620 case Instruction::Shl: 4621 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4622 break; 4623 case Instruction::AShr: 4624 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4625 break; 4626 case Instruction::LShr: 4627 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4628 break; 4629 } 4630 if (!ResultReg) 4631 return false; 4632 4633 updateValueMap(I, ResultReg); 4634 return true; 4635 } 4636 4637 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4638 if (!Op0Reg) 4639 return false; 4640 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4641 4642 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 4643 if (!Op1Reg) 4644 return false; 4645 bool Op1IsKill = hasTrivialKill(I->getOperand(1)); 4646 4647 unsigned ResultReg = 0; 4648 switch (I->getOpcode()) { 4649 default: llvm_unreachable("Unexpected instruction."); 4650 case Instruction::Shl: 4651 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4652 break; 4653 case Instruction::AShr: 4654 ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4655 break; 4656 case Instruction::LShr: 4657 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4658 break; 4659 } 4660 4661 if (!ResultReg) 4662 return false; 4663 4664 updateValueMap(I, ResultReg); 4665 return true; 4666} 4667 4668bool AArch64FastISel::selectBitCast(const Instruction *I) { 4669 MVT RetVT, SrcVT; 4670 4671 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4672 return false; 4673 if (!isTypeLegal(I->getType(), RetVT)) 4674 return false; 4675 4676 unsigned Opc; 4677 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4678 Opc = AArch64::FMOVWSr; 4679 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4680 Opc = AArch64::FMOVXDr; 4681 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4682 Opc = AArch64::FMOVSWr; 4683 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4684 Opc = AArch64::FMOVDXr; 4685 else 4686 return false; 4687 4688 const TargetRegisterClass *RC = nullptr; 4689 switch (RetVT.SimpleTy) { 4690 default: llvm_unreachable("Unexpected value type."); 4691 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4692 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4693 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4694 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4695 } 4696 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4697 if (!Op0Reg) 4698 return false; 4699 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4700 unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill); 4701 4702 if (!ResultReg) 4703 return false; 4704 4705 updateValueMap(I, ResultReg); 4706 return true; 4707} 4708 4709bool AArch64FastISel::selectFRem(const Instruction *I) { 4710 MVT RetVT; 4711 if 
(!isTypeLegal(I->getType(), RetVT)) 4712 return false; 4713 4714 RTLIB::Libcall LC; 4715 switch (RetVT.SimpleTy) { 4716 default: 4717 return false; 4718 case MVT::f32: 4719 LC = RTLIB::REM_F32; 4720 break; 4721 case MVT::f64: 4722 LC = RTLIB::REM_F64; 4723 break; 4724 } 4725 4726 ArgListTy Args; 4727 Args.reserve(I->getNumOperands()); 4728 4729 // Populate the argument list. 4730 for (auto &Arg : I->operands()) { 4731 ArgListEntry Entry; 4732 Entry.Val = Arg; 4733 Entry.Ty = Arg->getType(); 4734 Args.push_back(Entry); 4735 } 4736 4737 CallLoweringInfo CLI; 4738 MCContext &Ctx = MF->getContext(); 4739 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4740 TLI.getLibcallName(LC), std::move(Args)); 4741 if (!lowerCallTo(CLI)) 4742 return false; 4743 updateValueMap(I, CLI.ResultReg); 4744 return true; 4745} 4746 4747bool AArch64FastISel::selectSDiv(const Instruction *I) { 4748 MVT VT; 4749 if (!isTypeLegal(I->getType(), VT)) 4750 return false; 4751 4752 if (!isa<ConstantInt>(I->getOperand(1))) 4753 return selectBinaryOp(I, ISD::SDIV); 4754 4755 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4756 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4757 !(C.isPowerOf2() || (-C).isPowerOf2())) 4758 return selectBinaryOp(I, ISD::SDIV); 4759 4760 unsigned Lg2 = C.countTrailingZeros(); 4761 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4762 if (!Src0Reg) 4763 return false; 4764 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4765 4766 if (cast<BinaryOperator>(I)->isExact()) { 4767 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2); 4768 if (!ResultReg) 4769 return false; 4770 updateValueMap(I, ResultReg); 4771 return true; 4772 } 4773 4774 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4775 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne); 4776 if (!AddReg) 4777 return false; 4778 4779 // (Src0 < 0) ? Pow2 - 1 : 0; 4780 if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0)) 4781 return false; 4782 4783 unsigned SelectOpc; 4784 const TargetRegisterClass *RC; 4785 if (VT == MVT::i64) { 4786 SelectOpc = AArch64::CSELXr; 4787 RC = &AArch64::GPR64RegClass; 4788 } else { 4789 SelectOpc = AArch64::CSELWr; 4790 RC = &AArch64::GPR32RegClass; 4791 } 4792 unsigned SelectReg = 4793 fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg, 4794 Src0IsKill, AArch64CC::LT); 4795 if (!SelectReg) 4796 return false; 4797 4798 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4799 // negate the result. 4800 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4801 unsigned ResultReg; 4802 if (C.isNegative()) 4803 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true, 4804 SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2); 4805 else 4806 ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2); 4807 4808 if (!ResultReg) 4809 return false; 4810 4811 updateValueMap(I, ResultReg); 4812 return true; 4813} 4814 4815/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4816/// have to duplicate it for AArch64, because otherwise we would fail during the 4817/// sign-extend emission. 4818std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4819 unsigned IdxN = getRegForValue(Idx); 4820 if (IdxN == 0) 4821 // Unhandled operand. Halt "fast" selection and bail. 
    return std::pair<unsigned, bool>(0, false);

  bool IdxNIsKill = hasTrivialKill(Idx);

  // If the index is smaller or larger than intptr_t, truncate or extend it.
  MVT PtrVT = TLI.getPointerTy(DL);
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
    IdxNIsKill = true;
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
}

/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  unsigned N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;
  bool NIsKill = hasTrivialKill(I->getOperand(0));

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = dyn_cast<StructType>(*GTI)) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      Type *Ty = GTI.getIndexedType();

      // If this is a constant subscript, handle it quickly.
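      // e.g. "getelementptr i32, i32* %p, i64 4" just adds 16 bytes to
      // TotalOffs instead of emitting a separate multiply and add.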
4863 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { 4864 if (CI->isZero()) 4865 continue; 4866 // N = N + Offset 4867 TotalOffs += 4868 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); 4869 continue; 4870 } 4871 if (TotalOffs) { 4872 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); 4873 if (!N) 4874 return false; 4875 NIsKill = true; 4876 TotalOffs = 0; 4877 } 4878 4879 // N = N + Idx * ElementSize; 4880 uint64_t ElementSize = DL.getTypeAllocSize(Ty); 4881 std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); 4882 unsigned IdxN = Pair.first; 4883 bool IdxNIsKill = Pair.second; 4884 if (!IdxN) 4885 return false; 4886 4887 if (ElementSize != 1) { 4888 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); 4889 if (!C) 4890 return false; 4891 IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true); 4892 if (!IdxN) 4893 return false; 4894 IdxNIsKill = true; 4895 } 4896 N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); 4897 if (!N) 4898 return false; 4899 } 4900 } 4901 if (TotalOffs) { 4902 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); 4903 if (!N) 4904 return false; 4905 } 4906 updateValueMap(I, N); 4907 return true; 4908} 4909 4910bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 4911 switch (I->getOpcode()) { 4912 default: 4913 break; 4914 case Instruction::Add: 4915 case Instruction::Sub: 4916 return selectAddSub(I); 4917 case Instruction::Mul: 4918 return selectMul(I); 4919 case Instruction::SDiv: 4920 return selectSDiv(I); 4921 case Instruction::SRem: 4922 if (!selectBinaryOp(I, ISD::SREM)) 4923 return selectRem(I, ISD::SREM); 4924 return true; 4925 case Instruction::URem: 4926 if (!selectBinaryOp(I, ISD::UREM)) 4927 return selectRem(I, ISD::UREM); 4928 return true; 4929 case Instruction::Shl: 4930 case Instruction::LShr: 4931 case Instruction::AShr: 4932 return selectShift(I); 4933 case Instruction::And: 4934 case Instruction::Or: 4935 case Instruction::Xor: 4936 return selectLogicalOp(I); 4937 case Instruction::Br: 4938 return selectBranch(I); 4939 case Instruction::IndirectBr: 4940 return selectIndirectBr(I); 4941 case Instruction::BitCast: 4942 if (!FastISel::selectBitCast(I)) 4943 return selectBitCast(I); 4944 return true; 4945 case Instruction::FPToSI: 4946 if (!selectCast(I, ISD::FP_TO_SINT)) 4947 return selectFPToInt(I, /*Signed=*/true); 4948 return true; 4949 case Instruction::FPToUI: 4950 return selectFPToInt(I, /*Signed=*/false); 4951 case Instruction::ZExt: 4952 case Instruction::SExt: 4953 return selectIntExt(I); 4954 case Instruction::Trunc: 4955 if (!selectCast(I, ISD::TRUNCATE)) 4956 return selectTrunc(I); 4957 return true; 4958 case Instruction::FPExt: 4959 return selectFPExt(I); 4960 case Instruction::FPTrunc: 4961 return selectFPTrunc(I); 4962 case Instruction::SIToFP: 4963 if (!selectCast(I, ISD::SINT_TO_FP)) 4964 return selectIntToFP(I, /*Signed=*/true); 4965 return true; 4966 case Instruction::UIToFP: 4967 return selectIntToFP(I, /*Signed=*/false); 4968 case Instruction::Load: 4969 return selectLoad(I); 4970 case Instruction::Store: 4971 return selectStore(I); 4972 case Instruction::FCmp: 4973 case Instruction::ICmp: 4974 return selectCmp(I); 4975 case Instruction::Select: 4976 return selectSelect(I); 4977 case Instruction::Ret: 4978 return selectRet(I); 4979 case Instruction::FRem: 4980 return selectFRem(I); 4981 case Instruction::GetElementPtr: 4982 return selectGetElementPtr(I); 4983 } 4984 4985 // fall-back to target-independent instruction selection. 
  return selectOperator(I, I->getOpcode());
  // Silence warnings.
  (void)&CC_AArch64_DarwinPCS_VarArg;
}

namespace llvm {
llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                        const TargetLibraryInfo *LibInfo) {
  return new AArch64FastISel(FuncInfo, LibInfo);
}
}