AArch64ISelDAGToDAG.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {
  AArch64TargetMachine &TM;

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool ForCodeSize;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
        ForCodeSize(false) {}

  const char *getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    AttributeSet FnAttrs = MF.getFunction()->getAttributes();
    ForCodeSize =
        FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
                             Attribute::OptimizeForSize) ||
        FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  SDNode *Select(SDNode *Node) override;
  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
                                    std::vector<SDValue> &OutOps) override;

  SDNode *SelectMLAV64LaneV128(SDNode *N);
  SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }

  template <int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template <int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that element
  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
                      unsigned SubRegs[]);

  SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  SDNode *SelectIndexedLoad(SDNode *N, bool &Done);

  SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                     unsigned SubRegIdx);
  SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                         unsigned SubRegIdx);
  SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
  SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);

  SDNode *SelectBitfieldExtractOp(SDNode *N);
  SDNode *SelectBitfieldInsertOp(SDNode *N);

  SDNode *SelectLIBM(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template <unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
};
} // end anonymous namespace

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so Imm will receive the 32-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if the value is a constant
// operand. If so Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}
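// For illustration (added comment, not in the original source): given a DAG
// node N for (and x, 0xff), isOpcWithIntImmediate(N, ISD::AND, Imm) returns
// true and sets Imm to 0xff; for (and x, y) with a non-constant y it returns
// false and leaves Imm untouched.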
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
  assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
  // Require the address to be in a register. That is safe for all AArch64
  // variants and it is hard to do anything much smarter without knowing
  // how the operand is used.
  OutOps.push_back(Op);
  return false;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  Val = CurDAG->getTargetConstant(Immed, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
  return true;
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
}

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// \brief Determine whether it is worth folding V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
  // It hurts if a value is used at least twice, unless we are optimizing
  // for code size.
  if (ForCodeSize || V.hasOneUse())
    return true;
  return false;
}
/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to default of "LSL 0". The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
/// instructions do not. The AllowROR parameter specifies whether ROR is
/// supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueType().getSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
    return isWorthFolding(N);
  }

  return false;
}

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}
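// Worked examples (added for illustration, not in the original source):
//   (sign_extend_inreg x, i8)  -> SXTB      (and x, 0xFF)   -> UXTB
//   (sign_extend i32 -> i64)   -> SXTW      (and x, 0xFFFF) -> UXTH
// With IsLoadStore == true the byte/halfword forms are rejected because
// load/store addressing modes only offer {U,S}XTW (plus LSL/SXTX) extends.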
// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
      DL->getOpcode() != AArch64ISD::DUPLANE32)
    return false;

  SDValue SV = DL->getOperand(0);
  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
    return false;

  SDValue EV = SV.getOperand(1);
  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
  LaneOp = EV.getOperand(0);

  return true;
}

// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is
// a high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
                             SDValue &LaneOp, int &LaneIdx) {

  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    std::swap(Op0, Op1);
    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
      return false;
  }
  StdOp = Op1;
  return true;
}

/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
/// is a lane in the upper half of a 128-bit vector. Recognize and select this
/// so that we don't emit unnecessary lane extracts.
SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
  int LaneIdx = -1; // Will hold the lane index.

  if (Op1.getOpcode() != ISD::MUL ||
      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                        LaneIdx)) {
    std::swap(Op0, Op1);
    if (Op1.getOpcode() != ISD::MUL ||
        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                          LaneIdx))
      return nullptr;
  }

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);

  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };

  unsigned MLAOpc = ~0U;

  switch (N->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unrecognized MLA.");
  case MVT::v4i16:
    MLAOpc = AArch64::MLAv4i16_indexed;
    break;
  case MVT::v8i16:
    MLAOpc = AArch64::MLAv8i16_indexed;
    break;
  case MVT::v2i32:
    MLAOpc = AArch64::MLAv2i32_indexed;
    break;
  case MVT::v4i32:
    MLAOpc = AArch64::MLAv4i32_indexed;
    break;
  }

  return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
}
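// Illustrative effect (added comment, not in the original source): for a
// v4i16 add whose multiplicand is a dup of lane 7 of a v8i16 value, this
// selects a single "mla v0.4h, v1.4h, v2.h[7]" instead of first extracting
// the high half of v2 into a separate D register.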
SMULL."); 508 case MVT::v4i32: 509 SMULLOpc = AArch64::UMULLv4i16_indexed; 510 break; 511 case MVT::v2i64: 512 SMULLOpc = AArch64::UMULLv2i32_indexed; 513 break; 514 } 515 } else 516 llvm_unreachable("Unrecognized intrinsic."); 517 518 return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops); 519} 520 521/// Instructions that accept extend modifiers like UXTW expect the register 522/// being extended to be a GPR32, but the incoming DAG might be acting on a 523/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if 524/// this is the case. 525static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { 526 if (N.getValueType() == MVT::i32) 527 return N; 528 529 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); 530 MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 531 SDLoc(N), MVT::i32, N, SubReg); 532 return SDValue(Node, 0); 533} 534 535 536/// SelectArithExtendedRegister - Select a "extended register" operand. This 537/// operand folds in an extend followed by an optional left shift. 538bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, 539 SDValue &Shift) { 540 unsigned ShiftVal = 0; 541 AArch64_AM::ShiftExtendType Ext; 542 543 if (N.getOpcode() == ISD::SHL) { 544 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 545 if (!CSD) 546 return false; 547 ShiftVal = CSD->getZExtValue(); 548 if (ShiftVal > 4) 549 return false; 550 551 Ext = getExtendTypeForNode(N.getOperand(0)); 552 if (Ext == AArch64_AM::InvalidShiftExtend) 553 return false; 554 555 Reg = N.getOperand(0).getOperand(0); 556 } else { 557 Ext = getExtendTypeForNode(N); 558 if (Ext == AArch64_AM::InvalidShiftExtend) 559 return false; 560 561 Reg = N.getOperand(0); 562 } 563 564 // AArch64 mandates that the RHS of the operation must use the smallest 565 // register classs that could contain the size being extended from. Thus, 566 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though 567 // there might not be an actual 32-bit value in the program. We can 568 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. 569 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); 570 Reg = narrowIfNeeded(CurDAG, Reg); 571 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); 572 return isWorthFolding(N); 573} 574 575/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit 576/// immediate" address. The "Size" argument is the size in bytes of the memory 577/// reference, which determines the scale. 
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base,
                                                SDValue &OffImm) {
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
    OffImm = CurDAG->getTargetConstant(0, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    const GlobalValue *GV = GAN->getGlobal();
    unsigned Alignment = GV->getAlignment();
    const DataLayout *DL = TLI->getDataLayout();
    if (Alignment == 0 && !Subtarget->isTargetDarwin())
      Alignment = DL->getABITypeAlignment(GV->getType()->getElementType());

    if (Alignment >= Size)
      return true;
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, MVT::i64);
  return true;
}
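// Worked example (illustrative, not from the original source): for an i64
// load of [x1 + 32] the Size is 8, so the check is 32 % 8 == 0 and
// 32 < 0x1000 << 3; Base becomes x1 and OffImm becomes 32 >> 3 == 4,
// matching the encoding of "ldr x0, [x1, #32]". An offset such as 33 fails
// the alignment test and is left for the unscaled (ldur) path instead.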
/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address. This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode. The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    // If the offset is valid as a scaled immediate, don't match here.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
        RHSC < (0x1000 << Log2_32(Size)))
      return false;
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
      }
      OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64);
      return true;
    }
  }
  return false;
}

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64),
      0);
  MachineSDNode *Node = CurDAG->getMachineNode(
      TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg);
  return SDValue(Node, 0);
}

/// \brief Check if the given SHL node (\p N), can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  if (isWorthFolding(N))
    return true;

  return false;
}
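// Illustrative case (added comment, not in the original source): with
// Size == 8 and WantExtend, (shl (sext_inreg x, i32), 3) passes the checks
// because the shift amount 3 equals Log2_32(8); it becomes the SXTW-extended
// offset of an "ldr x0, [xBase, w1, sxtw #3]" style address. A shift of 2
// with Size == 8 is rejected since only 0 or the legal scale is allowed.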
bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}
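// Summary of the two modes above (illustrative, not from the original
// source): WRO covers W-register offsets, e.g. "ldr x0, [x1, w2, sxtw #3]",
// so an extend must be present; XRO covers X-register offsets, e.g.
// "ldr x0, [x1, x2]" or "ldr x0, [x1, x2, lsl #3]", where a plain 64-bit
// register add always matches for free.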
SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1,
                                AArch64::dsub2, AArch64::dsub3 };

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1,
                                AArch64::qsub2, AArch64::qsub3 };

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         unsigned RegClassIDs[],
                                         unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0].getNode());

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
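// Shape of the result (illustrative, not from the original source): for two
// Q registers createQTuple emits
//   REG_SEQUENCE QQRegClassID, %q0, qsub0, %q1, qsub1
// producing one Untyped value that the register allocator must place in a
// consecutive Q-register pair, as the ldN/stN/tbl instructions require.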
SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return nullptr;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return nullptr;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT,
                                       MVT::Other, Ops);
  // Either way, we're replacing the node, so tell the caller that.
  Done = true;
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
                    CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg),
                0);
  }

  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));

  return nullptr;
}

SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
                                        unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(N->getOperand(2)); // Mem operand
  Ops.push_back(Chain);

  std::vector<EVT> ResTys;
  ResTys.push_back(MVT::Untyped);
  ResTys.push_back(MVT::Other);

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(SubRegIdx + i,
                                                              dl, VT,
                                                              SuperReg));

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  return nullptr;
}
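// Concrete instance (illustrative, not from the original source): an ld2
// intrinsic on two v4i32 values is selected as one LD2 machine node returning
// MVT::Untyped; the two results visible to the rest of the DAG are
// EXTRACT_SUBREGs at qsub0 and qsub1 of that tuple.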
SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                            unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(N->getOperand(1)); // Mem operand
  Ops.push_back(N->getOperand(2)); // Incremental
  Ops.push_back(Chain);

  std::vector<EVT> ResTys;
  ResTys.push_back(MVT::i64); // Type of the write back register
  ResTys.push_back(MVT::Untyped);
  ResTys.push_back(MVT::Other);

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1)
    ReplaceUses(SDValue(N, 0), SuperReg);
  else
    for (unsigned i = 0; i < NumVecs; ++i)
      ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(SubRegIdx + i,
                                                                dl, VT,
                                                                SuperReg));

  // Update the chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  return nullptr;
}

SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + 2));
  Ops.push_back(N->getOperand(0));
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  return St;
}

SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  SmallVector<EVT, 2> ResTys;
  ResTys.push_back(MVT::i64);   // Type of the write back register
  ResTys.push_back(MVT::Other); // Type for the Chain

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + 1)); // base register
  Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental
  Ops.push_back(N->getOperand(0));           // Chain
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  return St;
}

/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
    SDLoc DL(V64Reg);

    SDValue Undef =
        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
  }
};
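// For example (added comment, not in the original source): widening a v2i32
// value amounts to
//   %wide:v4i32 = INSERT_SUBREG (IMPLICIT_DEF), %narrow, dsub
// i.e. the 64-bit value lands in the low half of a Q register and the high
// half is left undefined, which is all the lane instructions need.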
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
  EVT VT = V128Reg.getValueType();
  unsigned WideSize = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType().getSimpleVT();
  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);

  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
                                    V128Reg);
}

SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                            unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
                   WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  std::vector<EVT> ResTys;
  ResTys.push_back(MVT::Untyped);
  ResTys.push_back(MVT::Other);

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(RegSeq);
  Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
  Ops.push_back(N->getOperand(NumVecs + 3));
  Ops.push_back(N->getOperand(0));
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
                              AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

  return Ld;
}

SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                                unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
                   WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  std::vector<EVT> ResTys;
  ResTys.push_back(MVT::i64); // Type of the write back register
  ResTys.push_back(MVT::Untyped);
  ResTys.push_back(MVT::Other);

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(RegSeq);
  Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number
  Ops.push_back(N->getOperand(NumVecs + 2));                  // Base register
  Ops.push_back(N->getOperand(NumVecs + 3));                  // Incremental
  Ops.push_back(N->getOperand(0));
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
                                AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));

  return Ld;
}
SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
                   WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(RegSeq);
  Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
  Ops.push_back(N->getOperand(NumVecs + 3));
  Ops.push_back(N->getOperand(0));
  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);

  return St;
}
SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                                 unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
                   WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  SmallVector<EVT, 2> ResTys;
  ResTys.push_back(MVT::i64); // Type of the write back register
  ResTys.push_back(MVT::Other);

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(RegSeq);
  Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
  Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register
  Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
  Ops.push_back(N->getOperand(0));
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);

  return St;
}

static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
                                       unsigned &Opc, SDValue &Opd0,
                                       unsigned &LSB, unsigned &MSB,
                                       unsigned NumberOfIgnoredLowBits,
                                       bool BiggerPattern) {
  assert(N->getOpcode() == ISD::AND &&
         "N must be a AND operation to call this function");

  EVT VT = N->getValueType(0);

  // Here we can test the type of VT and return false when the type does not
  // match, but since it is done prior to that call in the current context
  // we turned that into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // FIXME: simplify-demanded-bits in DAGCombine will probably have
  // changed the AND node to a 32-bit mask operation. We'll have to
  // undo that as part of the transform here if we want to catch all
  // the opportunities.
  // Currently the NumberOfIgnoredLowBits argument helps to recover
  // from these situations when matching the bigger pattern (bitfield insert).

  // For unsigned extracts, check for a shift right and mask
  uint64_t And_imm = 0;
  if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))
    return false;

  const SDNode *Op0 = N->getOperand(0).getNode();

  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
  // simplified. Try to undo that
  And_imm |= (1 << NumberOfIgnoredLowBits) - 1;

  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
  if (And_imm & (And_imm + 1))
    return false;

  bool ClampMSB = false;
  uint64_t Srl_imm = 0;
  // Handle the SRL + ANY_EXTEND case.
  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
      isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {
    // Extend the incoming operand of the SRL to 64-bit.
    Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
    // Make sure to clamp the MSB so that we preserve the semantics of the
    // original operations.
    ClampMSB = true;
  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
             isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
                                   Srl_imm)) {
    // If the shift result was truncated, we can still combine them.
    Opd0 = Op0->getOperand(0).getOperand(0);

    // Use the type of SRL node.
    VT = Opd0->getValueType(0);
  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
    Opd0 = Op0->getOperand(0);
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift right has been performed.
    // The resulting code will be at least as good as the original one
    // plus it may expose more opportunities for bitfield insert pattern.
    // FIXME: Currently we limit this to the bigger pattern, because
    // some optimizations expect AND and not UBFM
    Opd0 = N->getOperand(0);
  } else
    return false;

  assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
         "bad amount in shift node!");

  LSB = Srl_imm;
  MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm)
                                  : CountTrailingOnes_64(And_imm)) -
        1;
  if (ClampMSB)
    // Since we're moving the extend before the right shift operation, we need
    // to clamp the MSB to make sure we don't shift in undefined bits instead of
    // the zeros which would get shifted in with the original right shift
    // operation.
    MSB = MSB > 31 ? 31 : MSB;

  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  return true;
}
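// Worked example (illustrative, not from the original source): for
// (and (srl x, 4), 0xff) on i32, Srl_imm is 4 and CountTrailingOnes_32(0xff)
// is 8, so LSB = 4 and MSB = 4 + 8 - 1 = 11; the node is selected as
// UBFMWri x, 4, 11, i.e. "ubfx w0, w0, #4, #8".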
static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
                                     unsigned &LSB, unsigned &MSB) {
  // We are looking for the following pattern which basically extracts a single
  // bit from the source value and places it in the LSB of the destination
  // value; all other bits of the destination value are set to zero:
  //
  // Value2 = AND Value, MaskImm
  // SRL Value2, ShiftImm
  //
  // with MaskImm >> ShiftImm == 1.
  //
  // This gets selected into a single UBFM:
  //
  // UBFM Value, ShiftImm, ShiftImm
  //

  if (N->getOpcode() != ISD::SRL)
    return false;

  uint64_t And_mask = 0;
  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))
    return false;

  Opd0 = N->getOperand(0).getOperand(0);

  uint64_t Srl_imm = 0;
  if (!isIntImmediate(N->getOperand(1), Srl_imm))
    return false;

  // Check whether we really have a one bit extract here.
  if (And_mask >> Srl_imm == 0x1) {
    if (N->getValueType(0) == MVT::i32)
      Opc = AArch64::UBFMWri;
    else
      Opc = AArch64::UBFMXri;

    LSB = MSB = Srl_imm;

    return true;
  }

  return false;
}
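// For instance (added comment, not in the original source):
// (srl (and x, 0x8), 3) has And_mask 0x8 and Srl_imm 3, and 0x8 >> 3 == 1,
// so it becomes UBFM x, 3, 3: bit 3 of x is moved into bit 0 of the result.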
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
                                       unsigned &LSB, unsigned &MSB,
                                       bool BiggerPattern) {
  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
         "N must be a SHR/SRA operation to call this function");

  EVT VT = N->getValueType(0);

  // Here we can test the type of VT and return false when the type does not
  // match, but since it is done prior to that call in the current context
  // we turned that into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // Check for AND + SRL doing a one bit extract.
  if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
    return true;

  // We're looking for a shift of a shift.
  uint64_t Shl_imm = 0;
  uint64_t Trunc_bits = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    Opd0 = N->getOperand(0).getOperand(0);
  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
             N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
    // We are looking for a shift of truncate. Truncate from i64 to i32 could
    // be considered as setting high 32 bits as zero. Our strategy here is to
    // always generate 64bit UBFM. This consistency will help the CSE pass
    // later find more redundancy.
    Opd0 = N->getOperand(0).getOperand(0);
    Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
    VT = Opd0->getValueType(0);
    assert(VT == MVT::i64 && "the promoted type should be i64");
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift left has been performed.
    // FIXME: Currently we limit this to the bigger pattern case,
    // because some optimizations expect AND and not UBFM
    Opd0 = N->getOperand(0);
  } else
    return false;

  assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
  uint64_t Srl_imm = 0;
  if (!isIntImmediate(N->getOperand(1), Srl_imm))
    return false;

  assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
         "bad amount in shift node!");
  // Note: The width operand is encoded as width-1.
  unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1;
  int sLSB = Srl_imm - Shl_imm;
  if (sLSB < 0)
    return false;
  LSB = sLSB;
  MSB = LSB + Width;
  // SRA requires a signed extraction
  if (VT == MVT::i32)
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
  else
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
  return true;
}

static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                SDValue &Opd0, unsigned &LSB, unsigned &MSB,
                                unsigned NumberOfIgnoredLowBits = 0,
                                bool BiggerPattern = false) {
  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
    return false;

  switch (N->getOpcode()) {
  default:
    if (!N->isMachineOpcode())
      return false;
    break;
  case ISD::AND:
    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB,
                                      NumberOfIgnoredLowBits, BiggerPattern);
  case ISD::SRL:
  case ISD::SRA:
    return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern);
  }

  unsigned NOpc = N->getMachineOpcode();
  switch (NOpc) {
  default:
    return false;
  case AArch64::SBFMWri:
  case AArch64::UBFMWri:
  case AArch64::SBFMXri:
  case AArch64::UBFMXri:
    Opc = NOpc;
    Opd0 = N->getOperand(0);
    LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
    MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
    return true;
  }
  // Unreachable
  return false;
}

SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
  unsigned Opc, LSB, MSB;
  SDValue Opd0;
  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB))
    return nullptr;

  EVT VT = N->getValueType(0);

  // If the bit extract operation is 64bit but the original type is 32bit, we
  // need to add one EXTRACT_SUBREG.
  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64),
                       CurDAG->getTargetConstant(MSB, MVT::i64)};

    SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64);
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
    MachineSDNode *Node =
        CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32,
                               SDValue(BFM, 0), SubReg);
    return Node;
  }

  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT),
                   CurDAG->getTargetConstant(MSB, VT)};
  return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
1562static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted, 1563 unsigned NumberOfIgnoredHighBits, EVT VT) { 1564 assert((VT == MVT::i32 || VT == MVT::i64) && 1565 "i32 or i64 mask type expected!"); 1566 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; 1567 1568 APInt SignificantDstMask = APInt(BitWidth, DstMask); 1569 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); 1570 1571 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && 1572 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue(); 1573} 1574 1575// Look for bits that will be useful for later uses. 1576// A bit is considered useless as soon as it is dropped and never used 1577// before it has been dropped. 1578// E.g., looking for the useful bits of x: 1579// 1. y = x & 0x7 1580// 2. z = y >> 2 1581// After #1, the useful bits of x are 0x7; they then live through 1582// y. 1583// After #2, the useful bits of x are 0x4. 1584// However, if x is used in an unpredictable instruction, then all its bits 1585// are useful. 1586// E.g. 1587// 1. y = x & 0x7 1588// 2. z = y >> 2 1589// 3. str x, [@x] 1590static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); 1591 1592static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, 1593 unsigned Depth) { 1594 uint64_t Imm = 1595 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 1596 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); 1597 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); 1598 getUsefulBits(Op, UsefulBits, Depth + 1); 1599} 1600 1601static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, 1602 uint64_t Imm, uint64_t MSB, 1603 unsigned Depth) { 1604 // Inherit the bitwidth value. 1605 APInt OpUsefulBits(UsefulBits); 1606 OpUsefulBits = 1; 1607 1608 if (MSB >= Imm) { 1609 OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); 1610 --OpUsefulBits; 1611 // The interesting part will be in the lower part of the result 1612 getUsefulBits(Op, OpUsefulBits, Depth + 1); 1613 // The interesting part was starting at Imm in the argument 1614 OpUsefulBits = OpUsefulBits.shl(Imm); 1615 } else { 1616 OpUsefulBits = OpUsefulBits.shl(MSB + 1); 1617 --OpUsefulBits; 1618 // The interesting part will be shifted in the result 1619 OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); 1620 getUsefulBits(Op, OpUsefulBits, Depth + 1); 1621 // The interesting part was at zero in the argument 1622 OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); 1623 } 1624 1625 UsefulBits &= OpUsefulBits; 1626} 1627 1628static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, 1629 unsigned Depth) { 1630 uint64_t Imm = 1631 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 1632 uint64_t MSB = 1633 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 1634 1635 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 1636} 1637 1638static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, 1639 unsigned Depth) { 1640 uint64_t ShiftTypeAndValue = 1641 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 1642 APInt Mask(UsefulBits); 1643 Mask.clearAllBits(); 1644 Mask.flipAllBits(); 1645 1646 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { 1647 // Shift Left 1648 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 1649 Mask = Mask.shl(ShiftAmt); 1650 getUsefulBits(Op, Mask, Depth + 1);
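// Map the useful bits of the result back onto the unshifted register
// operand: if bit (i + ShiftAmt) of the result is useful, bit i of the
// operand is.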
1651 Mask = Mask.lshr(ShiftAmt); 1652 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 1653 // Shift Right 1654 // We do not handle AArch64_AM::ASR, because the sign will change the 1655 // number of useful bits. 1656 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 1657 Mask = Mask.lshr(ShiftAmt); 1658 getUsefulBits(Op, Mask, Depth + 1); 1659 Mask = Mask.shl(ShiftAmt); 1660 } else 1661 return; 1662 1663 UsefulBits &= Mask; 1664} 1665 1666static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 1667 unsigned Depth) { 1668 uint64_t Imm = 1669 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 1670 uint64_t MSB = 1671 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 1672 1673 if (Op.getOperand(1) == Orig) 1674 return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 1675 1676 APInt OpUsefulBits(UsefulBits); 1677 OpUsefulBits = 1; 1678 1679 if (MSB >= Imm) { 1680 OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); 1681 --OpUsefulBits; 1682 UsefulBits &= ~OpUsefulBits; 1683 getUsefulBits(Op, UsefulBits, Depth + 1); 1684 } else { 1685 OpUsefulBits = OpUsefulBits.shl(MSB + 1); 1686 --OpUsefulBits; 1687 UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm)); 1688 getUsefulBits(Op, UsefulBits, Depth + 1); 1689 } 1690} 1691 1692static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 1693 SDValue Orig, unsigned Depth) { 1694 1695 // Users of this node should have already been instruction selected. 1696 // FIXME: Can we turn that into an assert? 1697 if (!UserNode->isMachineOpcode()) 1698 return; 1699 1700 switch (UserNode->getMachineOpcode()) { 1701 default: 1702 return; 1703 case AArch64::ANDSWri: 1704 case AArch64::ANDSXri: 1705 case AArch64::ANDWri: 1706 case AArch64::ANDXri: 1707 // We increment Depth only when we call getUsefulBits. 1708 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 1709 Depth); 1710 case AArch64::UBFMWri: 1711 case AArch64::UBFMXri: 1712 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 1713 1714 case AArch64::ORRWrs: 1715 case AArch64::ORRXrs: 1716 if (UserNode->getOperand(1) != Orig) 1717 return; 1718 return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 1719 Depth); 1720 case AArch64::BFMWri: 1721 case AArch64::BFMXri: 1722 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 1723 } 1724} 1725 1726static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 1727 if (Depth >= 6) 1728 return; 1729 // Initialize UsefulBits. 1730 if (!Depth) { 1731 unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits(); 1732 // At the beginning, assume every produced bit is useful. 1733 UsefulBits = APInt(Bitwidth, 0); 1734 UsefulBits.flipAllBits(); 1735 } 1736 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 1737 1738 for (SDNode *Node : Op.getNode()->uses()) { 1739 // A use cannot produce useful bits. 1740 APInt UsefulBitsForUse = APInt(UsefulBits); 1741 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 1742 UsersUsefulBits |= UsefulBitsForUse; 1743 } 1744 // UsefulBits contains the produced bits that are meaningful for the 1745 // current definition, thus a user cannot make a bit meaningful at 1746 // this point. 1747 UsefulBits &= UsersUsefulBits; 1748} 1749 1750/// Create a machine node performing a notional SHL of Op by ShlAmount. If 1751/// ShlAmount is negative, do a (logical) right-shift instead.
If ShlAmount is 1752/// 0, return Op unchanged. 1753static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 1754 if (ShlAmount == 0) 1755 return Op; 1756 1757 EVT VT = Op.getValueType(); 1758 unsigned BitWidth = VT.getSizeInBits(); 1759 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; 1760 1761 SDNode *ShiftNode; 1762 if (ShlAmount > 0) { 1763 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 1764 ShiftNode = CurDAG->getMachineNode( 1765 UBFMOpc, SDLoc(Op), VT, Op, 1766 CurDAG->getTargetConstant(BitWidth - ShlAmount, VT), 1767 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT)); 1768 } else { 1769 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 1770 assert(ShlAmount < 0 && "expected right shift"); 1771 int ShrAmount = -ShlAmount; 1772 ShiftNode = CurDAG->getMachineNode( 1773 UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT), 1774 CurDAG->getTargetConstant(BitWidth - 1, VT)); 1775 } 1776 1777 return SDValue(ShiftNode, 0); 1778} 1779 1780/// Does this tree qualify as an attempt to move a bitfield into position, 1781/// essentially "(and (shl VAL, N), Mask)"? 1782static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 1783 SDValue &Src, int &ShiftAmount, 1784 int &MaskWidth) { 1785 EVT VT = Op.getValueType(); 1786 unsigned BitWidth = VT.getSizeInBits(); 1787 (void)BitWidth; 1788 assert(BitWidth == 32 || BitWidth == 64); 1789 1790 APInt KnownZero, KnownOne; 1791 CurDAG->computeKnownBits(Op, KnownZero, KnownOne); 1792 1793 // Non-zero in the sense that they're not provably zero, which is the key 1794 // point if we want to use this value. 1795 uint64_t NonZeroBits = (~KnownZero).getZExtValue(); 1796 1797 // Discard a constant AND mask if present. It's safe because the node will 1798 // already have been factored into the computeKnownBits calculation above. 1799 uint64_t AndImm; 1800 if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { 1801 assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0); 1802 Op = Op.getOperand(0); 1803 } 1804 1805 uint64_t ShlImm; 1806 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 1807 return false; 1808 Op = Op.getOperand(0); 1809 1810 if (!isShiftedMask_64(NonZeroBits)) 1811 return false; 1812 1813 ShiftAmount = countTrailingZeros(NonZeroBits); 1814 MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount); 1815 1816 // BFI encompasses sufficiently many nodes that it's worth inserting an extra 1817 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL 1818 // amount.
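// A worked example for i32: given (and (shl x, 2), 0xff0), NonZeroBits is
// 0xff0, so ShiftAmount == 4 and MaskWidth == 8, and Src becomes x shifted
// by ShlImm - ShiftAmount == -2, i.e. a logical right shift by 2.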
1819 Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); 1820 1821 return true; 1822} 1823 1824// Given an OR operation, check if we have the following pattern 1825// ubfm c, b, imm, imm2 (or something that does the same job, see 1826// isBitfieldExtractOp) 1827// d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and 1828// countTrailingZeros(mask2) == imm2 - imm + 1 1829// f = d | c 1830// If so, the given reference arguments will be updated so that one can replace 1831// the OR instruction with: 1832// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2 1833static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, 1834 SDValue &Src, unsigned &ImmR, 1835 unsigned &ImmS, SelectionDAG *CurDAG) { 1836 assert(N->getOpcode() == ISD::OR && "Expect an OR operation"); 1837 1838 // Set Opc 1839 EVT VT = N->getValueType(0); 1840 if (VT == MVT::i32) 1841 Opc = AArch64::BFMWri; 1842 else if (VT == MVT::i64) 1843 Opc = AArch64::BFMXri; 1844 else 1845 return false; 1846 1847 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 1848 // have the expected shape. Try to undo that. 1849 APInt UsefulBits; 1850 getUsefulBits(SDValue(N, 0), UsefulBits); 1851 1852 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); 1853 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); 1854 1855 // OR is commutative, check both possibilities (does llvm provide a 1856 // way to do that directly, e.g., via code matcher?) 1857 SDValue OrOpd1Val = N->getOperand(1); 1858 SDNode *OrOpd0 = N->getOperand(0).getNode(); 1859 SDNode *OrOpd1 = N->getOperand(1).getNode(); 1860 for (int i = 0; i < 2; 1861 ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) { 1862 unsigned BFXOpc; 1863 int DstLSB, Width; 1864 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, 1865 NumberOfIgnoredLowBits, true)) { 1866 // Check that the returned opcode is compatible with the pattern, 1867 // i.e., same type and zero extended (U and not S) 1868 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || 1869 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) 1870 continue; 1871 1872 // Compute the width of the bitfield insertion 1873 DstLSB = 0; 1874 Width = ImmS - ImmR + 1; 1875 // FIXME: This constraint is to catch bitfield insertion; we may 1876 // want to widen the pattern to grab the general bitfield 1877 // move case. 1878 if (Width <= 0) 1879 continue; 1880 1881 // If the mask on the insertee is correct, we have a BFXIL operation. We 1882 // can share the ImmR and ImmS values from the already-computed UBFM. 1883 } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src, 1884 DstLSB, Width)) { 1885 ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 1886 ImmS = Width - 1; 1887 } else 1888 continue; 1889 1890 // Check the second part of the pattern 1891 EVT VT = OrOpd1->getValueType(0); 1892 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); 1893 1894 // Compute the Known Zero for the candidate of the first operand. 1895 // This allows us to catch more general cases than just looking for 1896 // AND with imm. Indeed, simplify-demanded-bits may have removed 1897 // the AND instruction because it proved it was useless.
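// E.g., for i32, if the other operand is (srl z, 24), its top 24 bits are
// known zero even though no explicit AND mask is left in the DAG.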
1898 APInt KnownZero, KnownOne; 1899 CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne); 1900 1901 // Check if there is enough room for the second operand to appear 1902 // in the first one 1903 APInt BitsToBeInserted = 1904 APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width); 1905 1906 if ((BitsToBeInserted & ~KnownZero) != 0) 1907 continue; 1908 1909 // Set the first operand 1910 uint64_t Imm; 1911 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 1912 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 1913 // In that case, we can eliminate the AND 1914 Dst = OrOpd1->getOperand(0); 1915 else 1916 // Maybe the AND has been removed by simplify-demanded-bits 1917 // or is useful because it discards more bits 1918 Dst = OrOpd1Val; 1919 1920 // both parts match 1921 return true; 1922 } 1923 1924 return false; 1925} 1926 1927SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { 1928 if (N->getOpcode() != ISD::OR) 1929 return nullptr; 1930 1931 unsigned Opc; 1932 unsigned LSB, MSB; 1933 SDValue Opd0, Opd1; 1934 1935 if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG)) 1936 return nullptr; 1937 1938 EVT VT = N->getValueType(0); 1939 SDValue Ops[] = { Opd0, 1940 Opd1, 1941 CurDAG->getTargetConstant(LSB, VT), 1942 CurDAG->getTargetConstant(MSB, VT) }; 1943 return CurDAG->SelectNodeTo(N, Opc, VT, Ops); 1944} 1945 1946SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) { 1947 EVT VT = N->getValueType(0); 1948 unsigned Variant; 1949 unsigned Opc; 1950 unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr }; 1951 1952 if (VT == MVT::f32) { 1953 Variant = 0; 1954 } else if (VT == MVT::f64) { 1955 Variant = 1; 1956 } else 1957 return nullptr; // Unrecognized argument type. Fall back on default codegen. 1958 1959 // Pick the FRINTX variant needed to set the flags. 1960 unsigned FRINTXOpc = FRINTXOpcs[Variant]; 1961 1962 switch (N->getOpcode()) { 1963 default: 1964 return nullptr; // Unrecognized libm ISD node. Fall back on default codegen. 1965 case ISD::FCEIL: { 1966 unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr }; 1967 Opc = FRINTPOpcs[Variant]; 1968 break; 1969 } 1970 case ISD::FFLOOR: { 1971 unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr }; 1972 Opc = FRINTMOpcs[Variant]; 1973 break; 1974 } 1975 case ISD::FTRUNC: { 1976 unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr }; 1977 Opc = FRINTZOpcs[Variant]; 1978 break; 1979 } 1980 case ISD::FROUND: { 1981 unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr }; 1982 Opc = FRINTAOpcs[Variant]; 1983 break; 1984 } 1985 } 1986 1987 SDLoc dl(N); 1988 SDValue In = N->getOperand(0); 1989 SmallVector<SDValue, 2> Ops; 1990 Ops.push_back(In); 1991 1992 if (!TM.Options.UnsafeFPMath) { 1993 SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In); 1994 Ops.push_back(SDValue(FRINTX, 1)); 1995 } 1996 1997 return CurDAG->getMachineNode(Opc, dl, VT, Ops); 1998} 1999 2000bool 2001AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 2002 unsigned RegWidth) { 2003 APFloat FVal(0.0); 2004 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 2005 FVal = CN->getValueAPF(); 2006 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 2007 // Some otherwise illegal constants are allowed in this case. 
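// Such a constant is typically materialized as a constant-pool load through
// an ADRP + ADDlow pair; peer through that pattern to recover the original
// floating-point value.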
2008 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || 2009 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) 2010 return false; 2011 2012 ConstantPoolSDNode *CN = 2013 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); 2014 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); 2015 } else 2016 return false; 2017 2018 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits 2019 // is between 1 and 32 for a destination w-register, or 1 and 64 for an 2020 // x-register. 2021 // 2022 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we 2023 // want THIS_NODE to be 2^fbits. This is much easier to deal with using 2024 // integers. 2025 bool IsExact; 2026 2027 // fbits is between 1 and 64 in the worst-case, which means the fmul 2028 // could have 2^64 as an actual operand. Need 65 bits of precision. 2029 APSInt IntVal(65, true); 2030 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); 2031 2032 // N.b. isPowerOf2 also checks for > 0. 2033 if (!IsExact || !IntVal.isPowerOf2()) return false; 2034 unsigned FBits = IntVal.logBase2(); 2035 2036 // Checks above should have guaranteed that we haven't lost information in 2037 // finding FBits, but it must still be in range. 2038 if (FBits == 0 || FBits > RegWidth) return false; 2039 2040 FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32); 2041 return true; 2042} 2043 2044SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { 2045 // Dump information about the Node being selected 2046 DEBUG(errs() << "Selecting: "); 2047 DEBUG(Node->dump(CurDAG)); 2048 DEBUG(errs() << "\n"); 2049 2050 // If we have a custom node, we already have selected! 2051 if (Node->isMachineOpcode()) { 2052 DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 2053 Node->setNodeId(-1); 2054 return nullptr; 2055 } 2056 2057 // A few custom selection cases. 2058 SDNode *ResNode = nullptr; 2059 EVT VT = Node->getValueType(0); 2060 2061 switch (Node->getOpcode()) { 2062 default: 2063 break; 2064 2065 case ISD::ADD: 2066 if (SDNode *I = SelectMLAV64LaneV128(Node)) 2067 return I; 2068 break; 2069 2070 case ISD::LOAD: { 2071 // Try to select as an indexed load. Fall through to normal processing 2072 // if we can't. 2073 bool Done = false; 2074 SDNode *I = SelectIndexedLoad(Node, Done); 2075 if (Done) 2076 return I; 2077 break; 2078 } 2079 2080 case ISD::SRL: 2081 case ISD::AND: 2082 case ISD::SRA: 2083 if (SDNode *I = SelectBitfieldExtractOp(Node)) 2084 return I; 2085 break; 2086 2087 case ISD::OR: 2088 if (SDNode *I = SelectBitfieldInsertOp(Node)) 2089 return I; 2090 break; 2091 2092 case ISD::EXTRACT_VECTOR_ELT: { 2093 // Extracting lane zero is a special case where we can just use a plain 2094 // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for 2095 // the rest of the compiler, especially the register allocator and copy 2096 // propagation, to reason about, so is preferred when it's possible to 2097 // use it. 2098 ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1)); 2099 // Bail and use the default Select() for non-zero lanes. 2100 if (LaneNode->getZExtValue() != 0) 2101 break; 2102 // If the element type is not the same as the result type, likewise 2103 // bail and use the default Select(), as there's more to do than just 2104 // a cross-class COPY. This catches extracts of i8 and i16 elements 2105 // since they will need an explicit zext.
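// E.g., extracting lane 0 of a v2f64 as an f64 is just a dsub subregister
// copy, whereas extracting an i8 lane of a v16i8 would also need a zext and
// is left to the default selection.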
2106 if (VT != Node->getOperand(0).getValueType().getVectorElementType()) 2107 break; 2108 unsigned SubReg; 2109 switch (Node->getOperand(0) 2110 .getValueType() 2111 .getVectorElementType() 2112 .getSizeInBits()) { 2113 default: 2114 assert(0 && "Unexpected vector element type!"); 2115 case 64: 2116 SubReg = AArch64::dsub; 2117 break; 2118 case 32: 2119 SubReg = AArch64::ssub; 2120 break; 2121 case 16: // FALLTHROUGH 2122 case 8: 2123 llvm_unreachable("unexpected zext-requiring extract element!"); 2124 } 2125 SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, 2126 Node->getOperand(0)); 2127 DEBUG(dbgs() << "ISEL: Custom selection!\n=> "); 2128 DEBUG(Extract->dumpr(CurDAG)); 2129 DEBUG(dbgs() << "\n"); 2130 return Extract.getNode(); 2131 } 2132 case ISD::Constant: { 2133 // Materialize zero constants as copies from WZR/XZR. This allows 2134 // the coalescer to propagate these into other instructions. 2135 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 2136 if (ConstNode->isNullValue()) { 2137 if (VT == MVT::i32) 2138 return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), 2139 AArch64::WZR, MVT::i32).getNode(); 2140 else if (VT == MVT::i64) 2141 return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), 2142 AArch64::XZR, MVT::i64).getNode(); 2143 } 2144 break; 2145 } 2146 2147 case ISD::FrameIndex: { 2148 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 2149 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 2150 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 2151 const TargetLowering *TLI = getTargetLowering(); 2152 SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 2153 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), 2154 CurDAG->getTargetConstant(Shifter, MVT::i32) }; 2155 return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 2156 } 2157 case ISD::INTRINSIC_W_CHAIN: { 2158 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 2159 switch (IntNo) { 2160 default: 2161 break; 2162 case Intrinsic::aarch64_ldaxp: 2163 case Intrinsic::aarch64_ldxp: { 2164 unsigned Op = 2165 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 2166 SDValue MemAddr = Node->getOperand(2); 2167 SDLoc DL(Node); 2168 SDValue Chain = Node->getOperand(0); 2169 2170 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 2171 MVT::Other, MemAddr, Chain); 2172 2173 // Transfer memoperands. 2174 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2175 MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 2176 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); 2177 return Ld; 2178 } 2179 case Intrinsic::aarch64_stlxp: 2180 case Intrinsic::aarch64_stxp: { 2181 unsigned Op = 2182 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; 2183 SDLoc DL(Node); 2184 SDValue Chain = Node->getOperand(0); 2185 SDValue ValLo = Node->getOperand(2); 2186 SDValue ValHi = Node->getOperand(3); 2187 SDValue MemAddr = Node->getOperand(4); 2188 2189 // Place arguments in the right order. 2190 SmallVector<SDValue, 7> Ops; 2191 Ops.push_back(ValLo); 2192 Ops.push_back(ValHi); 2193 Ops.push_back(MemAddr); 2194 Ops.push_back(Chain); 2195 2196 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 2197 // Transfer memoperands. 
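// (Preserving the MachineMemOperand keeps the intrinsic's memory access
// visible to later passes such as the scheduler.)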
2198 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2199 MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 2200 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); 2201 2202 return St; 2203 } 2204 case Intrinsic::aarch64_neon_ld1x2: 2205 if (VT == MVT::v8i8) 2206 return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 2207 else if (VT == MVT::v16i8) 2208 return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 2209 else if (VT == MVT::v4i16) 2210 return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 2211 else if (VT == MVT::v8i16) 2212 return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 2213 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2214 return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 2215 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2216 return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 2217 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2218 return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 2219 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2220 return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 2221 break; 2222 case Intrinsic::aarch64_neon_ld1x3: 2223 if (VT == MVT::v8i8) 2224 return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 2225 else if (VT == MVT::v16i8) 2226 return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 2227 else if (VT == MVT::v4i16) 2228 return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 2229 else if (VT == MVT::v8i16) 2230 return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 2231 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2232 return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 2233 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2234 return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 2235 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2236 return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 2237 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2238 return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 2239 break; 2240 case Intrinsic::aarch64_neon_ld1x4: 2241 if (VT == MVT::v8i8) 2242 return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 2243 else if (VT == MVT::v16i8) 2244 return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 2245 else if (VT == MVT::v4i16) 2246 return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 2247 else if (VT == MVT::v8i16) 2248 return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 2249 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2250 return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 2251 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2252 return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 2253 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2254 return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 2255 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2256 return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 2257 break; 2258 case Intrinsic::aarch64_neon_ld2: 2259 if (VT == MVT::v8i8) 2260 return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 2261 else if (VT == MVT::v16i8) 2262 return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 2263 else if (VT == MVT::v4i16) 2264 return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 2265 else if (VT == MVT::v8i16) 2266 return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 2267 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2268 
return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 2269 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2270 return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 2271 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2272 return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 2273 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2274 return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 2275 break; 2276 case Intrinsic::aarch64_neon_ld3: 2277 if (VT == MVT::v8i8) 2278 return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 2279 else if (VT == MVT::v16i8) 2280 return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 2281 else if (VT == MVT::v4i16) 2282 return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 2283 else if (VT == MVT::v8i16) 2284 return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 2285 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2286 return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 2287 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2288 return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 2289 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2290 return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 2291 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2292 return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 2293 break; 2294 case Intrinsic::aarch64_neon_ld4: 2295 if (VT == MVT::v8i8) 2296 return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 2297 else if (VT == MVT::v16i8) 2298 return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 2299 else if (VT == MVT::v4i16) 2300 return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 2301 else if (VT == MVT::v8i16) 2302 return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 2303 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2304 return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 2305 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2306 return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 2307 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2308 return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 2309 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2310 return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 2311 break; 2312 case Intrinsic::aarch64_neon_ld2r: 2313 if (VT == MVT::v8i8) 2314 return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 2315 else if (VT == MVT::v16i8) 2316 return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 2317 else if (VT == MVT::v4i16) 2318 return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 2319 else if (VT == MVT::v8i16) 2320 return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 2321 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2322 return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 2323 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2324 return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 2325 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2326 return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 2327 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2328 return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 2329 break; 2330 case Intrinsic::aarch64_neon_ld3r: 2331 if (VT == MVT::v8i8) 2332 return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 2333 else if (VT == MVT::v16i8) 2334 return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 2335 else if (VT == MVT::v4i16) 2336 return SelectLoad(Node, 3, 
AArch64::LD3Rv4h, AArch64::dsub0); 2337 else if (VT == MVT::v8i16) 2338 return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 2339 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2340 return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 2341 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2342 return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 2343 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2344 return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 2345 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2346 return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 2347 break; 2348 case Intrinsic::aarch64_neon_ld4r: 2349 if (VT == MVT::v8i8) 2350 return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 2351 else if (VT == MVT::v16i8) 2352 return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 2353 else if (VT == MVT::v4i16) 2354 return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 2355 else if (VT == MVT::v8i16) 2356 return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 2357 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2358 return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 2359 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2360 return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 2361 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2362 return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 2363 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2364 return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 2365 break; 2366 case Intrinsic::aarch64_neon_ld2lane: 2367 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2368 return SelectLoadLane(Node, 2, AArch64::LD2i8); 2369 else if (VT == MVT::v8i16 || VT == MVT::v4i16) 2370 return SelectLoadLane(Node, 2, AArch64::LD2i16); 2371 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2372 VT == MVT::v2f32) 2373 return SelectLoadLane(Node, 2, AArch64::LD2i32); 2374 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2375 VT == MVT::v1f64) 2376 return SelectLoadLane(Node, 2, AArch64::LD2i64); 2377 break; 2378 case Intrinsic::aarch64_neon_ld3lane: 2379 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2380 return SelectLoadLane(Node, 3, AArch64::LD3i8); 2381 else if (VT == MVT::v8i16 || VT == MVT::v4i16) 2382 return SelectLoadLane(Node, 3, AArch64::LD3i16); 2383 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2384 VT == MVT::v2f32) 2385 return SelectLoadLane(Node, 3, AArch64::LD3i32); 2386 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2387 VT == MVT::v1f64) 2388 return SelectLoadLane(Node, 3, AArch64::LD3i64); 2389 break; 2390 case Intrinsic::aarch64_neon_ld4lane: 2391 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2392 return SelectLoadLane(Node, 4, AArch64::LD4i8); 2393 else if (VT == MVT::v8i16 || VT == MVT::v4i16) 2394 return SelectLoadLane(Node, 4, AArch64::LD4i16); 2395 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2396 VT == MVT::v2f32) 2397 return SelectLoadLane(Node, 4, AArch64::LD4i32); 2398 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2399 VT == MVT::v1f64) 2400 return SelectLoadLane(Node, 4, AArch64::LD4i64); 2401 break; 2402 } 2403 } break; 2404 case ISD::INTRINSIC_WO_CHAIN: { 2405 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 2406 switch (IntNo) { 2407 default: 2408 break; 2409 case Intrinsic::aarch64_neon_tbl2: 2410 return SelectTable(Node, 2, VT == MVT::v8i8 ? 
AArch64::TBLv8i8Two 2411 : AArch64::TBLv16i8Two, 2412 false); 2413 case Intrinsic::aarch64_neon_tbl3: 2414 return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 2415 : AArch64::TBLv16i8Three, 2416 false); 2417 case Intrinsic::aarch64_neon_tbl4: 2418 return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 2419 : AArch64::TBLv16i8Four, 2420 false); 2421 case Intrinsic::aarch64_neon_tbx2: 2422 return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two 2423 : AArch64::TBXv16i8Two, 2424 true); 2425 case Intrinsic::aarch64_neon_tbx3: 2426 return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 2427 : AArch64::TBXv16i8Three, 2428 true); 2429 case Intrinsic::aarch64_neon_tbx4: 2430 return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four 2431 : AArch64::TBXv16i8Four, 2432 true); 2433 case Intrinsic::aarch64_neon_smull: 2434 case Intrinsic::aarch64_neon_umull: 2435 if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node)) 2436 return N; 2437 break; 2438 } 2439 break; 2440 } 2441 case ISD::INTRINSIC_VOID: { 2442 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 2443 if (Node->getNumOperands() >= 3) 2444 VT = Node->getOperand(2)->getValueType(0); 2445 switch (IntNo) { 2446 default: 2447 break; 2448 case Intrinsic::aarch64_neon_st1x2: { 2449 if (VT == MVT::v8i8) 2450 return SelectStore(Node, 2, AArch64::ST1Twov8b); 2451 else if (VT == MVT::v16i8) 2452 return SelectStore(Node, 2, AArch64::ST1Twov16b); 2453 else if (VT == MVT::v4i16) 2454 return SelectStore(Node, 2, AArch64::ST1Twov4h); 2455 else if (VT == MVT::v8i16) 2456 return SelectStore(Node, 2, AArch64::ST1Twov8h); 2457 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2458 return SelectStore(Node, 2, AArch64::ST1Twov2s); 2459 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2460 return SelectStore(Node, 2, AArch64::ST1Twov4s); 2461 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2462 return SelectStore(Node, 2, AArch64::ST1Twov2d); 2463 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2464 return SelectStore(Node, 2, AArch64::ST1Twov1d); 2465 break; 2466 } 2467 case Intrinsic::aarch64_neon_st1x3: { 2468 if (VT == MVT::v8i8) 2469 return SelectStore(Node, 3, AArch64::ST1Threev8b); 2470 else if (VT == MVT::v16i8) 2471 return SelectStore(Node, 3, AArch64::ST1Threev16b); 2472 else if (VT == MVT::v4i16) 2473 return SelectStore(Node, 3, AArch64::ST1Threev4h); 2474 else if (VT == MVT::v8i16) 2475 return SelectStore(Node, 3, AArch64::ST1Threev8h); 2476 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2477 return SelectStore(Node, 3, AArch64::ST1Threev2s); 2478 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2479 return SelectStore(Node, 3, AArch64::ST1Threev4s); 2480 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2481 return SelectStore(Node, 3, AArch64::ST1Threev2d); 2482 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2483 return SelectStore(Node, 3, AArch64::ST1Threev1d); 2484 break; 2485 } 2486 case Intrinsic::aarch64_neon_st1x4: { 2487 if (VT == MVT::v8i8) 2488 return SelectStore(Node, 4, AArch64::ST1Fourv8b); 2489 else if (VT == MVT::v16i8) 2490 return SelectStore(Node, 4, AArch64::ST1Fourv16b); 2491 else if (VT == MVT::v4i16) 2492 return SelectStore(Node, 4, AArch64::ST1Fourv4h); 2493 else if (VT == MVT::v8i16) 2494 return SelectStore(Node, 4, AArch64::ST1Fourv8h); 2495 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2496 return SelectStore(Node, 4, AArch64::ST1Fourv2s); 2497 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2498 return SelectStore(Node, 4, AArch64::ST1Fourv4s); 2499 else if 
(VT == MVT::v2i64 || VT == MVT::v2f64) 2500 return SelectStore(Node, 4, AArch64::ST1Fourv2d); 2501 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2502 return SelectStore(Node, 4, AArch64::ST1Fourv1d); 2503 break; 2504 } 2505 case Intrinsic::aarch64_neon_st2: { 2506 if (VT == MVT::v8i8) 2507 return SelectStore(Node, 2, AArch64::ST2Twov8b); 2508 else if (VT == MVT::v16i8) 2509 return SelectStore(Node, 2, AArch64::ST2Twov16b); 2510 else if (VT == MVT::v4i16) 2511 return SelectStore(Node, 2, AArch64::ST2Twov4h); 2512 else if (VT == MVT::v8i16) 2513 return SelectStore(Node, 2, AArch64::ST2Twov8h); 2514 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2515 return SelectStore(Node, 2, AArch64::ST2Twov2s); 2516 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2517 return SelectStore(Node, 2, AArch64::ST2Twov4s); 2518 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2519 return SelectStore(Node, 2, AArch64::ST2Twov2d); 2520 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2521 return SelectStore(Node, 2, AArch64::ST1Twov1d); 2522 break; 2523 } 2524 case Intrinsic::aarch64_neon_st3: { 2525 if (VT == MVT::v8i8) 2526 return SelectStore(Node, 3, AArch64::ST3Threev8b); 2527 else if (VT == MVT::v16i8) 2528 return SelectStore(Node, 3, AArch64::ST3Threev16b); 2529 else if (VT == MVT::v4i16) 2530 return SelectStore(Node, 3, AArch64::ST3Threev4h); 2531 else if (VT == MVT::v8i16) 2532 return SelectStore(Node, 3, AArch64::ST3Threev8h); 2533 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2534 return SelectStore(Node, 3, AArch64::ST3Threev2s); 2535 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2536 return SelectStore(Node, 3, AArch64::ST3Threev4s); 2537 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2538 return SelectStore(Node, 3, AArch64::ST3Threev2d); 2539 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2540 return SelectStore(Node, 3, AArch64::ST1Threev1d); 2541 break; 2542 } 2543 case Intrinsic::aarch64_neon_st4: { 2544 if (VT == MVT::v8i8) 2545 return SelectStore(Node, 4, AArch64::ST4Fourv8b); 2546 else if (VT == MVT::v16i8) 2547 return SelectStore(Node, 4, AArch64::ST4Fourv16b); 2548 else if (VT == MVT::v4i16) 2549 return SelectStore(Node, 4, AArch64::ST4Fourv4h); 2550 else if (VT == MVT::v8i16) 2551 return SelectStore(Node, 4, AArch64::ST4Fourv8h); 2552 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2553 return SelectStore(Node, 4, AArch64::ST4Fourv2s); 2554 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2555 return SelectStore(Node, 4, AArch64::ST4Fourv4s); 2556 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2557 return SelectStore(Node, 4, AArch64::ST4Fourv2d); 2558 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2559 return SelectStore(Node, 4, AArch64::ST1Fourv1d); 2560 break; 2561 } 2562 case Intrinsic::aarch64_neon_st2lane: { 2563 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2564 return SelectStoreLane(Node, 2, AArch64::ST2i8); 2565 else if (VT == MVT::v8i16 || VT == MVT::v4i16) 2566 return SelectStoreLane(Node, 2, AArch64::ST2i16); 2567 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2568 VT == MVT::v2f32) 2569 return SelectStoreLane(Node, 2, AArch64::ST2i32); 2570 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2571 VT == MVT::v1f64) 2572 return SelectStoreLane(Node, 2, AArch64::ST2i64); 2573 break; 2574 } 2575 case Intrinsic::aarch64_neon_st3lane: { 2576 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2577 return SelectStoreLane(Node, 3, AArch64::ST3i8); 2578 else if (VT == MVT::v8i16 || VT == MVT::v4i16) 2579 return SelectStoreLane(Node, 3, AArch64::ST3i16); 2580 else if 
(VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2581 VT == MVT::v2f32) 2582 return SelectStoreLane(Node, 3, AArch64::ST3i32); 2583 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2584 VT == MVT::v1f64) 2585 return SelectStoreLane(Node, 3, AArch64::ST3i64); 2586 break; 2587 } 2588 case Intrinsic::aarch64_neon_st4lane: { 2589 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2590 return SelectStoreLane(Node, 4, AArch64::ST4i8); 2591 else if (VT == MVT::v8i16 || VT == MVT::v4i16) 2592 return SelectStoreLane(Node, 4, AArch64::ST4i16); 2593 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2594 VT == MVT::v2f32) 2595 return SelectStoreLane(Node, 4, AArch64::ST4i32); 2596 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2597 VT == MVT::v1f64) 2598 return SelectStoreLane(Node, 4, AArch64::ST4i64); 2599 break; 2600 } 2601 } break; 2602 } 2603 case AArch64ISD::LD2post: { 2604 if (VT == MVT::v8i8) 2605 return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 2606 else if (VT == MVT::v16i8) 2607 return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 2608 else if (VT == MVT::v4i16) 2609 return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 2610 else if (VT == MVT::v8i16) 2611 return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 2612 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2613 return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 2614 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2615 return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 2616 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2617 return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 2618 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2619 return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 2620 break; 2621 } 2622 case AArch64ISD::LD3post: { 2623 if (VT == MVT::v8i8) 2624 return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 2625 else if (VT == MVT::v16i8) 2626 return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 2627 else if (VT == MVT::v4i16) 2628 return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 2629 else if (VT == MVT::v8i16) 2630 return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 2631 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2632 return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 2633 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2634 return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 2635 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2636 return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 2637 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2638 return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 2639 break; 2640 } 2641 case AArch64ISD::LD4post: { 2642 if (VT == MVT::v8i8) 2643 return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 2644 else if (VT == MVT::v16i8) 2645 return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 2646 else if (VT == MVT::v4i16) 2647 return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 2648 else if (VT == MVT::v8i16) 2649 return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 2650 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2651 return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 2652 else if (VT ==
MVT::v4i32 || VT == MVT::v4f32) 2653 return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 2654 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2655 return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 2656 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2657 return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 2658 break; 2659 } 2660 case AArch64ISD::LD1x2post: { 2661 if (VT == MVT::v8i8) 2662 return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 2663 else if (VT == MVT::v16i8) 2664 return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 2665 else if (VT == MVT::v4i16) 2666 return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 2667 else if (VT == MVT::v8i16) 2668 return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 2669 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2670 return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 2671 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2672 return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 2673 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2674 return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 2675 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2676 return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 2677 break; 2678 } 2679 case AArch64ISD::LD1x3post: { 2680 if (VT == MVT::v8i8) 2681 return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 2682 else if (VT == MVT::v16i8) 2683 return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 2684 else if (VT == MVT::v4i16) 2685 return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 2686 else if (VT == MVT::v8i16) 2687 return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 2688 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2689 return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 2690 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2691 return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 2692 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2693 return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 2694 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2695 return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 2696 break; 2697 } 2698 case AArch64ISD::LD1x4post: { 2699 if (VT == MVT::v8i8) 2700 return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 2701 else if (VT == MVT::v16i8) 2702 return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); 2703 else if (VT == MVT::v4i16) 2704 return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 2705 else if (VT == MVT::v8i16) 2706 return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 2707 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2708 return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 2709 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2710 return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 2711 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2712 return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 2713 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2714 return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 2715 break; 2716 } 2717 case AArch64ISD::LD1DUPpost: { 2718 if (VT == MVT::v8i8) 2719 return SelectPostLoad(Node, 1, 
AArch64::LD1Rv8b_POST, AArch64::dsub0); 2720 else if (VT == MVT::v16i8) 2721 return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 2722 else if (VT == MVT::v4i16) 2723 return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 2724 else if (VT == MVT::v8i16) 2725 return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 2726 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2727 return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 2728 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2729 return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 2730 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2731 return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 2732 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2733 return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 2734 break; 2735 } 2736 case AArch64ISD::LD2DUPpost: { 2737 if (VT == MVT::v8i8) 2738 return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 2739 else if (VT == MVT::v16i8) 2740 return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 2741 else if (VT == MVT::v4i16) 2742 return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 2743 else if (VT == MVT::v8i16) 2744 return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 2745 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2746 return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 2747 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2748 return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 2749 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2750 return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 2751 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2752 return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 2753 break; 2754 } 2755 case AArch64ISD::LD3DUPpost: { 2756 if (VT == MVT::v8i8) 2757 return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 2758 else if (VT == MVT::v16i8) 2759 return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 2760 else if (VT == MVT::v4i16) 2761 return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); 2762 else if (VT == MVT::v8i16) 2763 return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 2764 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2765 return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 2766 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2767 return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 2768 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2769 return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 2770 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2771 return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 2772 break; 2773 } 2774 case AArch64ISD::LD4DUPpost: { 2775 if (VT == MVT::v8i8) 2776 return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 2777 else if (VT == MVT::v16i8) 2778 return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 2779 else if (VT == MVT::v4i16) 2780 return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 2781 else if (VT == MVT::v8i16) 2782 return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 2783 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2784 return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 2785 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2786 return 
SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 2787 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2788 return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 2789 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2790 return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 2791 break; 2792 } 2793 case AArch64ISD::LD1LANEpost: { 2794 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2795 return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 2796 else if (VT == MVT::v8i16 || VT == MVT::v4i16) 2797 return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 2798 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2799 VT == MVT::v2f32) 2800 return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 2801 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2802 VT == MVT::v1f64) 2803 return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); 2804 break; 2805 } 2806 case AArch64ISD::LD2LANEpost: { 2807 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2808 return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); 2809 else if (VT == MVT::v8i16 || VT == MVT::v4i16) 2810 return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); 2811 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2812 VT == MVT::v2f32) 2813 return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); 2814 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2815 VT == MVT::v1f64) 2816 return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); 2817 break; 2818 } 2819 case AArch64ISD::LD3LANEpost: { 2820 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2821 return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); 2822 else if (VT == MVT::v8i16 || VT == MVT::v4i16) 2823 return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); 2824 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2825 VT == MVT::v2f32) 2826 return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); 2827 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2828 VT == MVT::v1f64) 2829 return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); 2830 break; 2831 } 2832 case AArch64ISD::LD4LANEpost: { 2833 if (VT == MVT::v16i8 || VT == MVT::v8i8) 2834 return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); 2835 else if (VT == MVT::v8i16 || VT == MVT::v4i16) 2836 return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); 2837 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 2838 VT == MVT::v2f32) 2839 return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); 2840 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 2841 VT == MVT::v1f64) 2842 return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); 2843 break; 2844 } 2845 case AArch64ISD::ST2post: { 2846 VT = Node->getOperand(1).getValueType(); 2847 if (VT == MVT::v8i8) 2848 return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); 2849 else if (VT == MVT::v16i8) 2850 return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); 2851 else if (VT == MVT::v4i16) 2852 return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); 2853 else if (VT == MVT::v8i16) 2854 return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); 2855 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2856 return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); 2857 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2858 return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); 2859 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2860 return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); 2861 else if (VT == 
MVT::v1i64 || VT == MVT::v1f64) 2862 return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 2863 break; 2864 } 2865 case AArch64ISD::ST3post: { 2866 VT = Node->getOperand(1).getValueType(); 2867 if (VT == MVT::v8i8) 2868 return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); 2869 else if (VT == MVT::v16i8) 2870 return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); 2871 else if (VT == MVT::v4i16) 2872 return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); 2873 else if (VT == MVT::v8i16) 2874 return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); 2875 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2876 return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); 2877 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2878 return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); 2879 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2880 return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); 2881 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2882 return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 2883 break; 2884 } 2885 case AArch64ISD::ST4post: { 2886 VT = Node->getOperand(1).getValueType(); 2887 if (VT == MVT::v8i8) 2888 return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); 2889 else if (VT == MVT::v16i8) 2890 return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); 2891 else if (VT == MVT::v4i16) 2892 return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); 2893 else if (VT == MVT::v8i16) 2894 return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); 2895 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2896 return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); 2897 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2898 return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); 2899 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2900 return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); 2901 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2902 return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 2903 break; 2904 } 2905 case AArch64ISD::ST1x2post: { 2906 VT = Node->getOperand(1).getValueType(); 2907 if (VT == MVT::v8i8) 2908 return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); 2909 else if (VT == MVT::v16i8) 2910 return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); 2911 else if (VT == MVT::v4i16) 2912 return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); 2913 else if (VT == MVT::v8i16) 2914 return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); 2915 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2916 return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); 2917 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 2918 return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); 2919 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 2920 return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 2921 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 2922 return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); 2923 break; 2924 } 2925 case AArch64ISD::ST1x3post: { 2926 VT = Node->getOperand(1).getValueType(); 2927 if (VT == MVT::v8i8) 2928 return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); 2929 else if (VT == MVT::v16i8) 2930 return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); 2931 else if (VT == MVT::v4i16) 2932 return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); 2933 else if (VT == MVT::v8i16) 2934 return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); 2935 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 2936 return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); 2937 else if (VT == MVT::v4i32 || 
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
    break;
  }

  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FTRUNC:
  case ISD::FROUND:
    if (SDNode *I = SelectLIBM(Node))
      return I;
    break;
  }
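
  // The FCEIL/FFLOOR/FTRUNC/FROUND cases above give SelectLIBM a chance to
  // pick a rounding instruction directly before the generated matcher runs.
  // The usual mapping (illustrative, assuming the standard AArch64
  // FRINT-family semantics):
  //   ISD::FCEIL  -> FRINTP (round towards +infinity)
  //   ISD::FFLOOR -> FRINTM (round towards -infinity)
  //   ISD::FTRUNC -> FRINTZ (round towards zero)
  //   ISD::FROUND -> FRINTA (round to nearest, ties away from zero)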

  // Select the default instruction: fall back to the TableGen-generated
  // matcher.
  ResNode = SelectCode(Node);

  DEBUG(errs() << "=> ");
  if (ResNode == nullptr || ResNode == Node)
    DEBUG(Node->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DEBUG(errs() << "\n");

  return ResNode;
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}
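
// A sketch of how this factory is typically wired up. The pass-config hook
// below follows the usual LLVM convention but is an assumption, not part of
// this file:
//
//   bool AArch64PassConfig::addInstSelector() {
//     // Install DAG-to-DAG instruction selection for each function.
//     addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
//     return false;
//   }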