// AArch64ISelDAGToDAG.cpp revision f116f8a63fb26feb0f8c59ba4d69d7954393d7d1
//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "aarch64-isel"
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

//===--------------------------------------------------------------------===//
/// AArch64 specific code to select AArch64 machine instructions for
/// SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {
  AArch64TargetMachine &TM;

  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel), TM(tm),
      Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
  }

  virtual const char *getPassName() const {
    return "AArch64 Instruction Selection";
  }

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

  /// Match a constant that can be used as a scaled, unsigned 12-bit offset
  /// for a memory access of MemSize bytes: it must be a multiple of MemSize
  /// and the scaled value must fit in 12 bits.
  template<unsigned MemSize>
  bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
    const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
    if (!CN || CN->getZExtValue() % MemSize != 0
        || CN->getZExtValue() / MemSize > 0xfff)
      return false;

    UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
    return true;
  }

  // Template shim so TableGen patterns can bake the register width into the
  // ComplexPattern; forwards to the out-of-line overload below.
  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  /// Used for pre-lowered address-reference nodes, so we already know
  /// the fields match. This operand's job is simply to add an
  /// appropriate shift operand to the MOVZ/MOVK instruction.
  template<unsigned LogShift>
  bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
    Imm = N;
    Shift = CurDAG->getTargetConstant(LogShift, MVT::i32);
    return true;
  }

  bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                unsigned RegWidth);

  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    char ConstraintCode,
                                    std::vector<SDValue> &OutOps);

  bool SelectLogicalImm(SDValue N, SDValue &Imm);

  // Template shim for TSTB patterns, parallel to SelectCVTFixedPosOperand.
  template<unsigned RegWidth>
  bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
    return SelectTSTBOperand(N, FixedPos, RegWidth);
  }

  bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);

  /// Select an atomic node to the pseudo-instruction matching its memory
  /// width (one of Op8/Op16/Op32/Op64), preserving the AtomicOrdering.
  SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
                       unsigned Op64);

  /// Put the given constant into a pool and return a DAG which will give its
  /// address.
  SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV);

  SDNode *TrySelectToMoveImm(SDNode *N);
  SDNode *LowerToFPLitPool(SDNode *Node);
  SDNode *SelectToLitPool(SDNode *N);

  SDNode* Select(SDNode*);
private:
  /// Get the opcode for table lookup instruction
  unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);

  /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4.
  /// IsExt is to indicate if the result will be extended with an argument.
  SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);

  /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
  SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating,
                    const uint16_t *Opcode);

  /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
  SDNode *SelectVST(SDNode *N, unsigned NumVecs, bool isUpdating,
                    const uint16_t *Opcodes);

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element that is returned
  /// unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
                      unsigned SubRegs[]);
};
}

// Match a floating-point constant that is an exact power of two, suitable as
// the fixed-point scale operand of an FCVT[SU] instruction; on success
// FixedPos receives the encoded fbits operand.
bool
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                              unsigned RegWidth) {
  const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
  if (!CN) return false;

  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
  // x-register.
//
  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
  // integers.
  bool IsExact;

  // fbits is between 1 and 64 in the worst-case, which means the fmul
  // could have 2^64 as an actual operand. Need 65 bits of precision.
  APSInt IntVal(65, true);
  CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);

  // N.b. isPowerOf2 also checks for > 0.
  if (!IsExact || !IntVal.isPowerOf2()) return false;
  unsigned FBits = IntVal.logBase2();

  // Checks above should have guaranteed that we haven't lost information in
  // finding FBits, but it must still be in range.
  if (FBits == 0 || FBits > RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
  return true;
}

// Handle inline-assembly memory operand constraints: 'm' and 'Q' are passed
// through unchanged; anything else is a hard error.
bool
AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
                                                  char ConstraintCode,
                                                  std::vector<SDValue> &OutOps) {
  switch (ConstraintCode) {
  default: llvm_unreachable("Unrecognised AArch64 memory constraint");
  case 'm':
    // FIXME: more freedom is actually permitted for 'm'. We can go
    // hunting for a base and an offset if we want. Of course, since
    // we don't really know how the operand is going to be used we're
    // probably restricted to the load/store pair's simm7 as an offset
    // range anyway.
  case 'Q':
    OutOps.push_back(Op);
  }

  return false;
}

// Match floating-point positive zero (negative zero needs a real move).
bool
AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
  ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
  if (!Imm || !Imm->getValueAPF().isPosZero())
    return false;

  // Doesn't actually carry any information, but keeps TableGen quiet.
  Dummy = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}

// Match a constant encodable as an AArch64 logical immediate for the value's
// register width; Imm receives the encoded bit pattern.
bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
  uint32_t Bits;
  uint32_t RegWidth = N.getValueType().getSizeInBits();

  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) return false;

  if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
    return false;

  Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
  return true;
}

// Try to materialise the constant Node in a single instruction: MOVZ, MOVN
// (possibly via a 32-bit move for a 64-bit destination) or an ORR from the
// zero register for logical immediates. Returns NULL if one instruction
// isn't enough.
SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
  SDNode *ResNode;
  SDLoc dl(Node);
  EVT DestType = Node->getValueType(0);
  unsigned DestWidth = DestType.getSizeInBits();

  unsigned MOVOpcode;
  EVT MOVType;
  int UImm16, Shift;
  uint32_t LogicalBits;

  uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
  if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
    MOVType = DestType;
    MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
  } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
    MOVType = DestType;
    MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
  } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
    // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
    // use a 32-bit instruction: "movn w0, 0xedbc".
    MOVType = MVT::i32;
    MOVOpcode = AArch64::MOVNwii;
  } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits))  {
    MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
    uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;

    return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
                              CurDAG->getRegister(ZR, DestType),
                              CurDAG->getTargetConstant(LogicalBits, MVT::i32));
  } else {
    // Can't handle it in one instruction. There's scope for permitting two (or
    // more) instructions, but that'll need more thought.
return NULL;
  }

  ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
                              CurDAG->getTargetConstant(UImm16, MVT::i32),
                              CurDAG->getTargetConstant(Shift, MVT::i32));

  // A 32-bit MOVN used for a 64-bit destination must be widened: insert the
  // 32-bit result into the low half of a 64-bit register.
  if (MOVType != DestType) {
    ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
                          MVT::i64, MVT::i32, MVT::Other,
                          CurDAG->getTargetConstant(0, MVT::i64),
                          SDValue(ResNode, 0),
                          CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
  }

  return ResNode;
}

// Build a DAG computing the address of CV's constant-pool entry. Small code
// model uses an ADRP/LO12-style wrapper node; large code model builds the
// full 64-bit absolute address with MOVZ + three MOVKs.
SDValue
AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL,
                                                const Constant *CV) {
  EVT PtrVT = getTargetLowering()->getPointerTy();

  switch (getTargetLowering()->getTargetMachine().getCodeModel()) {
  case CodeModel::Small: {
    unsigned Alignment =
      getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
    return CurDAG->getNode(
        AArch64ISD::WrapperSmall, DL, PtrVT,
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
        CurDAG->getConstant(Alignment, MVT::i32));
  }
  case CodeModel::Large: {
    SDNode *LitAddr;
    // MOVZ loads bits [63:48]; each subsequent MOVK fills in the next
    // 16-bit chunk without clobbering the rest.
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVZxii, DL, PtrVT,
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
        CurDAG->getTargetConstant(3, MVT::i32));
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
        CurDAG->getTargetConstant(2, MVT::i32));
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
        CurDAG->getTargetConstant(1, MVT::i32));
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
        CurDAG->getTargetConstant(0, MVT::i32));
    return SDValue(LitAddr, 0);
  }
  default:
    llvm_unreachable("Only small and large code models supported now");
  }
}

// Materialise an integer constant via a load from the constant pool,
// shrinking 64-bit values that fit in 32 bits to a narrower pool entry with
// an extending load.
SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
  SDLoc DL(Node);
  uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
  int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
  EVT DestType = Node->getValueType(0);

  // Since we may end up loading a 64-bit constant from a 32-bit entry the
  // constant in the pool may have a different type to the eventual node.
  ISD::LoadExtType Extension;
  EVT MemType;

  assert((DestType == MVT::i64 || DestType == MVT::i32)
         && "Only expect integer constants at the moment");

  if (DestType == MVT::i32) {
    Extension = ISD::NON_EXTLOAD;
    MemType = MVT::i32;
  } else if (UnsignedVal <= UINT32_MAX) {
    Extension = ISD::ZEXTLOAD;
    MemType = MVT::i32;
  } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
    Extension = ISD::SEXTLOAD;
    MemType = MVT::i32;
  } else {
    Extension = ISD::NON_EXTLOAD;
    MemType = MVT::i64;
  }

  Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
                                                  MemType.getSizeInBits()),
                                  UnsignedVal);
  SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
  unsigned Alignment =
    getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());

  return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
                            PoolAddr,
                            MachinePointerInfo::getConstantPool(), MemType,
                            /* isVolatile = */ false,
                            /* isNonTemporal = */ false,
                            Alignment).getNode();
}

// Materialise a floating-point constant via an invariant load from its
// constant-pool entry.
SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
  SDLoc DL(Node);
  const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
  EVT DestType = Node->getValueType(0);

  unsigned Alignment =
    getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType());
  SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);

  return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
                         MachinePointerInfo::getConstantPool(),
                         /* isVolatile = */ false,
                         /* isNonTemporal = */ false,
                         /* isInvariant = */ true,
                         Alignment).getNode();
}

// Match a single-bit constant for TBZ/TBNZ; FixedPos receives the index of
// the tested bit, which must lie within the register width.
bool
AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
                                       unsigned RegWidth) {
  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) return false;

  uint64_t Val = CN->getZExtValue();

  if (!isPowerOf2_64(Val)) return false;

  unsigned TestedBit = Log2_64(Val);
  // Checks above should have guaranteed that we haven't lost information in
  // finding TestedBit, but it must still be in range.
  if (TestedBit >= RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
  return true;
}

SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
                                          unsigned Op16,unsigned Op32,
                                          unsigned Op64) {
  // Mostly direct translation to the given operations, except that we preserve
  // the AtomicOrdering for use later on.
AtomicSDNode *AN = cast<AtomicSDNode>(Node);
  EVT VT = AN->getMemoryVT();

  // Pick the pseudo-instruction matching the access width.
  unsigned Op;
  if (VT == MVT::i8)
    Op = Op8;
  else if (VT == MVT::i16)
    Op = Op16;
  else if (VT == MVT::i32)
    Op = Op32;
  else if (VT == MVT::i64)
    Op = Op64;
  else
    llvm_unreachable("Unexpected atomic operation");

  // Copy all operands except the chain, then append the ordering as an
  // explicit immediate and move the chain to the end.
  SmallVector<SDValue, 4> Ops;
  for (unsigned i = 1; i < AN->getNumOperands(); ++i)
      Ops.push_back(AN->getOperand(i));

  Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
  Ops.push_back(AN->getOperand(0)); // Chain moves to the end

  return CurDAG->SelectNodeTo(Node, Op,
                              AN->getValueType(0), MVT::Other,
                              &Ops[0], Ops.size());
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static unsigned RegClassIDs[] = { AArch64::DPairRegClassID,
                                    AArch64::DTripleRegClassID,
                                    AArch64::DQuadRegClassID };
  static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1,
                                AArch64::dsub_2, AArch64::dsub_3 };

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static unsigned RegClassIDs[] = { AArch64::QPairRegClassID,
                                    AArch64::QTripleRegClassID,
                                    AArch64::QQuadRegClassID };
  static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1,
                                AArch64::qsub_2, AArch64::qsub_3 };

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         unsigned RegClassIDs[],
                                         unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0].getNode());

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}


// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  switch (Opc) {
  default: break;
  case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
  case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
  case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
  case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
  case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
  case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
  case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
  case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;

  case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
  case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
  case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
  case AArch64::LD1WB2V_1D_fixed: return AArch64::LD1WB2V_1D_register;
  case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
  case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
  case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
  case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;

  case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
  case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
  case
AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
  case AArch64::LD1WB3V_1D_fixed: return AArch64::LD1WB3V_1D_register;
  case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
  case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
  case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
  case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;

  case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
  case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
  case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
  case AArch64::LD1WB4V_1D_fixed: return AArch64::LD1WB4V_1D_register;
  case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
  case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
  case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
  case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;

  case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
  case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
  case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
  case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
  case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
  case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
  case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
  case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;

  case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
  case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
  case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
  case AArch64::ST1WB2V_1D_fixed: return AArch64::ST1WB2V_1D_register;
  case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
  case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
  case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
  case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;

  case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
  case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
  case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
  case AArch64::ST1WB3V_1D_fixed: return AArch64::ST1WB3V_1D_register;
  case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
  case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
  case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
  case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;

  case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
  case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
  case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
  case AArch64::ST1WB4V_1D_fixed: return AArch64::ST1WB4V_1D_register;
  case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
  case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
  case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
  case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
                                       bool isUpdating,
                                       const uint16_t *Opcodes) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");

  // Index into the Opcodes table by element type; the table is laid out in
  // the order of the cases below.
  EVT VT = N->getValueType(0);
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vector load type");
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  case MVT::v16i8: OpcodeIndex = 4; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 5; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 6; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 7; break;
  }
  unsigned Opc = Opcodes[OpcodeIndex];

  SmallVector<SDValue, 2> Ops;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address

  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
      Opc = getVLDSTRegisterUpdateOpcode(Opc);
    Ops.push_back(Inc);
  }

  Ops.push_back(N->getOperand(0)); // Push back the Chain

  std::vector<EVT> ResTys;
  bool is64BitVector = VT.is64BitVector();

  if (NumVecs == 1)
    ResTys.push_back(VT);
  else if (NumVecs == 3)
    ResTys.push_back(MVT::Untyped);
  else {
    EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
                                 is64BitVector ? NumVecs : NumVecs * 2);
    ResTys.push_back(ResTy);
  }

  if (isUpdating)
    ResTys.push_back(MVT::i64); // Type of the updated register
  ResTys.push_back(MVT::Other); // Type of the Chain
  SDLoc dl(N);
  SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);

  if (NumVecs == 1)
    return VLd;

  // If NumVecs > 1, the return result is a super register containing 2-4
  // consecutive vector registers.
  SDValue SuperReg = SDValue(VLd, 0);

  unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Update users of the Chain
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));

  return NULL;
}

SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
                                       bool isUpdating,
                                       const uint16_t *Opcodes) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  // Index into the Opcodes table by element type, mirroring SelectVLD.
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vector store type");
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  case MVT::v16i8: OpcodeIndex = 4; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 5; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 6; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 7; break;
  }
  unsigned Opc = Opcodes[OpcodeIndex];

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i64);
  ResTys.push_back(MVT::Other); // Type for the Chain

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address

  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
      Opc = getVLDSTRegisterUpdateOpcode(Opc);
    Ops.push_back(Inc);
  }
  bool is64BitVector = VT.is64BitVector();

  // Gather the source vectors into a single vector-list operand.
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
                               N->op_begin() + Vec0Idx + NumVecs);
  SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs);
  Ops.push_back(SrcReg);

  // Push back the Chain
  Ops.push_back(N->getOperand(0));

  // Transfer memoperands.
  SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);

  return VSt;
}

unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
                                        unsigned NumOfVec) {
  assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range");

  unsigned Opc = 0;
  switch (NumOfVec) {
  default:
    break;
  case 1:
    if (IsExt)
      Opc = Is64Bit ?
AArch64::TBX1_8b : AArch64::TBX1_16b;
    else
      Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
    break;
  case 2:
    if (IsExt)
      Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
    else
      Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
    break;
  case 3:
    if (IsExt)
      Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
    else
      Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
    break;
  case 4:
    if (IsExt)
      Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
    else
      Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
    break;
  }

  return Opc;
}

SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
                                        bool IsExt) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  // Check the element of look up table is 64-bit or not
  unsigned Vec0Idx = IsExt ? 2 : 1;
  assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() &&
         "The element of lookup table for vtbl and vtbx must be 128-bit");

  // Check the return value type is 64-bit or not
  EVT ResVT = N->getValueType(0);
  bool is64BitRes = ResVT.is64BitVector();

  // Create new SDValue for vector list
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
                               N->op_begin() + Vec0Idx + NumVecs);
  SDValue TblReg = createQTuple(Regs);
  unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);

  SmallVector<SDValue, 3> Ops;
  if (IsExt)
    // TBX also reads the vector being extended (operand 1).
    Ops.push_back(N->getOperand(1));
  Ops.push_back(TblReg);
  Ops.push_back(N->getOperand(Vec0Idx + NumVecs));
  return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
}

SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
  // Dump information about the Node being selected
  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");

  if (Node->isMachineOpcode()) {
    DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return
NULL;
  }

  switch (Node->getOpcode()) {
  case ISD::ATOMIC_LOAD_ADD:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_ADD_I8,
                        AArch64::ATOMIC_LOAD_ADD_I16,
                        AArch64::ATOMIC_LOAD_ADD_I32,
                        AArch64::ATOMIC_LOAD_ADD_I64);
  case ISD::ATOMIC_LOAD_SUB:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_SUB_I8,
                        AArch64::ATOMIC_LOAD_SUB_I16,
                        AArch64::ATOMIC_LOAD_SUB_I32,
                        AArch64::ATOMIC_LOAD_SUB_I64);
  case ISD::ATOMIC_LOAD_AND:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_AND_I8,
                        AArch64::ATOMIC_LOAD_AND_I16,
                        AArch64::ATOMIC_LOAD_AND_I32,
                        AArch64::ATOMIC_LOAD_AND_I64);
  case ISD::ATOMIC_LOAD_OR:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_OR_I8,
                        AArch64::ATOMIC_LOAD_OR_I16,
                        AArch64::ATOMIC_LOAD_OR_I32,
                        AArch64::ATOMIC_LOAD_OR_I64);
  case ISD::ATOMIC_LOAD_XOR:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_XOR_I8,
                        AArch64::ATOMIC_LOAD_XOR_I16,
                        AArch64::ATOMIC_LOAD_XOR_I32,
                        AArch64::ATOMIC_LOAD_XOR_I64);
  case ISD::ATOMIC_LOAD_NAND:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_NAND_I8,
                        AArch64::ATOMIC_LOAD_NAND_I16,
                        AArch64::ATOMIC_LOAD_NAND_I32,
                        AArch64::ATOMIC_LOAD_NAND_I64);
  case ISD::ATOMIC_LOAD_MIN:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_MIN_I8,
                        AArch64::ATOMIC_LOAD_MIN_I16,
                        AArch64::ATOMIC_LOAD_MIN_I32,
                        AArch64::ATOMIC_LOAD_MIN_I64);
  case ISD::ATOMIC_LOAD_MAX:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_MAX_I8,
                        AArch64::ATOMIC_LOAD_MAX_I16,
                        AArch64::ATOMIC_LOAD_MAX_I32,
                        AArch64::ATOMIC_LOAD_MAX_I64);
  case ISD::ATOMIC_LOAD_UMIN:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_UMIN_I8,
                        AArch64::ATOMIC_LOAD_UMIN_I16,
                        AArch64::ATOMIC_LOAD_UMIN_I32,
                        AArch64::ATOMIC_LOAD_UMIN_I64);
  case ISD::ATOMIC_LOAD_UMAX:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_UMAX_I8,
                        AArch64::ATOMIC_LOAD_UMAX_I16,
                        AArch64::ATOMIC_LOAD_UMAX_I32,
                        AArch64::ATOMIC_LOAD_UMAX_I64);
  case ISD::ATOMIC_SWAP:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_SWAP_I8,
                        AArch64::ATOMIC_SWAP_I16,
                        AArch64::ATOMIC_SWAP_I32,
                        AArch64::ATOMIC_SWAP_I64);
  case ISD::ATOMIC_CMP_SWAP:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_CMP_SWAP_I8,
                        AArch64::ATOMIC_CMP_SWAP_I16,
                        AArch64::ATOMIC_CMP_SWAP_I32,
                        AArch64::ATOMIC_CMP_SWAP_I64);
  case ISD::FrameIndex: {
    // Lower a frame index to "add xd, fp-or-sp-index, #0".
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    EVT PtrTy = getTargetLowering()->getPointerTy();
    SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
    return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
                                TFI, CurDAG->getTargetConstant(0, PtrTy));
  }
  case ISD::ConstantPool: {
    // Constant pools are fine, just create a Target entry.
    ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
    const Constant *C = CN->getConstVal();
    SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));

    ReplaceUses(SDValue(Node, 0), CP);
    return NULL;
  }
  case ISD::Constant: {
    SDNode *ResNode = 0;
    if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
      // XZR and WZR are probably even better than an actual move: most of the
      // time they can be folded into another instruction with *no* cost.

      EVT Ty = Node->getValueType(0);
      assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
      uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
      ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                       SDLoc(Node),
                                       Register, Ty).getNode();
    }

    // Next best option is a move-immediate, see if we can do that.
    if (!ResNode) {
      ResNode = TrySelectToMoveImm(Node);
    }

    if (ResNode)
      return ResNode;

    // If even that fails we fall back to a lit-pool entry at the moment. Future
    // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
    ResNode = SelectToLitPool(Node);
    assert(ResNode && "We need *some* way to materialise a constant");

    // We want to continue selection at this point since the litpool access
    // generated used generic nodes for simplicity.
    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
    Node = ResNode;
    break;
  }
  case ISD::ConstantFP: {
    if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
      // FMOV will take care of it from TableGen
      break;
    }

    SDNode *ResNode = LowerToFPLitPool(Node);
    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));

    // We want to continue selection at this point since the litpool access
    // generated used generic nodes for simplicity.
    Node = ResNode;
    break;
  }
  // The NEON_LDn/STn_UPD opcode tables below are ordered by element type to
  // match the OpcodeIndex computed in SelectVLD/SelectVST.
  case AArch64ISD::NEON_LD1_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD1WB_8B_fixed,  AArch64::LD1WB_4H_fixed,
      AArch64::LD1WB_2S_fixed,  AArch64::LD1WB_1D_fixed,
      AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
      AArch64::LD1WB_4S_fixed,  AArch64::LD1WB_2D_fixed
    };
    return SelectVLD(Node, 1, true, Opcodes);
  }
  case AArch64ISD::NEON_LD2_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD2WB_8B_fixed,  AArch64::LD2WB_4H_fixed,
      AArch64::LD2WB_2S_fixed,  AArch64::LD1WB2V_1D_fixed,
      AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
      AArch64::LD2WB_4S_fixed,  AArch64::LD2WB_2D_fixed
    };
    return SelectVLD(Node, 2, true, Opcodes);
  }
  case AArch64ISD::NEON_LD3_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD3WB_8B_fixed,  AArch64::LD3WB_4H_fixed,
      AArch64::LD3WB_2S_fixed,  AArch64::LD1WB3V_1D_fixed,
      AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
      AArch64::LD3WB_4S_fixed,  AArch64::LD3WB_2D_fixed
    };
    return SelectVLD(Node, 3, true, Opcodes);
  }
  case AArch64ISD::NEON_LD4_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD4WB_8B_fixed,  AArch64::LD4WB_4H_fixed,
      AArch64::LD4WB_2S_fixed,  AArch64::LD1WB4V_1D_fixed,
      AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
      AArch64::LD4WB_4S_fixed,  AArch64::LD4WB_2D_fixed
    };
    return SelectVLD(Node, 4, true, Opcodes);
  }
  case AArch64ISD::NEON_ST1_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST1WB_8B_fixed,  AArch64::ST1WB_4H_fixed,
      AArch64::ST1WB_2S_fixed,  AArch64::ST1WB_1D_fixed,
      AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
      AArch64::ST1WB_4S_fixed,  AArch64::ST1WB_2D_fixed
    };
    return SelectVST(Node, 1, true, Opcodes);
  }
  case AArch64ISD::NEON_ST2_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST2WB_8B_fixed,  AArch64::ST2WB_4H_fixed,
      AArch64::ST2WB_2S_fixed,  AArch64::ST1WB2V_1D_fixed,
      AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
      AArch64::ST2WB_4S_fixed,  AArch64::ST2WB_2D_fixed
    };
    return SelectVST(Node, 2, true, Opcodes);
  }
  case AArch64ISD::NEON_ST3_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST3WB_8B_fixed,  AArch64::ST3WB_4H_fixed,
      AArch64::ST3WB_2S_fixed,  AArch64::ST1WB3V_1D_fixed,
      AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
      AArch64::ST3WB_4S_fixed,  AArch64::ST3WB_2D_fixed
    };
    return SelectVST(Node, 3, true, Opcodes);
  }
  case AArch64ISD::NEON_ST4_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST4WB_8B_fixed,  AArch64::ST4WB_4H_fixed,
      AArch64::ST4WB_2S_fixed,  AArch64::ST1WB4V_1D_fixed,
      AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
      AArch64::ST4WB_4S_fixed,  AArch64::ST4WB_2D_fixed
    };
    return SelectVST(Node, 4, true, Opcodes);
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
    bool IsExt = false;
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_neon_vtbx1:
      IsExt = true;
      // Deliberate fallthrough: vtbx shares selection with vtbl apart from
      // the extra "extend" operand signalled by IsExt.
    case Intrinsic::aarch64_neon_vtbl1:
      return SelectVTBL(Node, 1, IsExt);
    case Intrinsic::aarch64_neon_vtbx2:
      IsExt = true;
      // Deliberate fallthrough, as above.
    case
Intrinsic::aarch64_neon_vtbl2: 988 return SelectVTBL(Node, 2, IsExt); 989 case Intrinsic::aarch64_neon_vtbx3: 990 IsExt = true; 991 case Intrinsic::aarch64_neon_vtbl3: 992 return SelectVTBL(Node, 3, IsExt); 993 case Intrinsic::aarch64_neon_vtbx4: 994 IsExt = true; 995 case Intrinsic::aarch64_neon_vtbl4: 996 return SelectVTBL(Node, 4, IsExt); 997 } 998 break; 999 } 1000 case ISD::INTRINSIC_VOID: 1001 case ISD::INTRINSIC_W_CHAIN: { 1002 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1003 switch (IntNo) { 1004 default: 1005 break; 1006 1007 case Intrinsic::arm_neon_vld1: { 1008 static const uint16_t Opcodes[] = { AArch64::LD1_8B, AArch64::LD1_4H, 1009 AArch64::LD1_2S, AArch64::LD1_1D, 1010 AArch64::LD1_16B, AArch64::LD1_8H, 1011 AArch64::LD1_4S, AArch64::LD1_2D }; 1012 return SelectVLD(Node, 1, false, Opcodes); 1013 } 1014 case Intrinsic::arm_neon_vld2: { 1015 static const uint16_t Opcodes[] = { AArch64::LD2_8B, AArch64::LD2_4H, 1016 AArch64::LD2_2S, AArch64::LD1_2V_1D, 1017 AArch64::LD2_16B, AArch64::LD2_8H, 1018 AArch64::LD2_4S, AArch64::LD2_2D }; 1019 return SelectVLD(Node, 2, false, Opcodes); 1020 } 1021 case Intrinsic::arm_neon_vld3: { 1022 static const uint16_t Opcodes[] = { AArch64::LD3_8B, AArch64::LD3_4H, 1023 AArch64::LD3_2S, AArch64::LD1_3V_1D, 1024 AArch64::LD3_16B, AArch64::LD3_8H, 1025 AArch64::LD3_4S, AArch64::LD3_2D }; 1026 return SelectVLD(Node, 3, false, Opcodes); 1027 } 1028 case Intrinsic::arm_neon_vld4: { 1029 static const uint16_t Opcodes[] = { AArch64::LD4_8B, AArch64::LD4_4H, 1030 AArch64::LD4_2S, AArch64::LD1_4V_1D, 1031 AArch64::LD4_16B, AArch64::LD4_8H, 1032 AArch64::LD4_4S, AArch64::LD4_2D }; 1033 return SelectVLD(Node, 4, false, Opcodes); 1034 } 1035 case Intrinsic::arm_neon_vst1: { 1036 static const uint16_t Opcodes[] = { AArch64::ST1_8B, AArch64::ST1_4H, 1037 AArch64::ST1_2S, AArch64::ST1_1D, 1038 AArch64::ST1_16B, AArch64::ST1_8H, 1039 AArch64::ST1_4S, AArch64::ST1_2D }; 1040 return SelectVST(Node, 1, false, 
Opcodes); 1041 } 1042 case Intrinsic::arm_neon_vst2: { 1043 static const uint16_t Opcodes[] = { AArch64::ST2_8B, AArch64::ST2_4H, 1044 AArch64::ST2_2S, AArch64::ST1_2V_1D, 1045 AArch64::ST2_16B, AArch64::ST2_8H, 1046 AArch64::ST2_4S, AArch64::ST2_2D }; 1047 return SelectVST(Node, 2, false, Opcodes); 1048 } 1049 case Intrinsic::arm_neon_vst3: { 1050 static const uint16_t Opcodes[] = { AArch64::ST3_8B, AArch64::ST3_4H, 1051 AArch64::ST3_2S, AArch64::ST1_3V_1D, 1052 AArch64::ST3_16B, AArch64::ST3_8H, 1053 AArch64::ST3_4S, AArch64::ST3_2D }; 1054 return SelectVST(Node, 3, false, Opcodes); 1055 } 1056 case Intrinsic::arm_neon_vst4: { 1057 static const uint16_t Opcodes[] = { AArch64::ST4_8B, AArch64::ST4_4H, 1058 AArch64::ST4_2S, AArch64::ST1_4V_1D, 1059 AArch64::ST4_16B, AArch64::ST4_8H, 1060 AArch64::ST4_4S, AArch64::ST4_2D }; 1061 return SelectVST(Node, 4, false, Opcodes); 1062 } 1063 } 1064 break; 1065 } 1066 default: 1067 break; // Let generic code handle it 1068 } 1069 1070 SDNode *ResNode = SelectCode(Node); 1071 1072 DEBUG(dbgs() << "=> "; 1073 if (ResNode == NULL || ResNode == Node) 1074 Node->dump(CurDAG); 1075 else 1076 ResNode->dump(CurDAG); 1077 dbgs() << "\n"); 1078 1079 return ResNode; 1080} 1081 1082/// This pass converts a legalized DAG into a AArch64-specific DAG, ready for 1083/// instruction scheduling. 1084FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM, 1085 CodeGenOpt::Level OptLevel) { 1086 return new AArch64DAGToDAGISel(TM, OptLevel); 1087} 1088