// ARMInstrNEON.td revision eaa192af18677c4dc5894e049514d8a6b1d6d7c2
//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Generic i32 immediate operand printed with printNEONModImmOperand.  It has
// no ParserMatchClass of its own.
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// The operands below each pair an AsmOperandClass (used as the operand's
// ParserMatchClass) with an i32 Operand.  All of them except nImmVMOVF32 are
// printed with printNEONModImmOperand.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
// Printed with printFPImmOperand and matched with FPImmOperand, which is
// declared outside this section of the file.
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Vector index operands.  Each is an i32 immediate leaf whose predicate
// accepts the value only if, viewed as an unsigned 64-bit integer, it is
// below the stated bound (8, 4 and 2 respectively).
// NOTE(review): the bounds presumably equal the lane count of a 64-bit
// register for 8/16/32-bit elements -- confirm against the users.
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Vector register list operands.  Every list is parsed by parseVectorList
// and is modelled as a DPR register operand; only the print method differs.

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListTwoDAsmOperand : AsmOperandClass {
  let Name = "VecListTwoD";
  let ParserMethod = "parseVectorList";
}
def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
  let ParserMatchClass = VecListTwoDAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
// It is printed with the same method as VecListTwoD (printVectorListTwo).
def VecListTwoQAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQ";
  let ParserMethod = "parseVectorList";
}
def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwo"> {
  let ParserMatchClass = VecListTwoQAsmOperand;
}

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profiles for vector compares.
//   SDTARMVCMP:  one result, two operands; the result is an integer type and
//                the two operands have the same type as each other.
//   SDTARMVCMPZ: one result, one operand; no type constraints.
def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;

// Two-operand nodes use SDTARMVCMP; the "z" nodes take a single operand and
// use SDTARMVCMPZ.
def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
// Shift-by-immediate type profiles.  In all three the last operand is an i32.
//   SDTARMVSH:    integer result; operand 1 has the same type as the result.
//   SDTARMVSHX:   integer result and integer operand 1, with no constraint
//                 tying the two types together.
//   SDTARMVSHINS: integer result; operands 1 and 2 both have the result type.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction: i32 result, integer operand 1, i32 operand 2.
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Vector move of an immediate: vector result, single i32 operand.
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

// Vector result, operand 1 of the same type as the result, i32 operand 2.
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                           SDTCisVT<2, i32>]>;
def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

// Vector result and three operands, all of the same type as the result.
def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

// Vector result; the single operand is unconstrained.
def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

// Vector result, two operands of the result type, i32 operand 3.
def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// One vector result, one operand of the same type.
def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two results and two operands, all four of the same vector type.
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Integer result; two integer operands of the same type as each other.  The
// result type is not tied to the operand type.
def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

// f32 result with two f32 operands.
def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Matches a NEONvmovImm whose i32 operand, decoded with
// ARM_AM::decodeNEONModImm, yields 32-bit elements with the value 0.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Matches a NEONvmovImm whose i32 operand, decoded with
// ARM_AM::decodeNEONModImm, yields 8-bit elements with the value 0xff.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
// Its selection pattern is a v2f64 load from the address in $Rn.
def VLDMQIA
  : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
                    IIC_fpLoad_m, "",
                   [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
// Its selection pattern is a v2f64 store to the address in $Rn.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
                    IIC_fpStore_m, "",
                   [(store (v2f64 QPR:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
261class VLDQPseudo<InstrItinClass itin> 262 : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; 263class VLDQWBPseudo<InstrItinClass itin> 264 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 265 (ins addrmode6:$addr, am6offset:$offset), itin, 266 "$addr.addr = $wb">; 267class VLDQWBfixedPseudo<InstrItinClass itin> 268 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 269 (ins addrmode6:$addr), itin, 270 "$addr.addr = $wb">; 271class VLDQWBregisterPseudo<InstrItinClass itin> 272 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 273 (ins addrmode6:$addr, rGPR:$offset), itin, 274 "$addr.addr = $wb">; 275class VLDQQPseudo<InstrItinClass itin> 276 : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; 277class VLDQQWBPseudo<InstrItinClass itin> 278 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 279 (ins addrmode6:$addr, am6offset:$offset), itin, 280 "$addr.addr = $wb">; 281class VLDQQQQPseudo<InstrItinClass itin> 282 : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, 283 "$src = $dst">; 284class VLDQQQQWBPseudo<InstrItinClass itin> 285 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 286 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 287 "$addr.addr = $wb, $src = $dst">; 288 289let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 290 291// VLD1 : Vector Load (multiple single elements) 292class VLD1D<bits<4> op7_4, string Dt> 293 : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), 294 (ins addrmode6:$Rn), IIC_VLD1, 295 "vld1", Dt, "$Vd, $Rn", "", []> { 296 let Rm = 0b1111; 297 let Inst{4} = Rn{4}; 298 let DecoderMethod = "DecodeVLDInstruction"; 299} 300class VLD1Q<bits<4> op7_4, string Dt> 301 : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd), 302 (ins addrmode6:$Rn), IIC_VLD1x2, 303 "vld1", Dt, "$Vd, $Rn", "", []> { 304 let Rm = 0b1111; 305 let Inst{5-4} = Rn{5-4}; 306 let DecoderMethod = "DecodeVLDInstruction"; 307} 308 309def VLD1d8 : VLD1D<{0,0,0,?}, "8">; 310def VLD1d16 : VLD1D<{0,1,0,?}, "16">; 311def VLD1d32 : 
VLD1D<{1,0,0,?}, "32">; 312def VLD1d64 : VLD1D<{1,1,0,?}, "64">; 313 314def VLD1q8 : VLD1Q<{0,0,?,?}, "8">; 315def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; 316def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; 317def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; 318 319def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>; 320def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>; 321def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>; 322def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>; 323 324// ...with address register writeback: 325multiclass VLD1DWB<bits<4> op7_4, string Dt> { 326 def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 327 (ins addrmode6:$Rn), IIC_VLD1u, 328 "vld1", Dt, "$Vd, $Rn!", 329 "$Rn.addr = $wb", []> { 330 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 331 let Inst{4} = Rn{4}; 332 let DecoderMethod = "DecodeVLDInstruction"; 333 let AsmMatchConverter = "cvtVLDwbFixed"; 334 } 335 def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 336 (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, 337 "vld1", Dt, "$Vd, $Rn, $Rm", 338 "$Rn.addr = $wb", []> { 339 let Inst{4} = Rn{4}; 340 let DecoderMethod = "DecodeVLDInstruction"; 341 let AsmMatchConverter = "cvtVLDwbRegister"; 342 } 343} 344multiclass VLD1QWB<bits<4> op7_4, string Dt> { 345 def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), 346 (ins addrmode6:$Rn), IIC_VLD1x2u, 347 "vld1", Dt, "$Vd, $Rn!", 348 "$Rn.addr = $wb", []> { 349 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
350 let Inst{5-4} = Rn{5-4}; 351 let DecoderMethod = "DecodeVLDInstruction"; 352 let AsmMatchConverter = "cvtVLDwbFixed"; 353 } 354 def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), 355 (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, 356 "vld1", Dt, "$Vd, $Rn, $Rm", 357 "$Rn.addr = $wb", []> { 358 let Inst{5-4} = Rn{5-4}; 359 let DecoderMethod = "DecodeVLDInstruction"; 360 let AsmMatchConverter = "cvtVLDwbRegister"; 361 } 362} 363 364defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">; 365defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">; 366defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">; 367defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">; 368defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">; 369defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; 370defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; 371defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; 372 373def VLD1q8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; 374def VLD1q16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; 375def VLD1q32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; 376def VLD1q64PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; 377def VLD1q8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; 378def VLD1q16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; 379def VLD1q32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; 380def VLD1q64PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; 381 382// ...with 3 registers 383class VLD1D3<bits<4> op7_4, string Dt> 384 : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), 385 (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, 386 "$Vd, $Rn", "", []> { 387 let Rm = 0b1111; 388 let Inst{4} = Rn{4}; 389 let DecoderMethod = "DecodeVLDInstruction"; 390} 391multiclass VLD1D3WB<bits<4> op7_4, string Dt> { 392 def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 393 (ins addrmode6:$Rn), IIC_VLD1x2u, 394 "vld1", Dt, "$Vd, $Rn!", 395 "$Rn.addr = $wb", []> { 396 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
397 let Inst{4} = Rn{4}; 398 let DecoderMethod = "DecodeVLDInstruction"; 399 let AsmMatchConverter = "cvtVLDwbFixed"; 400 } 401 def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 402 (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, 403 "vld1", Dt, "$Vd, $Rn, $Rm", 404 "$Rn.addr = $wb", []> { 405 let Inst{4} = Rn{4}; 406 let DecoderMethod = "DecodeVLDInstruction"; 407 let AsmMatchConverter = "cvtVLDwbRegister"; 408 } 409} 410 411def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; 412def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; 413def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; 414def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; 415 416defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">; 417defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">; 418defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">; 419defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">; 420 421def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; 422 423// ...with 4 registers 424class VLD1D4<bits<4> op7_4, string Dt> 425 : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), 426 (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, 427 "$Vd, $Rn", "", []> { 428 let Rm = 0b1111; 429 let Inst{5-4} = Rn{5-4}; 430 let DecoderMethod = "DecodeVLDInstruction"; 431} 432multiclass VLD1D4WB<bits<4> op7_4, string Dt> { 433 def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), 434 (ins addrmode6:$Rn), IIC_VLD1x2u, 435 "vld1", Dt, "$Vd, $Rn!", 436 "$Rn.addr = $wb", []> { 437 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
438 let Inst{5-4} = Rn{5-4}; 439 let DecoderMethod = "DecodeVLDInstruction"; 440 let AsmMatchConverter = "cvtVLDwbFixed"; 441 } 442 def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), 443 (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, 444 "vld1", Dt, "$Vd, $Rn, $Rm", 445 "$Rn.addr = $wb", []> { 446 let Inst{5-4} = Rn{5-4}; 447 let DecoderMethod = "DecodeVLDInstruction"; 448 let AsmMatchConverter = "cvtVLDwbRegister"; 449 } 450} 451 452def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; 453def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">; 454def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">; 455def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">; 456 457defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">; 458defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">; 459defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">; 460defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; 461 462def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; 463 464// VLD2 : Vector Load (multiple 2-element structures) 465class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> 466 : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), 467 (ins addrmode6:$Rn), IIC_VLD2, 468 "vld2", Dt, "$Vd, $Rn", "", []> { 469 let Rm = 0b1111; 470 let Inst{5-4} = Rn{5-4}; 471 let DecoderMethod = "DecodeVLDInstruction"; 472} 473class VLD2Q<bits<4> op7_4, string Dt, RegisterOperand VdTy> 474 : NLdSt<0, 0b10, 0b0011, op7_4, 475 (outs VdTy:$Vd), 476 (ins addrmode6:$Rn), IIC_VLD2x2, 477 "vld2", Dt, "$Vd, $Rn", "", []> { 478 let Rm = 0b1111; 479 let Inst{5-4} = Rn{5-4}; 480 let DecoderMethod = "DecodeVLDInstruction"; 481} 482 483def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8", VecListTwoD>; 484def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16", VecListTwoD>; 485def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32", VecListTwoD>; 486 487def VLD2q8 : VLD2Q<{0,0,?,?}, "8", VecListFourD>; 488def VLD2q16 : VLD2Q<{0,1,?,?}, "16", VecListFourD>; 489def VLD2q32 : VLD2Q<{1,0,?,?}, "32", VecListFourD>; 490 491def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>; 492def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>; 493def 
VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>; 494 495def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>; 496def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; 497def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; 498 499// ...with address register writeback: 500class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> 501 : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 502 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u, 503 "vld2", Dt, "$Vd, $Rn$Rm", 504 "$Rn.addr = $wb", []> { 505 let Inst{5-4} = Rn{5-4}; 506 let DecoderMethod = "DecodeVLDInstruction"; 507} 508class VLD2QWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> 509 : NLdSt<0, 0b10, 0b0011, op7_4, 510 (outs VdTy:$Vd, GPR:$wb), 511 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u, 512 "vld2", Dt, "$Vd, $Rn$Rm", 513 "$Rn.addr = $wb", []> { 514 let Inst{5-4} = Rn{5-4}; 515 let DecoderMethod = "DecodeVLDInstruction"; 516} 517 518def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; 519def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; 520def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; 521 522def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8", VecListFourD>; 523def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16", VecListFourD>; 524def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32", VecListFourD>; 525 526def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; 527def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; 528def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; 529 530def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; 531def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; 532def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; 533 534// ...with double-spaced registers 535def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8", VecListTwoQ>; 536def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16", VecListTwoQ>; 537def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32", VecListTwoQ>; 538def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; 539def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; 540def VLD2b32_UPD : 
VLD2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; 541 542// VLD3 : Vector Load (multiple 3-element structures) 543class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> 544 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 545 (ins addrmode6:$Rn), IIC_VLD3, 546 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { 547 let Rm = 0b1111; 548 let Inst{4} = Rn{4}; 549 let DecoderMethod = "DecodeVLDInstruction"; 550} 551 552def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; 553def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; 554def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; 555 556def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>; 557def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>; 558def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>; 559 560// ...with address register writeback: 561class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 562 : NLdSt<0, 0b10, op11_8, op7_4, 563 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 564 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, 565 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", 566 "$Rn.addr = $wb", []> { 567 let Inst{4} = Rn{4}; 568 let DecoderMethod = "DecodeVLDInstruction"; 569} 570 571def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; 572def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; 573def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; 574 575def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; 576def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; 577def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; 578 579// ...with double-spaced registers: 580def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; 581def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; 582def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; 583def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; 584def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; 585def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; 586 587def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 588def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 589def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 590 591// ...alternate 
versions to be allocated odd register numbers: 592def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>; 593def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>; 594def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>; 595 596def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 597def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 598def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 599 600// VLD4 : Vector Load (multiple 4-element structures) 601class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> 602 : NLdSt<0, 0b10, op11_8, op7_4, 603 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 604 (ins addrmode6:$Rn), IIC_VLD4, 605 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { 606 let Rm = 0b1111; 607 let Inst{5-4} = Rn{5-4}; 608 let DecoderMethod = "DecodeVLDInstruction"; 609} 610 611def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; 612def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; 613def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; 614 615def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>; 616def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>; 617def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>; 618 619// ...with address register writeback: 620class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 621 : NLdSt<0, 0b10, op11_8, op7_4, 622 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 623 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, 624 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", 625 "$Rn.addr = $wb", []> { 626 let Inst{5-4} = Rn{5-4}; 627 let DecoderMethod = "DecodeVLDInstruction"; 628} 629 630def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; 631def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; 632def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; 633 634def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; 635def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; 636def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; 637 638// ...with double-spaced registers: 639def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; 640def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; 641def VLD4q32 : VLD4D<0b0001, 
{1,0,?,?}, "32">; 642def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; 643def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; 644def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; 645 646def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 647def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 648def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 649 650// ...alternate versions to be allocated odd register numbers: 651def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 652def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 653def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 654 655def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 656def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 657def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 658 659} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 660 661// Classes for VLD*LN pseudo-instructions with multi-register operands. 662// These are expanded to real instructions after register allocation. 663class VLDQLNPseudo<InstrItinClass itin> 664 : PseudoNLdSt<(outs QPR:$dst), 665 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 666 itin, "$src = $dst">; 667class VLDQLNWBPseudo<InstrItinClass itin> 668 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 669 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 670 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 671class VLDQQLNPseudo<InstrItinClass itin> 672 : PseudoNLdSt<(outs QQPR:$dst), 673 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 674 itin, "$src = $dst">; 675class VLDQQLNWBPseudo<InstrItinClass itin> 676 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 677 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 678 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 679class VLDQQQQLNPseudo<InstrItinClass itin> 680 : PseudoNLdSt<(outs QQQQPR:$dst), 681 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 682 itin, "$src = $dst">; 683class VLDQQQQLNWBPseudo<InstrItinClass itin> 684 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 685 (ins 
addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 686 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 687 688// VLD1LN : Vector Load (single element to one lane) 689class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 690 PatFrag LoadOp> 691 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 692 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), 693 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 694 "$src = $Vd", 695 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 696 (i32 (LoadOp addrmode6:$Rn)), 697 imm:$lane))]> { 698 let Rm = 0b1111; 699 let DecoderMethod = "DecodeVLD1LN"; 700} 701class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 702 PatFrag LoadOp> 703 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 704 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane), 705 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 706 "$src = $Vd", 707 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 708 (i32 (LoadOp addrmode6oneL32:$Rn)), 709 imm:$lane))]> { 710 let Rm = 0b1111; 711 let DecoderMethod = "DecodeVLD1LN"; 712} 713class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> { 714 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), 715 (i32 (LoadOp addrmode6:$addr)), 716 imm:$lane))]; 717} 718 719def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { 720 let Inst{7-5} = lane{2-0}; 721} 722def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { 723 let Inst{7-6} = lane{1-0}; 724 let Inst{4} = Rn{4}; 725} 726def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { 727 let Inst{7} = lane{0}; 728 let Inst{5} = Rn{4}; 729 let Inst{4} = Rn{4}; 730} 731 732def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; 733def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>; 734def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>; 735 736def : Pat<(vector_insert (v2f32 DPR:$src), 737 (f32 (load addrmode6:$addr)), imm:$lane), 738 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 739def : 
Pat<(vector_insert (v4f32 QPR:$src), 740 (f32 (load addrmode6:$addr)), imm:$lane), 741 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 742 743let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 744 745// ...with address register writeback: 746class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 747 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), 748 (ins addrmode6:$Rn, am6offset:$Rm, 749 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, 750 "\\{$Vd[$lane]\\}, $Rn$Rm", 751 "$src = $Vd, $Rn.addr = $wb", []> { 752 let DecoderMethod = "DecodeVLD1LN"; 753} 754 755def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { 756 let Inst{7-5} = lane{2-0}; 757} 758def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { 759 let Inst{7-6} = lane{1-0}; 760 let Inst{4} = Rn{4}; 761} 762def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { 763 let Inst{7} = lane{0}; 764 let Inst{5} = Rn{4}; 765 let Inst{4} = Rn{4}; 766} 767 768def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 769def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 770def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 771 772// VLD2LN : Vector Load (single 2-element structure to one lane) 773class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 774 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), 775 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), 776 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", 777 "$src1 = $Vd, $src2 = $dst2", []> { 778 let Rm = 0b1111; 779 let Inst{4} = Rn{4}; 780 let DecoderMethod = "DecodeVLD2LN"; 781} 782 783def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { 784 let Inst{7-5} = lane{2-0}; 785} 786def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { 787 let Inst{7-6} = lane{1-0}; 788} 789def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { 790 let Inst{7} = lane{0}; 791} 792 793def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 794def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 795def VLD2LNd32Pseudo : 
VLDQLNPseudo<IIC_VLD2ln>;

// VLD2LN variants using double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; }

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;

// VLD2LN variants with address register writeback.  The extra GPR:$wb output
// is tied to the address operand through the "$Rn.addr = $wb" constraint.
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2lnu, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; }

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; }

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;

//   VLD3LN   : Vector Load (single 3-element structure to one lane)
// Each destination register is tied to a matching source register, so the
// lanes that are not loaded keep their incoming contents.
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
                 nohash_imm:$lane),
            IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8">  { let Inst{7-5} = lane{2-0}; }
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; }

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;

// VLD3LN variants using double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; }

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;

// VLD3LN variants with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8">  { let Inst{7-5} = lane{2-0}; }
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; }

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; }

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;

//   VLD4LN   : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
                 nohash_imm:$lane),
            IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// VLD4LN variants using double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;

// VLD4LN variants with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

//   VLD1DUP  : Vector Load (single element to all lanes)
// The selection pattern matches a NEONvdup of a scalar loaded through LoadOp.
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4,
          (outs DPR:$Vd), (ins addrmode6dup:$Rn),
          IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
          [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
// Q-register pseudo carrying the same dup-of-load pattern.
class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
  let Pattern = [(set QPR:$dst,
                      (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
}

def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8",  v8i8,  extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;

def VLD1DUPq8Pseudo  : VLD1QDUPPseudo<v16i8, extloadi8>;
def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;

// f32 dup-of-load is selected to the 32-bit integer forms.
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32Pseudo addrmode6:$addr)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// Two-register form of VLD1DUP (no selection pattern).
class VLD1QDUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1100, op7_4,
          (outs DPR:$Vd, DPR:$dst2), (ins addrmode6dup:$Rn),
          IIC_VLD1dup, "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8">;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;

// VLD1DUP variants with address register writeback:
class VLD1DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1100, op7_4,
          (outs DPR:$Vd, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD1dupu, "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
class VLD1QDUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1100, op7_4,
          (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD1dupu, "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPd8_UPD  : VLD1DUPWB<{0,0,0,0}, "8">;
def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;

def VLD1DUPq8_UPD  : VLD1QDUPWB<{0,0,1,0}, "8">;
def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;

def VLD1DUPq8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD1dupu>;
def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;

//   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1101, op7_4,
          (outs DPR:$Vd, DPR:$dst2), (ins addrmode6dup:$Rn),
          IIC_VLD2dup, "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8">;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;

def VLD2DUPd8Pseudo  : VLDQPseudo<IIC_VLD2dup>;
def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;

// VLD2DUP variants using double-spaced registers (not used for codegen):
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8">;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;

// VLD2DUP variants with address register writeback:
class VLD2DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1101, op7_4,
          (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD2dupu, "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8_UPD  : VLD2DUPWB<{0,0,0,0}, "8">;
def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;

def VLD2DUPd8x2_UPD  : VLD2DUPWB<{0,0,1,0}, "8">;
def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;

def VLD2DUPd8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD2dupu>;
def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;

//   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
// Unlike the other DUP classes here, encoding bit 4 is hard-wired to 0.
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6dup:$Rn),
          IIC_VLD3dup, "vld3", Dt,
          "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// VLD3DUP variants using double-spaced registers (not used for codegen):
def VLD3DUPd8x2  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;

// VLD3DUP variants with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD3dupu, "vld3", Dt,
          "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;

def VLD3DUPd8x2_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;

//   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn),
          IIC_VLD4dup, "vld4", Dt,
          "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// VLD4DUP variants using double-spaced registers (not used for codegen):
def VLD4DUPd8x2  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

// VLD4DUP variants with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD4dupu, "vld4", Dt,
          "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8x2_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Naming: Q / QQ / QQQQ is the register class of the $src operand; "WB" forms
// add a GPR:$wb result tied to the address operand ("$addr.addr = $wb").
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

//   VST1     : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}
class VST1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListTwoD:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def  VST1d8   : VST1D<{0,0,0,?}, "8">;
def  VST1d16  : VST1D<{0,1,0,?}, "16">;
def  VST1d32  : VST1D<{1,0,0,?}, "32">;
def  VST1d64  : VST1D<{1,1,0,?}, "64">;

def  VST1q8   : VST1Q<{0,0,?,?}, "8">;
def  VST1q16  : VST1Q<{0,1,?,?}, "16">;
def  VST1q32  : VST1Q<{1,0,?,?}, "32">;
def  VST1q64  : VST1Q<{1,1,?,?}, "64">;

def  VST1q8Pseudo  : VSTQPseudo<IIC_VST1x2>;
def  VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
def  VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
def  VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;

// ...with address register writeback:
// Each multiclass produces a "_fixed" form ("$Rn!" syntax, Rm hard-wired to
// 0b1101) and a "_register" form (explicit rGPR:$Rm offset operand).
//
// These are stores, so they use the store writeback itineraries rather than
// the IIC_VLD1u / IIC_VLD1x2u load itineraries they previously carried.
// NOTE(review): IIC_VST1u is assumed to be declared next to IIC_VST1x2u
// (which the VST1q*PseudoWB definitions below already use) -- confirm.
multiclass VST1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VST1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VST1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListTwoD:$Vd), IIC_VST1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListTwoD:$Vd),
                        IIC_VST1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8">;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8">;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;

def VST1q8PseudoWB_fixed     : VSTQWBfixedPseudo<IIC_VST1x2u>;
def VST1q16PseudoWB_fixed    : VSTQWBfixedPseudo<IIC_VST1x2u>;
def VST1q32PseudoWB_fixed    : VSTQWBfixedPseudo<IIC_VST1x2u>;
def VST1q64PseudoWB_fixed    : VSTQWBfixedPseudo<IIC_VST1x2u>;
def VST1q8PseudoWB_register  : VSTQWBregisterPseudo<IIC_VST1x2u>;
def VST1q16PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;
def VST1q32PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;
def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
          IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}
class VST1D3WB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3),
          IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST1d8T      : VST1D3<{0,0,0,?}, "8">;
def VST1d16T     : VST1D3<{0,1,0,?}, "16">;
def VST1d32T     : VST1D3<{1,0,0,?}, "32">;
def VST1d64T     : VST1D3<{1,1,0,?}, "64">;

def VST1d8T_UPD  : VST1D3WB<{0,0,0,?}, "8">;
def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;

def VST1d64TPseudo     : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
          []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}
class VST1D4WB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
          "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST1d8Q      : VST1D4<{0,0,?,?}, "8">;
def VST1d16Q     : VST1D4<{0,1,?,?}, "16">;
def VST1d32Q     : VST1D4<{1,0,?,?}, "32">;
def VST1d64Q     : VST1D4<{1,1,?,?}, "64">;

def VST1d8Q_UPD  : VST1D4WB<{0,0,?,?}, "8">;
def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;

def VST1d64QPseudo     : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;

//   VST2     : Vector Store (multiple 2-element structures)
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
          IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}
class VST2Q<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def  VST2d8   : VST2D<0b1000, {0,0,?,?}, "8">;
def  VST2d16  : VST2D<0b1000, {0,1,?,?}, "16">;
def  VST2d32  : VST2D<0b1000, {1,0,?,?}, "32">;

def  VST2q8   : VST2Q<{0,0,?,?}, "8">;
def  VST2q16  : VST2Q<{0,1,?,?}, "16">;
def  VST2q32  : VST2Q<{1,0,?,?}, "32">;

def  VST2d8Pseudo  : VSTQPseudo<IIC_VST2>;
def  VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
def  VST2d32Pseudo : VSTQPseudo<IIC_VST2>;

def  VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def  VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def  VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;

// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
          IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}
class VST2QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
          "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST2d8_UPD  : VST2DWB<0b1000, {0,0,?,?}, "8">;
def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;

def VST2q8_UPD  : VST2QWB<{0,0,?,?}, "8">;
def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;

def VST2d8Pseudo_UPD  : VSTQWBPseudo<IIC_VST2u>;
def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;

def VST2q8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;

// ...with double-spaced registers
def VST2b8      : VST2D<0b1001, {0,0,?,?}, "8">;
def VST2b16     : VST2D<0b1001, {0,1,?,?}, "16">;
def VST2b32     : VST2D<0b1001, {1,0,?,?}, "32">;
def VST2b8_UPD  : VST2DWB<0b1001, {0,0,?,?}, "8">;
def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;

//   VST3     : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def  VST3d8   : VST3D<0b0100, {0,0,0,?}, "8">;
def  VST3d16  : VST3D<0b0100, {0,1,0,?}, "16">;
def  VST3d32  : VST3D<0b0100, {1,0,0,?}, "32">;

def  VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def  VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def  VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;

// ...with double-spaced registers:
def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo   : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo  : VSTQQQQPseudo<IIC_VST3>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

//   VST4     : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def  VST4d8   : VST4D<0b0000, {0,0,?,?}, "8">;
def  VST4d16  : VST4D<0b0000, {0,1,?,?}, "16">;
def  VST4d32  : VST4D<0b0000, {1,0,?,?}, "32">;

def  VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
def  VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def  VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
           "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;

// ...with double-spaced registers:
def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo   : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo  : VSTQQQQPseudo<IIC_VST4>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Each pseudo takes the address, the multi-register source ($src) and the lane
// index; the "WB" forms also take an am6offset and return the updated address
// in GPR:$wb, tied to the address operand.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

//   VST1LN   : Vector Store (single element from one lane)
// The pattern stores, via StoreOp, the element that ExtractOp takes from
// lane $lane of $Vd.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
// Same as VST1LN, but the address operand is addrmode6oneL32.
class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                       NEONvgetlaneu> {
  let Inst{7-5} = lane{2-0};
}
// NOTE(review): bit 4 is driven from Rn{5} here, whereas VST2LN uses Rn{4}
// and VST1LNd32 uses Rn{5-4}; confirm this against the addrmode6 alignment
// encoding.
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{5};
}

def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
          "\\{$Vd[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb",
          [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                  addrmode6:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                             NEONvgetlaneu> {
  let Inst{7-5} = lane{2-0};
}
// NOTE(review): same Rn{5} -> Inst{4} mapping as VST1LNd16; confirm.
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{5};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

//   VST2LN   : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
          IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
          "", []> {
  let Rm = 0b1111;
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
// (Inst{4} = Rn{4} below repeats the setting already made in class VST2LN.)
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{4}   = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;

// ...with address register writeback:
// Operands are named $Rn / $Rm / $Vd, as in VLD2LNWB, VST1LNWB and VST2LN, so
// that they line up with the Rn field this class already references in
// "Inst{4} = Rn{4}".  They were previously $addr / $offset / $src1.  Operand
// order and types are unchanged.
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
          "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;

//   VST3LN   : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { let Inst{7}   = lane{0}; }

def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
// Produces the updated base in $wb, tied to the $Rn address operand.
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VST3lnu, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8">  { let Inst{7-5} = lane{2-0}; }
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { let Inst{7}   = lane{0}; }

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { let Inst{7}   = lane{0}; }

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;

// VST4LN : Vector Store (single 4-element structure from one lane)
// Rm is fixed to 0b1111 and Inst{4} is taken from Rn{4}; no selection pattern.
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
                 nohash_imm:$lane),
            IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "", []> {
  let DecoderMethod = "DecodeVST4LN";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VST4lnu, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST4LN";
  let Inst{4} = Rn{4};
}

def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1


//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// Each transform divides the lane number by the number of lanes per D
// register (8, 4, 2 or 1) and adds the result to ARM::dsub_0. The assert
// checks that dsub_0..dsub_7 are numbered consecutively.
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// Each transform keeps only the low bits of the lane number (mask 7, 3 or 1).
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// N2VD works on D registers (op6 = 0, IIC_VUNAD); N2VQ works on Q registers
// (op6 = 1, IIC_VUNAQ). Both select OpNode applied to $Vm.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Same shape as the basic 2-register classes, but the itinerary is supplied
// by the caller and the pattern applies an Intrinsic instead of an SDNode.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Narrow 2-register operations.
// Q-register source, D-register result; op6 is supplied by the caller.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd), (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd), (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
// D-register source, Q-register result.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd), (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd), (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written: $src1 is tied to $Vd and $src2 to $Vm.
// No selection pattern is attached.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$Vd, DPR:$Vm), (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$Vd, QPR:$Vm), (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
// D-register form; commutability is supplied by the caller.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
         N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  let isCommutable = Commutable;
}

// By-lane D-register forms: the second source is lane $lane of $Vm,
// duplicated with NEONvduplane. $Vm is restricted to DPR_VFP2 (32-bit lanes)
// or DPR_8 (16-bit lanes). These are never commutable.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}

// Q-register counterparts of the classes above.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
         N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  let isCommutable = Commutable;
}
// By-lane Q-register forms: the lane source is still a D register (OpTy),
// duplicated to the Q-register result type (ResTy).
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
// D-register intrinsic; format, itinerary and commutability come from the
// caller.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
        f, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane variants: the second intrinsic argument is lane $lane of $Vm,
// duplicated with NEONvduplane. Never commutable.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// Variant whose operands are listed, printed and matched as $Vm, $Vn (the
// reverse of N3VDInt). Never commutable.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn),
        f, itin, OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let isCommutable = 0;
}

// Q-register counterparts of the classes above.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
        f, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn),
        f, itin, OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// The accumulator $src1 is tied to the destination $Vd. The pattern is
// OpNode(src1, MulOp(Vn, Vm)).
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

// By-lane forms: the multiplier is lane $lane of $Vm, duplicated with
// NEONvduplane.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                           imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                           imm:$lane)))))))]>;

// Q-register counterparts of the classes above.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                      (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                           imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                      (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                           imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// The pattern is OpNode(src1, IntOp(Vn, Vm)); the accumulator $src1 is tied
// to $Vd.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// IntOp receives all three sources: (src1, Vn, Vm), with $src1 tied to $Vd.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// Q-register accumulator and result with D-register multiplicands; the
// pattern is OpNode(src1, MulOp(Vn, Vm)).
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                 (OpNode (TyQ QPR:$src1),
                         (TyQ (MulOp (TyD DPR:$Vn),
                                     (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                        imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                 (OpNode (TyQ QPR:$src1),
                         (TyQ (MulOp (TyD DPR:$Vn),
                                     (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                        imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// The pattern is OpNode(src1, ExtOp(IntOp(Vn, Vm))).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// IntOp takes (src1 : TyQ, Vn : TyD, Vm : TyD); $src1 is tied to $Vd.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// By-lane forms: the third intrinsic argument is lane $lane of $Vm,
// duplicated with NEONvduplane.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Two Q-register sources, D-register result; the itinerary is fixed to
// IIC_VBINi4D.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, IIC_VBINi4D, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Two D-register sources, Q-register result.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane forms: the second source is lane $lane of $Vm, duplicated with
// NEONvduplane.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                 (TyQ (OpNode (TyD DPR:$Vn),
                              (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                 (TyQ (OpNode (TyD DPR:$Vn),
                              (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Both D-register sources are widened with ExtOp to TyQ before OpNode is
// applied; the pattern therefore matches "op(ext(Vn), ext(Vm))".
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// Here the intrinsic runs on the narrow (TyD) operands and its TyD result is
// then widened with ExtOp, i.e. the pattern matches "ext(int(Vn, Vm))".
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                      (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Two D-register sources, Q-register result, selected from an intrinsic.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long intrinsic whose second operand is a duplicated lane of $Vm
// (32-bit lane index, $Vm restricted to DPR_VFP2).
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;

// 16-bit lane variant of N3VLIntSL (VectorIndex16, $Vm restricted to DPR_8).
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Wide 3-register operations.
// Wide form: $Vn is already a Q register; only the D-register $Vm is widened
// with ExtOp before OpNode is applied. The itinerary is fixed to IIC_VSUBiD.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR:$Vm),
        N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$Vn),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
// D-register form (the sixth N2V argument is 0).
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        IIC_VSHLiD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

// Q-register form (the sixth N2V argument is 1).
// NOTE(review): this uses the same IIC_VSHLiD itinerary as the D-register
// form above -- confirm that is intentional.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        IIC_VSHLiD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// D-register accumulate form: $src1 is tied to $Vd and passed to the
// intrinsic as its first argument.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vm),
        IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;

// Q-register accumulate form (IIC_VPALiQ itinerary).
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vm),
        IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
// The immediate operand class (ImmTy), format and itinerary are supplied by
// the instantiation; the shift amount is matched as an i32 immediate.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vm, ImmTy:$SIMM),
           f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;

class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vm, ImmTy:$SIMM),
           f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
// D-register source, Q-register result; the immediate is a plain i32imm and
// the format/itinerary are fixed to N2RegVShLFrm / IIC_VSHLiD.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd),
           (ins DPR:$Vm, i32imm:$SIMM),
           N2RegVShLFrm, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (OpNode (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
// Q-register source, D-register result; format fixed to N2RegVShRFrm.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd),
           (ins QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (OpNode (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// The pattern is "add $src1, (ShOp $Vm, imm)" with $src1 tied to $Vd.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;

// NOTE(review): the Q-register form uses the same IIC_VPALiD itinerary as the
// D-register form above -- confirm that is intentional.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
// ShOp takes three operands: the tied destination/source $src1, the source
// $Vm and the i32 immediate.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;

class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
// The immediate uses the neon_vcvt_imm32 operand and is handed to the
// intrinsic as an i32.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;

class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8
//   bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
//   source operand element sizes of 8, 16 and 32 bits:
// Each def below is spelled out directly on N2V with NoItinerary. The f32
// variants share the 0b10 encoding of the i32 variants, force Inst{10} to 1,
// and yield an integer vector (v2i32 / v4i32) result.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;

  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;

  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;

  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;

  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;

  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;

  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
// D-register defs use itinD, Q-register defs use itinQ; the element width is
// appended to Dt for the assembly data-type suffix.
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp>;
}


// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits:
// Defs are named after the (narrow) result type, while the Dt suffix carries
// the source element width.
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently
// specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
// Defs are named after the (wide) result type; Dt carries the source width.
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
// The 8- and 16-bit defs share the *16 itineraries; the 32-bit defs use the
// *32 itineraries.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// Scalar-lane forms for 16- and 32-bit elements. The 16-bit defs take no
// itinerary argument; the 32-bit defs pass IIC_VMULi32D / IIC_VMULi32Q.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D,
                     OpcodeStr, !strconcat(Dt,"32"),
                     v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q,
                     OpcodeStr, !strconcat(Dt,"32"),
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
// Only one itinerary per register width is taken here, so it is forwarded to
// N3V_QHS for both the 16- and 32-bit slots.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}

// Same element sizes as N3VInt_HS, built on the N3VDIntSh / N3VQIntSh
// classes; there is no Commutable parameter.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// Scalar-lane intrinsic forms for 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"),
                          v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"),
                          v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The 8-bit defs reuse the 16-bit itineraries.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f,
              itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}

multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        Intrinsic IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f,
                itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
// The 64-bit defs reuse the 32-bit itineraries.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f,
               itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}

multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         Intrinsic IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f,
                 itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// Defs are named after the (narrow) result type; Dt carries the source width.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

// Defs are named after the (wide) result type; Dt carries the source width.
// The 8- and 16-bit source forms share itin16.
multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

// Scalar-lane long operations; defs here are named after the source type.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v2i64, v2i32, OpNode>;
}

// Long operations with both operands explicitly extended by ExtOp.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// Scalar-lane long intrinsics; defs here are named after the source type.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"),
                          v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
// A single itinerary is used for all three element sizes.
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector operations,
//   source operand element sizes of 8, 16 and 32 bits:
// No itinerary parameter: N3VW supplies its own.
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// The multiply node is always `mul`; OpNode is the operation combined with it.
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, mul, OpNode>;
}

// Scalar-lane Multiply-Op forms for 16- and 32-bit elements.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"),
                            v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"),
                          v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"),
                            v8i16, v4i16, mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"),
                          v4i32, v2i32, mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, Intrinsic IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "32"),
                       v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "32"),
                       v4i32, v4i32, IntOp>;
}


// Neon Long Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// Defs are named after the (wide) result type; Dt carries the source width.
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i64, v2i32, MulOp, OpNode>;
}

// Scalar-lane long Multiply-Op forms; itineraries are fixed to
// IIC_VMACi16D / IIC_VMACi32D.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D,
                            OpcodeStr, !strconcat(Dt,"16"),
                            v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D,
                          OpcodeStr, !strconcat(Dt, "32"),
                          v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.
// Variant restricted to the 16- and 32-bit element sizes:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // Dt suffix "16": uses the itin16 itinerary.
  def v4i32 : N3VLInt3<
      op24, op23, 0b01, op11_8, op4, itin16,
      OpcodeStr, !strconcat(Dt, "16"),
      v4i32, v4i16, IntOp>;
  // Dt suffix "32": uses the itin32 itinerary.
  def v2i64 : N3VLInt3<
      op24, op23, 0b10, op11_8, op4, itin32,
      OpcodeStr, !strconcat(Dt, "32"),
      v2i64, v2i32, IntOp>;
}

// By-lane ("SL") counterpart of the above.  The itineraries are fixed to
// IIC_VMACi16D / IIC_VMACi32D rather than being template parameters.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<
      op24, 0b01, op11_8, IIC_VMACi16D,
      OpcodeStr, !strconcat(Dt, "16"),
      v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<
      op24, 0b10, op11_8, IIC_VMACi32D,
      OpcodeStr, !strconcat(Dt, "32"),
      v2i64, v2i32, IntOp>;
}

// ....the same again, extended with the 8-bit element size.  The 16- and
// 32-bit records are inherited from N3VLInt3_HS; only v8i16 is added here,
// and it reuses the itin16 itinerary.
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32,
                OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<
      op24, op23, 0b00, op11_8, op4, itin16,
      OpcodeStr, !strconcat(Dt, "8"),
      v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
  // One record per element size; all three share the same itinerary.
  def v8i16 : N3VLIntExtOp<
      op24, op23, 0b00, op11_8, op4, itin,
      OpcodeStr, !strconcat(Dt, "8"),
      v8i16, v8i8, IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<
      op24, op23, 0b01, op11_8, op4, itin,
      OpcodeStr, !strconcat(Dt, "16"),
      v4i32, v4i16, IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<
      op24, op23, 0b10, op11_8, op4, itin,
      OpcodeStr, !strconcat(Dt, "32"),
      v2i64, v2i32, IntOp, ExtOp, OpNode>;
}


// Neon pairwise long 2-register intrinsics,
//   for element sizes of 8, 16 and 32 bits.
//   Records are named after the source vector type.
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
  // D-register (64-bit) forms.
  def v8i8 : N2VDPLInt<
      op24_23, op21_20, 0b00, op17_16, op11_7, op4,
      OpcodeStr, !strconcat(Dt, "8"),
      v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<
      op24_23, op21_20, 0b01, op17_16, op11_7, op4,
      OpcodeStr, !strconcat(Dt, "16"),
      v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<
      op24_23, op21_20, 0b10, op17_16, op11_7, op4,
      OpcodeStr, !strconcat(Dt, "32"),
      v1i64, v2i32, IntOp>;

  // Q-register (128-bit) forms.
  def v16i8 : N2VQPLInt<
      op24_23, op21_20, 0b00, op17_16, op11_7, op4,
      OpcodeStr, !strconcat(Dt, "8"),
      v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<
      op24_23, op21_20, 0b01, op17_16, op11_7, op4,
      OpcodeStr, !strconcat(Dt, "16"),
      v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<
      op24_23, op21_20, 0b10, op17_16, op11_7, op4,
      OpcodeStr, !strconcat(Dt, "32"),
      v2i64, v4i32, IntOp>;
}


// Neon pairwise long 2-register accumulate intrinsics,
//   for element sizes of 8, 16 and 32 bits.
//   Same shape as N2VPLInt_QHS, built on the N2V{D,Q}PLInt2 classes.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  // D-register (64-bit) forms.
  def v8i8 : N2VDPLInt2<
      op24_23, op21_20, 0b00, op17_16, op11_7, op4,
      OpcodeStr, !strconcat(Dt, "8"),
      v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<
      op24_23, op21_20, 0b01, op17_16, op11_7, op4,
      OpcodeStr, !strconcat(Dt, "16"),
      v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<
      op24_23, op21_20, 0b10, op17_16, op11_7, op4,
      OpcodeStr, !strconcat(Dt, "32"),
      v1i64, v2i32, IntOp>;

  // Q-register (128-bit) forms.
  def v16i8 : N2VQPLInt2<
      op24_23, op21_20, 0b00, op17_16, op11_7, op4,
      OpcodeStr, !strconcat(Dt, "8"),
      v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<
      op24_23, op21_20, 0b01, op17_16, op11_7, op4,
      OpcodeStr, !strconcat(Dt, "16"),
      v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<
      op24_23, op21_20, 0b10, op17_16, op11_7, op4,
      OpcodeStr, !strconcat(Dt, "32"),
      v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   with f of either N2RegVShLFrm or N2RegVShRFrm,
//   for element sizes of 8, 16, 32 and 64 bits.
//
// In every shift multiclass below the element size is pinned by fixing the
// leading bits of imm6 (Inst{21-16}); the 64-bit forms leave imm6 free and
// pass 1 instead of 0 as the template argument that follows op11_8.

// Left-shift flavour: N2RegVShLFrm with a plain i32imm shift operand.
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // D-register (64-bit) forms.
  def v8i8 : N2VDSh<
      op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
      OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<
      op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
      OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<
      op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
      OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<
      op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
      OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                              // imm6 = xxxxxx

  // Q-register (128-bit) forms.
  def v16i8 : N2VQSh<
      op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
      OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<
      op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
      OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<
      op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
      OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<
      op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
      OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                              // imm6 = xxxxxx
}
// Right-shift flavour: N2RegVShRFrm with the per-size shr_imm<N> operands.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // D-register (64-bit) forms.
  def v8i8 : N2VDSh<
      op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
      OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<
      op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
      OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<
      op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
      OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<
      op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
      OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                              // imm6 = xxxxxx

  // Q-register (128-bit) forms.
  def v16i8 : N2VQSh<
      op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
      OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<
      op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
      OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<
      op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
      OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<
      op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
      OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                              // imm6 = xxxxxx
}

// Neon shift-accumulate vector operations,
//   for element sizes of 8, 16, 32 and 64 bits.
//   All forms take a shr_imm<N> shift operand.
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // D-register (64-bit) forms.
  def v8i8 : N2VDShAdd<
      op24, op23, op11_8, 0, op4, shr_imm8,
      OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<
      op24, op23, op11_8, 0, op4, shr_imm16,
      OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<
      op24, op23, op11_8, 0, op4, shr_imm32,
      OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<
      op24, op23, op11_8, 1, op4, shr_imm64,
      OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                              // imm6 = xxxxxx

  // Q-register (128-bit) forms.
  def v16i8 : N2VQShAdd<
      op24, op23, op11_8, 0, op4, shr_imm8,
      OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<
      op24, op23, op11_8, 0, op4, shr_imm16,
      OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<
      op24, op23, op11_8, 0, op4, shr_imm32,
      OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<
      op24, op23, op11_8, 1, op4, shr_imm64,
      OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                              // imm6 = xxxxxx
}

// Neon shift-insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm,
//   for element sizes of 8, 16, 32 and 64 bits.
//   The data-type string is the bare element width ("8", "16", ...).

// Left-insert flavour: N2RegVShLFrm, i32imm operand, NEONvsli node.
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // D-register (64-bit) forms.
  def v8i8 : N2VDShIns<
      op24, op23, op11_8, 0, op4, i32imm,
      N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<
      op24, op23, op11_8, 0, op4, i32imm,
      N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<
      op24, op23, op11_8, 0, op4, i32imm,
      N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<
      op24, op23, op11_8, 1, op4, i32imm,
      N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                              // imm6 = xxxxxx

  // Q-register (128-bit) forms.
  def v16i8 : N2VQShIns<
      op24, op23, op11_8, 0, op4, i32imm,
      N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<
      op24, op23, op11_8, 0, op4, i32imm,
      N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<
      op24, op23, op11_8, 0, op4, i32imm,
      N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<
      op24, op23, op11_8, 1, op4, i32imm,
      N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                              // imm6 = xxxxxx
}
// Right-insert flavour: N2RegVShRFrm, shr_imm<N> operands, NEONvsri node.
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // D-register (64-bit) forms.
  def v8i8 : N2VDShIns<
      op24, op23, op11_8, 0, op4, shr_imm8,
      N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<
      op24, op23, op11_8, 0, op4, shr_imm16,
      N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<
      op24, op23, op11_8, 0, op4, shr_imm32,
      N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<
      op24, op23, op11_8, 1, op4, shr_imm64,
      N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                              // imm6 = xxxxxx

  // Q-register (128-bit) forms.
  def v16i8 : N2VQShIns<
      op24, op23, op11_8, 0, op4, shr_imm8,
      N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<
      op24, op23, op11_8, 0, op4, shr_imm16,
      N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<
      op24, op23, op11_8, 0, op4, shr_imm32,
      N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<
      op24, op23, op11_8, 1, op4, shr_imm64,
      N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                              // imm6 = xxxxxx
}

// Neon shift-long operations,
//   for element sizes of 8, 16 and 32 bits.
//   Records are named after the widened vector type.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<
      op24, op23, op11_8, op7, op6, op4,
      OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<
      op24, op23, op11_8, op7, op6, op4,
      OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<
      op24, op23, op11_8, op7, op6, op4,
      OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
}

// Neon shift-narrow operations,
//   for element sizes of 16, 32 and 64 bits.
//   Records are named after the narrowed vector type, while the Dt suffix
//   and the second type argument carry the wider element width.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  def v8i8 : N2VNSh<
      op24, op23, op11_8, op7, op6, op4, itin,
      OpcodeStr, !strconcat(Dt, "16"),
      v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<
      op24, op23, op11_8, op7, op6, op4, itin,
      OpcodeStr, !strconcat(Dt, "32"),
      v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<
      op24, op23, op11_8, op7, op6, op4, itin,
      OpcodeStr, !strconcat(Dt, "64"),
      v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.
// Conventions used in the definitions below:
//  * `defm X : <multiclass><...>` expands to one record per element size /
//    register width, named X followed by the inner def name (for example
//    VMULslv8i16, which the selection patterns further down refer to).
//  * NOTE(review): the trailing 0/1 argument on the three-register forms
//    is presumably the commutable flag (1 on add/mul, 0 on sub) -- confirm
//    against the multiclass definitions earlier in the file.
//  * The anonymous `def : Pat<...>` records select the by-lane ("sl")
//    instruction when one Q-register operand is a NEONvduplane of another
//    Q register.  The by-lane instruction takes a D register, so the
//    pattern uses EXTRACT_SUBREG with DSubReg_*_reg and passes
//    SubReg_*_lane as the lane operand; both are transforms of imm:$lane
//    defined elsewhere in this file.

// VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                         add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
// VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zext, 1>;
// VADDW    : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
                            int_arm_neon_vaddhn, 1>;
// VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

// Vector Multiply Operations.

// VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
// By-lane (scalar) forms.
defm VMULsl   : N3VSL_HS<0b1000, "vmul", "i", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;

// Q-register multiply by a lane duplicated from a Q register.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

// VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "s", NEONvmulls, 1>;
defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "u", NEONvmullu, 1>;
def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                        v8i16, v8i8, int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

// VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA     : Vector Multiply Accumulate (integer and floating-point)
// The floating-point forms are additionally predicated on UseFPVMLx.
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", NEONvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;

// VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;

// VMLS     : Vector Multiply Subtract (integer and floating-point)
// The floating-point forms are additionally predicated on UseFPVMLx.
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

// VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// op11_8 is declared bits<4>, so it is spelled with all four digits like
// the sibling by-lane definitions (same value as the former 0b111).
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;

// Vector Subtract Operations.

// VSUB     : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                     v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                     v4f32, v4f32, fsub, 0>;
// VSUBL    : Vector Subtract Long (Q = D - D)
defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", sub, sext, 0>;
defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", sub, zext, 0>;
// VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
                            int_arm_neon_vsubhn, 0>;
// VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                            int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.
// VCEQ : Vector Compare Equal
// Integer element sizes are expanded by the N3V_QHS multiclass; the f32
// variants are written out as individual D- and Q-register records.
defm VCEQ : N3V_QHS<
    1, 0, 0b1000, 1,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vceq", "i", NEONvceq, 1>;
def VCEQfd : N3VD<
    0, 0, 0b00, 0b1110, 0, IIC_VBIND,
    "vceq", "f32", v2i32, v2f32, NEONvceq, 1>;
def VCEQfq : N3VQ<
    0, 0, 0b00, 0b1110, 0, IIC_VBINQ,
    "vceq", "f32", v4i32, v4f32, NEONvceq, 1>;

// Compare-with-zero form: the assembly string carries a literal "#0".
defm VCEQz : N2V_QHS_cmp<
    0b11, 0b11, 0b01, 0b00010, 0,
    "vceq", "i", "$Vd, $Vm, #0", NEONvceqz>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<
    0, 0, 0b0011, 1,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vcge", "s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<
    1, 0, 0b0011, 1,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vcge", "u", NEONvcgeu, 0>;
def VCGEfd : N3VD<
    1, 0, 0b00, 0b1110, 0, IIC_VBIND,
    "vcge", "f32", v2i32, v2f32, NEONvcge, 0>;
def VCGEfq : N3VQ<
    1, 0, 0b00, 0b1110, 0, IIC_VBINQ,
    "vcge", "f32", v4i32, v4f32, NEONvcge, 0>;

// Compare-with-zero forms (>= 0 and <= 0).
defm VCGEz : N2V_QHS_cmp<
    0b11, 0b11, 0b01, 0b00001, 0,
    "vcge", "s", "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<
    0b11, 0b11, 0b01, 0b00011, 0,
    "vcle", "s", "$Vd, $Vm, #0", NEONvclez>;

// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<
    0, 0, 0b0011, 0,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vcgt", "s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<
    1, 0, 0b0011, 0,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vcgt", "u", NEONvcgtu, 0>;
def VCGTfd : N3VD<
    1, 0, 0b10, 0b1110, 0, IIC_VBIND,
    "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>;
def VCGTfq : N3VQ<
    1, 0, 0b10, 0b1110, 0, IIC_VBINQ,
    "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>;

// Compare-with-zero forms (> 0 and < 0).
defm VCGTz : N2V_QHS_cmp<
    0b11, 0b11, 0b01, 0b00000, 0,
    "vcgt", "s", "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<
    0b11, 0b11, 0b01, 0b00100, 0,
    "vclt", "s", "$Vd, $Vm, #0", NEONvcltz>;

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<
    1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND,
    "vacge", "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def VACGEq : N3VQInt<
    1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ,
    "vacge", "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def VACGTd : N3VDInt<
    1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND,
    "vacgt", "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def VACGTq : N3VQInt<
    1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ,
    "vacgt", "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<
    0, 0, 0b1000, 1,
    IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
    "vtst", "", NEONvtst, 1>;

// Vector Bitwise Operations.

// "NOT" is expressed as XOR against an all-ones vector; one fragment per
// register width (v8i8 for D, v16i8 for Q).
def vnotd : PatFrag<
    (ops node:$in),
    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<
    (ops node:$in),
    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND : Vector Bitwise AND
def VANDd : N3VDX<
    0, 0, 0b00, 0b0001, 1, IIC_VBINiD,
    "vand", v2i32, v2i32, and, 1>;
def VANDq : N3VQX<
    0, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
    "vand", v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def VEORd : N3VDX<
    1, 0, 0b00, 0b0001, 1, IIC_VBINiD,
    "veor", v2i32, v2i32, xor, 1>;
def VEORq : N3VQX<
    1, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
    "veor", v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def VORRd : N3VDX<
    0, 0, 0b10, 0b0001, 1, IIC_VBINiD,
    "vorr", v2i32, v2i32, or, 1>;
def VORRq : N3VQX<
    0, 0, 0b10, 0b0001, 1, IIC_VBINiQ,
    "vorr", v4i32, v4i32, or, 1>;

// VORR (immediate): $src is tied to $Vd; the '?' cmode bits left open in the
// template argument are filled in from SIMM in each record body.
def VORRiv4i16 : N1ModImm<
    1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
    (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
    IIC_VMOVImm,
    "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
    [(set DPR:$Vd,
          (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<
    1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
    (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
    IIC_VMOVImm,
    "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
    [(set DPR:$Vd,
          (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<
    1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
    (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
    IIC_VMOVImm,
    "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
    [(set QPR:$Vd,
          (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<
    1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
    (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
    IIC_VMOVImm,
    "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
    [(set QPR:$Vd,
          (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
def VBICd : N3VX<
    0, 0, 0b01, 0b0001, 0, 1,
    (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
    N3RegFrm, IIC_VBINiD,
    "vbic", "$Vd, $Vn, $Vm", "",
    [(set DPR:$Vd, (v2i32 (and DPR:$Vn, (vnotd DPR:$Vm))))]>;
def VBICq : N3VX<
    0, 0, 0b01, 0b0001, 1, 1,
    (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
    N3RegFrm, IIC_VBINiQ,
    "vbic", "$Vd, $Vn, $Vm", "",
    [(set QPR:$Vd, (v4i32 (and QPR:$Vn, (vnotq QPR:$Vm))))]>;

// VBIC (immediate): same operand shape as the VORR immediate records above,
// with NEONvbicImm as the selected node.
def VBICiv4i16 : N1ModImm<
    1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
    (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
    IIC_VMOVImm,
    "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
    [(set DPR:$Vd,
          (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<
    1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
    (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
    IIC_VMOVImm,
    "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
    [(set DPR:$Vd,
          (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<
    1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
    (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
    IIC_VMOVImm,
    "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
    [(set QPR:$Vd,
          (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<
    1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
    (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
    IIC_VMOVImm,
    "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
    [(set QPR:$Vd,
          (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def VORNd : N3VX<
    0, 0, 0b11, 0b0001, 0, 1,
    (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
    N3RegFrm, IIC_VBINiD,
    "vorn", "$Vd, $Vn, $Vm", "",
    [(set DPR:$Vd, (v2i32 (or DPR:$Vn, (vnotd DPR:$Vm))))]>;
def VORNq : N3VX<
    0, 0, 0b11, 0b0001, 1, 1,
    (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
    N3RegFrm, IIC_VBINiQ,
    "vorn", "$Vd, $Vn, $Vm", "",
    [(set QPR:$Vd, (v4i32 (or QPR:$Vn, (vnotq QPR:$Vm))))]>;

// VMVN : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<
    1, 0b000, {1,0,?,0}, 0, 0, 1, 1,
    (outs DPR:$Vd), (ins nImmSplatI16:$SIMM),
    IIC_VMOVImm,
    "vmvn", "i16", "$Vd, $SIMM", "",
    [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<
    1, 0b000, {1,0,?,0}, 0, 1, 1, 1,
    (outs QPR:$Vd), (ins nImmSplatI16:$SIMM),
    IIC_VMOVImm,
    "vmvn", "i16", "$Vd, $SIMM", "",
    [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// The i32 forms leave all four cmode bits open and copy them from SIMM.
def VMVNv2i32 : N1ModImm<
    1, 0b000, {?,?,?,?}, 0, 0, 1, 1,
    (outs DPR:$Vd), (ins nImmVMOVI32:$SIMM),
    IIC_VMOVImm,
    "vmvn", "i32", "$Vd, $SIMM", "",
    [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<
    1, 0b000, {?,?,?,?}, 0, 1, 1, 1,
    (outs QPR:$Vd), (ins nImmVMOVI32:$SIMM),
    IIC_VMOVImm,
    "vmvn", "i32", "$Vd, $SIMM", "",
    [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
// NOTE(review): VMVNq uses the same IIC_VSUBiD itinerary as VMVNd, while the
// other Q-register records in this group use a Q-suffixed itinerary --
// confirm this is intentional.
def VMVNd : N2VX<
    0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
    "vmvn", "$Vd, $Vm", "",
    [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def VMVNq : N2VX<
    0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
    "vmvn", "$Vd, $Vm", "",
    [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

// VBSL : Vector Bitwise Select
// $src1 is tied to $Vd. Each record is followed by a pattern that maps the
// open-coded (a & mask) | (b & ~mask) form onto the same instruction.
def VBSLd : N3VX<
    1, 0, 0b01, 0b0001, 0, 1,
    (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
    N3RegFrm, IIC_VCNTiD,
    "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
    [(set DPR:$Vd,
          (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;

def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;

def VBSLq : N3VX<
    1, 0, 0b01, 0b0001, 1, 1,
    (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
    N3RegFrm, IIC_VCNTiQ,
    "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
    [(set QPR:$Vd,
          (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
// D- and Q-register VBIF records. Both carry an empty selection pattern and
// tie $src1 to $Vd.
def VBIFd : N3VX<
    1, 0, 0b11, 0b0001, 0, 1,
    (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
    N3RegFrm, IIC_VBINiD,
    "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
    []>;
def VBIFq : N3VX<
    1, 0, 0b11, 0b0001, 1, 1,
    (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
    N3RegFrm, IIC_VBINiQ,
    "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
    []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def VBITd : N3VX<
    1, 0, 0b10, 0b0001, 0, 1,
    (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
    N3RegFrm, IIC_VBINiD,
    "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
    []>;
def VBITq : N3VX<
    1, 0, 0b10, 0b0001, 1, 1,
    (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
    N3RegFrm, IIC_VBINiQ,
    "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
    []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.
// VABD : Vector Absolute Difference
// Signed/unsigned integer forms via N3VInt_QHS; the f32 forms reuse the
// int_arm_neon_vabds intrinsic.
defm VABDs : N3VInt_QHS<
    0, 0, 0b0111, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<
    1, 0, 0b0111, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<
    1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
    "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<
    1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
    "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
// Both the signed and the unsigned variant widen the result with zext.
defm VABDLs : N3VLIntExt_QHS<
    0, 1, 0b0111, 0, IIC_VSUBi4Q,
    "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<
    1, 1, 0b0111, 0, IIC_VSUBi4Q,
    "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<
    0, 0, 0b0111, 1, IIC_VABAD, IIC_VABAQ,
    "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<
    1, 0, 0b0111, 1, IIC_VABAD, IIC_VABAQ,
    "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<
    0, 1, 0b0101, 0, IIC_VABAD,
    "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<
    1, 1, 0b0101, 0, IIC_VABAD,
    "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.
// VMAX : Vector Maximum
// The f32 records select the same intrinsic as the signed integer forms.
defm VMAXs : N3VInt_QHS<
    0, 0, 0b0110, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<
    1, 0, 0b0110, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vmax", "u", int_arm_neon_vmaxu, 1>;
def VMAXfd : N3VDInt<
    0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
    "vmax", "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def VMAXfq : N3VQInt<
    0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
    "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<
    0, 0, 0b0110, 1, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<
    1, 0, 0b0110, 1, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vmin", "u", int_arm_neon_vminu, 1>;
def VMINfd : N3VDInt<
    0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
    "vmin", "f32", v2f32, v2f32, int_arm_neon_vmins, 1>;
def VMINfq : N3VQInt<
    0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
    "vmin", "f32", v4f32, v4f32, int_arm_neon_vmins, 1>;

// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
// Only D-register records are defined for the pairwise add/max/min groups.
def VPADDi8 : N3VDInt<
    0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
    "vpadd", "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>;
def VPADDi16 : N3VDInt<
    0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
    "vpadd", "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>;
def VPADDi32 : N3VDInt<
    0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
    "vpadd", "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>;
def VPADDf : N3VDInt<
    1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VPBIND,
    "vpadd", "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<
    0b11, 0b11, 0b00, 0b00100, 0,
    "vpaddl", "s", int_arm_neon_vpaddls>;
defm VPADDLu : N2VPLInt_QHS<
    0b11, 0b11, 0b00, 0b00101, 0,
    "vpaddl", "u", int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs : N2VPLInt2_QHS<
    0b11, 0b11, 0b00, 0b01100, 0,
    "vpadal", "s", int_arm_neon_vpadals>;
defm VPADALu : N2VPLInt2_QHS<
    0b11, 0b11, 0b00, 0b01101, 0,
    "vpadal", "u", int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def VPMAXs8 : N3VDInt<
    0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def VPMAXs16 : N3VDInt<
    0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def VPMAXs32 : N3VDInt<
    0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def VPMAXu8 : N3VDInt<
    1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def VPMAXu16 : N3VDInt<
    1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def VPMAXu32 : N3VDInt<
    1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def VPMAXf : N3VDInt<
    1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND,
    "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN : Vector Pairwise Minimum
def VPMINs8 : N3VDInt<
    0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def VPMINs16 : N3VDInt<
    0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def VPMINs32 : N3VDInt<
    0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def VPMINu8 : N3VDInt<
    1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def VPMINu16 : N3VDInt<
    1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def VPMINu32 : N3VDInt<
    1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def VPMINf : N3VDInt<
    1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND,
    "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate
// The estimate records exist in "u32" and "f32" flavours that share one
// intrinsic; the step records are f32 only.
def VRECPEd : N2VDInt<
    0b11, 0b11, 0b10, 0b11, 0b01000, 0,
    IIC_VUNAD, "vrecpe", "u32",
    v2i32, v2i32, int_arm_neon_vrecpe>;
def VRECPEq : N2VQInt<
    0b11, 0b11, 0b10, 0b11, 0b01000, 0,
    IIC_VUNAQ, "vrecpe", "u32",
    v4i32, v4i32, int_arm_neon_vrecpe>;
def VRECPEfd : N2VDInt<
    0b11, 0b11, 0b10, 0b11, 0b01010, 0,
    IIC_VUNAD, "vrecpe", "f32",
    v2f32, v2f32, int_arm_neon_vrecpe>;
def VRECPEfq : N2VQInt<
    0b11, 0b11, 0b10, 0b11, 0b01010, 0,
    IIC_VUNAQ, "vrecpe", "f32",
    v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS : Vector Reciprocal Step
def VRECPSfd : N3VDInt<
    0, 0, 0b00, 0b1111, 1, N3RegFrm,
    IIC_VRECSD, "vrecps", "f32",
    v2f32, v2f32, int_arm_neon_vrecps, 1>;
def VRECPSfq : N3VQInt<
    0, 0, 0b00, 0b1111, 1, N3RegFrm,
    IIC_VRECSQ, "vrecps", "f32",
    v4f32, v4f32, int_arm_neon_vrecps, 1>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def VRSQRTEd : N2VDInt<
    0b11, 0b11, 0b10, 0b11, 0b01001, 0,
    IIC_VUNAD, "vrsqrte", "u32",
    v2i32, v2i32, int_arm_neon_vrsqrte>;
def VRSQRTEq : N2VQInt<
    0b11, 0b11, 0b10, 0b11, 0b01001, 0,
    IIC_VUNAQ, "vrsqrte", "u32",
    v4i32, v4i32, int_arm_neon_vrsqrte>;
def VRSQRTEfd : N2VDInt<
    0b11, 0b11, 0b10, 0b11, 0b01011, 0,
    IIC_VUNAD, "vrsqrte", "f32",
    v2f32, v2f32, int_arm_neon_vrsqrte>;
def VRSQRTEfq : N2VQInt<
    0b11, 0b11, 0b10, 0b11, 0b01011, 0,
    IIC_VUNAQ, "vrsqrte", "f32",
    v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<
    0, 0, 0b10, 0b1111, 1, N3RegFrm,
    IIC_VRECSD, "vrsqrts", "f32",
    v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<
    0, 0, 0b10, 0b1111, 1, N3RegFrm,
    IIC_VRECSQ, "vrsqrts", "f32",
    v4f32, v4f32, int_arm_neon_vrsqrts, 1>;

// Vector Shifts.
// VSHL : Vector Shift
// Register-shift forms (N3RegVShFrm), selected from the vshift intrinsics.
defm VSHLs : N3VInt_QHSDSh<
    0, 0, 0b0100, 0, N3RegVShFrm,
    IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
    "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu : N3VInt_QHSDSh<
    1, 0, 0b0100, 0, N3RegVShFrm,
    IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
    "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
defm VSHLi : N2VShL_QHSD<
    0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VShR_QHSD<
    0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs>;
defm VSHRu : N2VShR_QHSD<
    1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru>;

// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<
    0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu : N2VLSh_QHS<
    1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// Extends N2VLSh by fixing Inst{21-16} to the op21_16 template argument and
// routing decoding through DecodeVSHLMaxInstruction.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
                bit op7, bit op6, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, OpNode> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8 : N2VLShMax<
    1, 1, 0b110010, 0b0011, 0, 0, 0,
    "vshll", "i8", v8i16, v8i8, NEONvshlli>;
def VSHLLi16 : N2VLShMax<
    1, 1, 0b110110, 0b0011, 0, 0, 0,
    "vshll", "i16", v4i32, v4i16, NEONvshlli>;
def VSHLLi32 : N2VLShMax<
    1, 1, 0b111010, 0b0011, 0, 0, 0,
    "vshll", "i32", v2i64, v2i32, NEONvshlli>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<
    0, 1, 0b1000, 0, 0, 1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>;

// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSDSh<
    0, 0, 0b0101, 0, N3RegVShFrm,
    IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
    "vrshl", "s", int_arm_neon_vrshifts>;
// Unsigned register form of the vector rounding shift ("vrshl").
defm VRSHLu : N3VInt_QHSDSh<
    1, 0, 0b0101, 0, N3RegVShFrm,
    IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
    "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VShR_QHSD<
    0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>;
defm VRSHRu : N2VShR_QHSD<
    1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<
    0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSDSh<
    0, 0, 0b0100, 1, N3RegVShFrm,
    IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
    "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu : N3VInt_QHSDSh<
    1, 0, 0b0100, 1, N3RegVShFrm,
    IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
    "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VShL_QHSD<
    0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>;
defm VQSHLui : N2VShL_QHSD<
    1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu : N2VShL_QHSD<
    1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<
    0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", NEONvqshrns>;
defm VQSHRNu : N2VNSh_HSD<
    1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u", NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<
    1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSDSh<
    0, 0, 0b0101, 1, N3RegVShFrm,
    IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
    "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu : N3VInt_QHSDSh<
    1, 0, 0b0101, 1, N3RegVShFrm,
    IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
    "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<
    0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s", NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<
    1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u", NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<
    1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s", NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
defm VABS : N2VInt_QHS<
    0b11, 0b11, 0b01, 0b00110, 0,
    IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
    int_arm_neon_vabs>;
def VABSfd : N2VDInt<
    0b11, 0b11, 0b10, 0b01, 0b01110, 0,
    IIC_VUNAD, "vabs", "f32",
    v2f32, v2f32, int_arm_neon_vabs>;
def VABSfq : N2VQInt<
    0b11, 0b11, 0b10, 0b01, 0b01110, 0,
    IIC_VUNAQ, "vabs", "f32",
    v4f32, v4f32, int_arm_neon_vabs>;

// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<
    0b11, 0b11, 0b00, 0b01110, 0,
    IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
    int_arm_neon_vqabs>;

// Vector Negate.
// Negation is expressed as (0 - x); one fragment per register width, with the
// zero vector bitconverted from v2i32 (D) or v4i32 (Q).
def vnegd : PatFrag<
    (ops node:$in),
    (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq : PatFrag<
    (ops node:$in),
    (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

// Shared shapes for the integer VNEG records: only the size field, mnemonic,
// data-type suffix and vector type vary between instances.
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def VNEGs8d  : VNEGD<0b00, "vneg", "s8",  v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def VNEGs8q  : VNEGQ<0b00, "vneg", "s8",  v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def VNEGfd : N2V<
    0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
    "vneg", "f32", "$Vd, $Vm", "",
    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def VNEGf32q : N2V<
    0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
    "vneg", "f32", "$Vd, $Vm", "",
    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

// Stand-alone patterns mapping each typed vnegd/vnegq fragment onto the
// matching integer VNEG record.
def : Pat<(v8i8  (vnegd DPR:$src)), (VNEGs8d  DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q  QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<
    0b11, 0b11, 0b00, 0b01111, 0,
    IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
    int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<
    0b11, 0b11, 0b00, 0b01000, 0,
    IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
    int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<
    0b11, 0b11, 0b00, 0b01001, 0,
    IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
    int_arm_neon_vclz>;
// VCNT : Vector Count One Bits
def VCNTd : N2VDInt<
    0b11, 0b11, 0b00, 0b00, 0b01010, 0,
    IIC_VCNTiD, "vcnt", "8",
    v8i8, v8i8, int_arm_neon_vcnt>;
def VCNTq : N2VQInt<
    0b11, 0b11, 0b00, 0b00, 0b01010, 0,
    IIC_VCNTiQ, "vcnt", "8",
    v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Swap
// Both records have an empty selection pattern and no itinerary.
def VSWPd : N2VX<
    0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
    (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
    "vswp", "$Vd, $Vm", "", []>;
def VSWPq : N2VX<
    0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
    (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
    "vswp", "$Vd, $Vm", "", []>;

// Vector Move Operations.
// VMOV : Vector Move (Register)
// Assembler aliases only: "vmov Vd, Vm" is accepted as VORR with Vm supplied
// for both source operands.
def : InstAlias<"vmov${p} $Vd, $Vm",
                (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : InstAlias<"vmov${p} $Vd, $Vm",
                (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<
    1, 0b000, 0b1110, 0, 0, 0, 1,
    (outs DPR:$Vd), (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
    "vmov", "i8", "$Vd, $SIMM", "",
    [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<
    1, 0b000, 0b1110, 0, 1, 0, 1,
    (outs QPR:$Vd), (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
    "vmov", "i8", "$Vd, $SIMM", "",
    [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

// For i16/i32 the '?' cmode bits are copied from SIMM in the record body.
def VMOVv4i16 : N1ModImm<
    1, 0b000, {1,0,?,0}, 0, 0, 0, 1,
    (outs DPR:$Vd), (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
    "vmov", "i16", "$Vd, $SIMM", "",
    [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<
    1, 0b000, {1,0,?,0}, 0, 1, 0, 1,
    (outs QPR:$Vd), (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
    "vmov", "i16", "$Vd, $SIMM", "",
    [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 : N1ModImm<
    1, 0b000, {?,?,?,?}, 0, 0, 0, 1,
    (outs DPR:$Vd), (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
    "vmov", "i32", "$Vd, $SIMM", "",
    [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<
    1, 0b000, {?,?,?,?}, 0, 1, 0, 1,
    (outs QPR:$Vd), (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
    "vmov", "i32", "$Vd, $SIMM", "",
    [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<
    1, 0b000, 0b1110, 0, 0, 1, 1,
    (outs DPR:$Vd), (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
    "vmov", "i64", "$Vd, $SIMM", "",
    [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<
    1, 0b000, 0b1110, 0, 1, 1, 1,
    (outs QPR:$Vd), (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
    "vmov", "i64", "$Vd, $SIMM", "",
    [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<
    1, 0b000, 0b1111, 0, 0, 0, 1,
    (outs DPR:$Vd), (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
    "vmov", "f32", "$Vd, $SIMM", "",
    [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<
    1, 0b000, 0b1111, 0, 1, 0, 1,
    (outs QPR:$Vd), (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
    "vmov", "f32", "$Vd, $SIMM", "",
    [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable

// VMOV : Vector Get Lane (move scalar to ARM core register)
// The lane index is spread over Inst{21} and Inst{6-5}; how many bits are
// used depends on the element size.

def VGETLNs8 : NVGetLane<
    {1,1,1,0,0,1,?,1}, 0b1011, {?,?},
    (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
    IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
    [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V), imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<
    {1,1,1,0,0,0,?,1}, 0b1011, {?,1},
    (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
    IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
    [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V), imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8 : NVGetLane<
    {1,1,1,0,1,1,?,1}, 0b1011, {?,?},
    (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
    IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
    [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V), imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<
    {1,1,1,0,1,0,?,1}, 0b1011, {?,1},
    (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
    IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
    [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V), imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNi32 : NVGetLane<
    {1,1,1,0,0,0,?,1}, 0b1011, 0b00,
    (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
    IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
    [(set GPR:$R, (extractelt (v2i32 DPR:$V), imm:$lane))]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register sources: extract the D subregister that holds the lane, then
// use the D-register get-lane record with the lane index remapped.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>;
// f32/f64 elements are read as subregisters rather than via a get-lane
// instruction.
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
              (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)),
              (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
              (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)),
              (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<
    {1,1,1,0,0,1,?,0}, 0b1011, {?,?},
    (outs DPR:$V), (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
    IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
    [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                 GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<
    {1,1,1,0,0,0,?,0}, 0b1011, {?,1},
    (outs DPR:$V), (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
    IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
    [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                 GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32 : NVSetLane<
    {1,1,1,0,0,0,?,0}, 0b1011, 0b00,
    (outs DPR:$V), (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
    IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
    [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                             GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{0};
}
}
// Q-register destinations: set the lane in the D subregister that holds it
// and insert that subregister back into the Q register.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                         (DSubReg_i8_reg imm:$lane))),
                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i16_reg imm:$lane))),
                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i32_reg imm:$lane))),
                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: floating-point scalars are placed with INSERT_SUBREG
// into an IMPLICIT_DEF vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

// scalar_to_vector from a core register: set lane 0 of an IMPLICIT_DEF
// D register ...
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

// ... and for Q results, insert that D register as dsub_0.
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3,
          (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3,
          (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

def VDUP8d  : VDUPD<0b11101100, 0b00, "8",  v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
def VDUP8q  : VDUPQ<0b11101110, 0b00, "8",  v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// f32 splats of a bitconverted core register reuse the 32-bit records.
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

// D-register result. IdxTy is the operand class of the $lane input operand;
// the selection pattern matches the lane as a plain immediate.
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;

// Q-register result from a D-register source. The lane is matched with
// imm:$lane, exactly as in VDUPLND: this class is also instantiated for
// 8- and 16-bit elements, so the pattern must not be tied to the 32-bit
// index leaf (VectorIndex32). The per-element-size operand class is supplied
// through IdxTy on the input operand.
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
                                                  imm:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.

// Each VDUPLN record declares a "lane" field just wide enough for its element
// size and wires it into the upper bits of Inst{19-16}; the fixed low bits
// come from the op19_16 template argument.

// 8-bit elements: 3 lane bits in Inst{19-17}.
def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}

// 16-bit elements: 2 lane bits in Inst{19-18}.
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}

// 32-bit elements: 1 lane bit in Inst{19}.
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

// f32 lane duplication from a D register reuses the 32-bit instructions.
def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;
def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Lane duplication whose source is itself a Q register: extract the D
// subregister chosen by HalfOf and duplicate lane LaneIn of it.
// NOTE(review): the exact lane -> (subregister, lane) mapping is implemented
// by the transforms passed in, which are defined elsewhere.
class NEONDupLaneQPat<ValueType QTy, ValueType DTy, Instruction DupInst,
                      SDNodeXForm HalfOf, SDNodeXForm LaneIn>
  : Pat<(QTy (NEONvduplane (QTy QPR:$src), imm:$lane)),
        (QTy (DupInst (DTy (EXTRACT_SUBREG QPR:$src, (HalfOf imm:$lane))),
                      (LaneIn imm:$lane)))>;

def : NEONDupLaneQPat<v16i8, v8i8,  VDUPLN8q,  DSubReg_i8_reg,  SubReg_i8_lane>;
def : NEONDupLaneQPat<v8i16, v4i16, VDUPLN16q, DSubReg_i16_reg, SubReg_i16_lane>;
def : NEONDupLaneQPat<v4i32, v2i32, VDUPLN32q, DSubReg_i32_reg, SubReg_i32_lane>;
def : NEONDupLaneQPat<v4f32, v2f32, VDUPLN32q, DSubReg_i32_reg, SubReg_i32_lane>;

// Pseudo-instructions (empty asm string) for duplicating an f32 held in an
// S register into a D or Q vector.
def  VDUPfdf  : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                    [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def  VDUPfqf  : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                    [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

//   VMOVN    : Vector Narrowing Move
defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                         "vmovn", "i", trunc>;

//   VQMOVN   : Vector Saturating Narrowing Move
//              (signed, unsigned, and signed-to-unsigned "vqmovun")
defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                            "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                            "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;

//   VMOVL    : Vector Lengthening Move (sign- or zero-extending)
defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;

// Vector Conversions.

//   VCVT     : Vector Convert Between Floating-Point and Integers
// Each conversion is listed as its D-register form followed by the
// Q-register form with the same opcode bits.
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v2i32, v2f32, fp_to_sint>;
def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v4i32, v4f32, fp_to_sint>;

def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v2i32, v2f32, fp_to_uint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v4i32, v4f32, fp_to_uint>;

def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v2f32, v2i32, sint_to_fp>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v4f32, v4i32, sint_to_fp>;

def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v2f32, v2i32, uint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v4f32, v4i32, uint_to_fp>;

//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
// Fixed-point conversions are selected through the NEON intrinsics named in
// each record.  D-register form first, then the Q-register form.

// f32 -> signed fixed-point
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;

// f32 -> unsigned fixed-point
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;

// signed fixed-point -> f32
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;

// unsigned fixed-point -> f32
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;

//   VCVT     : Vector Convert Between Half-Precision and Single-Precision.
// Both directions additionally require the HasFP16 predicate.
// f32 -> f16 (result typed as v4i16)
def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                        IIC_VUNAQ, "vcvt", "f16.f32",
                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
                Requires<[HasNEON, HasFP16]>;
// f16 -> f32 (source typed as v4i16)
def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                        IIC_VUNAQ, "vcvt", "f32.f16",
                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
                Requires<[HasNEON, HasFP16]>;

// Vector Reverse.
//   VREV64   : Vector Reverse elements within 64-bit doublewords

// D- and Q-register instruction classes; op19_18 carries the element size.
class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;

// f32 vectors are reversed with the 32-bit integer instructions.
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;

//   VREV32   : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

//   VREV16   : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

//  Aligned extractions: really just dropping registers

// Selects a subvector extraction from a Q register as a bare EXTRACT_SUBREG;
// LaneCVT turns the start index into the subregister index.
class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;

def : AlignedVEXTq<v8i8,  v16i8, DSubReg_i8_reg>;
def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;


//   VEXT     : Vector Extract

// The class default places the whole 4-bit index in Inst{11-8}; the wider
// element sizes below override that so the index occupies only the upper
// bits and the low bits are zero.
class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0,1,0b11,{?,?,?,?},0,0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}

class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0,1,0b11,{?,?,?,?},1,0,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}

// D-register forms.
def VEXTd8  : VEXTd<"vext", "8",  v8i8> {
  let Inst{11-8} = index{3-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}    = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                           (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

// Q-register forms.
def VEXTq8  : VEXTq<"vext", "8",  v16i8> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}    = 0b00;
}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                           (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;

//   VTRN     : Vector Transpose

def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

//   VUZP     : Vector Unzip (Deinterleave)

def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def  VUZPd32  : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

//   VZIP     : Vector Zip (Interleave)

def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def  VZIPd32  : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

// Everything down to the matching close brace shares the TBL decoder method.
let DecoderMethod = "DecodeTBLInstruction" in {

//   VTBL     : Vector Table Lookup
// Only the one-register form carries a selection pattern; the multi-register
// forms have empty pattern lists and list each table register separately.
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudo forms that take the whole table as one wide register operand.
def  VTBL2Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
def  VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def  VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

//   VTBX     : Vector Table Extension
// Same shape as VTBL, plus an $orig input tied to the destination.
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;

let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
        "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
        DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBX2Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
                IIC_VTBX2, "$orig = $dst", []>;
def  VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def  VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// The three classes below select a scalar f32 operation as a v2f32 NEON
// instruction: every scalar input is inserted as ssub_0 of an undefined
// DPR_VFP2 vector, the vector instruction is applied, and ssub_0 of the
// result is extracted.

// One-operand scalar operation.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

// Two-operand scalar operation.
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Accumulating form: OpNode applied to $acc and (MulNode $a, $b).
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$acc, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Arithmetic.
def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
// Multiply-accumulate / multiply-subtract additionally require UseFPVMLx.
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
// Absolute value, negation, max/min.
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
// Float <-> integer conversions.
def : N2VSPat<arm_ftosi, VCVTf2sd>;
def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// A bitconvert between two types held in the same register class selects to
// the source register itself, retyped.
class NEONBitcastD<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (bitconvert (SrcTy DPR:$src))), (DstTy DPR:$src)>;
class NEONBitcastQ<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (bitconvert (SrcTy QPR:$src))), (DstTy QPR:$src)>;

// 64-bit types (D registers), grouped by destination type.
def : NEONBitcastD<v1i64, v2i32>;
def : NEONBitcastD<v1i64, v4i16>;
def : NEONBitcastD<v1i64, v8i8>;
def : NEONBitcastD<v1i64, f64>;
def : NEONBitcastD<v1i64, v2f32>;

def : NEONBitcastD<v2i32, v1i64>;
def : NEONBitcastD<v2i32, v4i16>;
def : NEONBitcastD<v2i32, v8i8>;
def : NEONBitcastD<v2i32, f64>;
def : NEONBitcastD<v2i32, v2f32>;

def : NEONBitcastD<v4i16, v1i64>;
def : NEONBitcastD<v4i16, v2i32>;
def : NEONBitcastD<v4i16, v8i8>;
def : NEONBitcastD<v4i16, f64>;
def : NEONBitcastD<v4i16, v2f32>;

def : NEONBitcastD<v8i8, v1i64>;
def : NEONBitcastD<v8i8, v2i32>;
def : NEONBitcastD<v8i8, v4i16>;
def : NEONBitcastD<v8i8, f64>;
def : NEONBitcastD<v8i8, v2f32>;

def : NEONBitcastD<f64, v1i64>;
def : NEONBitcastD<f64, v2i32>;
def : NEONBitcastD<f64, v4i16>;
def : NEONBitcastD<f64, v8i8>;
def : NEONBitcastD<f64, v2f32>;

def : NEONBitcastD<v2f32, f64>;
def : NEONBitcastD<v2f32, v1i64>;
def : NEONBitcastD<v2f32, v2i32>;
def : NEONBitcastD<v2f32, v4i16>;
def : NEONBitcastD<v2f32, v8i8>;

// 128-bit types (Q registers): conversions to v2i64.
def : NEONBitcastQ<v2i64, v4i32>;
def : NEONBitcastQ<v2i64, v8i16>;
def : NEONBitcastQ<v2i64, v16i8>;
def : NEONBitcastQ<v2i64, v2f64>;
def : NEONBitcastQ<v2i64, v4f32>;
// Q-register bitconverts to v4i32, v8i16, v16i8, v4f32 and v2f64, grouped
// here by source type.  Each selects to the source register itself, retyped.

// From v2i64.
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;

// From v4i32.
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;

// From v8i16.
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;

// From v16i8.
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;

// From v2f64.
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;

// From v4f32.
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;


//===----------------------------------------------------------------------===//
// Assembler aliases
//

// VAND/VEOR/VORR accept but do not require a type suffix.
// Bitwise operations: one alias set per register width (D, then Q).
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// VLD1 requires a size suffix, but also accepts type specific variants.
// Load one D register.  For each element size: the plain form, then the
// writeback form with fixed stride ("$Rn!"), which passes zero_reg as the
// extra operand.
defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1d8 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1d8wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;

defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1d16 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1d16wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;

defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1d32 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;

defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1d64 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;

// Load two D registers.
// Two-register VLD1 aliases.  For each element size: the plain form, then
// the writeback form with fixed stride ("$Rn!"), which passes zero_reg as
// the extra operand.
defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1q8 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1q8wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;

defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1q16 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1q16wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;

defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1q32 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;

defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1q64 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;

// Load three D registers.
// Three-register VLD1 aliases.  For each element size: the plain form, then
// the writeback form with fixed stride ("$Rn!"), which passes zero_reg as
// the extra operand.
defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1d8T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1d8Twb_fixed VecListThreeD:$Vd, zero_reg,
                             addrmode6:$Rn, pred:$p)>;

defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1d16T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1d16Twb_fixed VecListThreeD:$Vd, zero_reg,
                              addrmode6:$Rn, pred:$p)>;

defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1d32T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1d32Twb_fixed VecListThreeD:$Vd, zero_reg,
                              addrmode6:$Rn, pred:$p)>;

defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1d64T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg,
                              addrmode6:$Rn, pred:$p)>;


// Load four D registers.
// Four-register VLD1 aliases.  For each element size: the plain form, then
// the writeback form with fixed stride ("$Rn!"), which passes zero_reg as
// the extra operand.
defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1d8Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1d8Qwb_fixed VecListFourD:$Vd, zero_reg,
                             addrmode6:$Rn, pred:$p)>;

defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1d16Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1d16Qwb_fixed VecListFourD:$Vd, zero_reg,
                              addrmode6:$Rn, pred:$p)>;

defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1d32Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1d32Qwb_fixed VecListFourD:$Vd, zero_reg,
                              addrmode6:$Rn, pred:$p)>;

defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
            (VLD1d64Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
            (VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg,
                              addrmode6:$Rn, pred:$p)>;

// VST1 requires a size suffix, but also accepts type specific variants.
// Store one D register.
// One-register VST1 aliases.  The store instructions take the address operand
// before the register list.  For each element size: the plain form, then the
// writeback form with fixed stride ("$Rn!"), which passes zero_reg first.
defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
            (VST1d8 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
            (VST1d8wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;

defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
            (VST1d16 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
            (VST1d16wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;

defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
            (VST1d32 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
            (VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;

defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
            (VST1d64 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
            (VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;

// Store two D registers.
// Two-register VST1 aliases.  For each element size: the plain form, then
// the writeback form with fixed stride ("$Rn!"), which passes zero_reg first.
defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
            (VST1q8 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
            (VST1q8wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;

defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
            (VST1q16 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
            (VST1q16wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;

defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
            (VST1q32 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
            (VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;

defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
            (VST1q64 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
            (VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;

// FIXME: The three and four register VST1 instructions haven't been moved
// to the VecList* encoding yet, so we can't do assembly parsing support
// for them. Uncomment these when that happens.
// Store three D registers.
//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
//            (VST1d8T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
//            (VST1d16T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
//            (VST1d32T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
//            (VST1d64T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;

// Store four D registers.
//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
//            (VST1d8Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
//            (VST1d16Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
//            (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
//            (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;