// ARMInstrNEON.td revision 20accfc6c7b22b22193eb90c53921f71c1202a73
//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Splat/VMOV modified-immediate operands, one per element width.  Each pairs
// an AsmOperandClass (for the asm matcher) with the MC-level Operand.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Lane-index operands: the immediate must be a valid lane number for the
// element width (8 lanes of i8, 4 of i16, 2 of i32 per D register).
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListTwoDAsmOperand : AsmOperandClass {
  let Name = "VecListTwoD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
  let ParserMatchClass = VecListTwoDAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListTwoQAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListTwoQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListTwoDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListTwoDAllLanesAsmOperand;
}

// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;

def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Matches a VMOVIMM whose decoded modified-immediate is a 32-bit zero splat.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Matches a VMOVIMM whose decoded modified-immediate is an all-ones byte splat.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
                    IIC_fpLoad_m, "",
                   [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
                    IIC_fpStore_m, "",
                   [(store (v2f64 QPR:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD1d8  : VLD1D<{0,0,0,?}, "8">;
def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
def VLD1d64 : VLD1D<{1,1,0,?}, "64">;

def VLD1q8  : VLD1Q<{0,0,?,?}, "8">;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;

def VLD1q8Pseudo  : VLDQPseudo<IIC_VLD1x2>;
def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8">;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8">;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;

def VLD1q8PseudoWB_fixed  : VLDQWBfixedPseudo<IIC_VLD1x2u>;
def VLD1q16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>;
def VLD1q32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>;
def VLD1q64PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>;
def VLD1q8PseudoWB_register  : VLDQWBregisterPseudo<IIC_VLD1x2u>;
def VLD1q16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>;
def VLD1q32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>;
def VLD1q64PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8">;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8">;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;

def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8">;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8">;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;

def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;

// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins addrmode6:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD2d8  : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>;

def VLD2q8  : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;

def VLD2d8Pseudo  : VLDQPseudo<IIC_VLD2>;
def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;

def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;

def VLD2d8PseudoWB_fixed     : VLDQWBfixedPseudo<IIC_VLD2u>;
def VLD2d16PseudoWB_fixed    : VLDQWBfixedPseudo<IIC_VLD2u>;
def VLD2d32PseudoWB_fixed    : VLDQWBfixedPseudo<IIC_VLD2u>;
def VLD2d8PseudoWB_register  : VLDQWBregisterPseudo<IIC_VLD2u>;
def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;

// ...with double-spaced registers
def VLD2b8  : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>;

// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;

// ...with double-spaced registers:
def VLD3q8  : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;

// ...with double-spaced registers:
def VLD4q8  : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
711class VLDQLNPseudo<InstrItinClass itin> 712 : PseudoNLdSt<(outs QPR:$dst), 713 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 714 itin, "$src = $dst">; 715class VLDQLNWBPseudo<InstrItinClass itin> 716 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 717 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 718 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 719class VLDQQLNPseudo<InstrItinClass itin> 720 : PseudoNLdSt<(outs QQPR:$dst), 721 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 722 itin, "$src = $dst">; 723class VLDQQLNWBPseudo<InstrItinClass itin> 724 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 725 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 726 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 727class VLDQQQQLNPseudo<InstrItinClass itin> 728 : PseudoNLdSt<(outs QQQQPR:$dst), 729 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 730 itin, "$src = $dst">; 731class VLDQQQQLNWBPseudo<InstrItinClass itin> 732 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 733 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 734 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 735 736// VLD1LN : Vector Load (single element to one lane) 737class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 738 PatFrag LoadOp> 739 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 740 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), 741 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 742 "$src = $Vd", 743 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 744 (i32 (LoadOp addrmode6:$Rn)), 745 imm:$lane))]> { 746 let Rm = 0b1111; 747 let DecoderMethod = "DecodeVLD1LN"; 748} 749class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 750 PatFrag LoadOp> 751 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 752 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane), 753 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 754 "$src = $Vd", 755 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 756 (i32 (LoadOp addrmode6oneL32:$Rn)), 757 
imm:$lane))]> { 758 let Rm = 0b1111; 759 let DecoderMethod = "DecodeVLD1LN"; 760} 761class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> { 762 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), 763 (i32 (LoadOp addrmode6:$addr)), 764 imm:$lane))]; 765} 766 767def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { 768 let Inst{7-5} = lane{2-0}; 769} 770def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { 771 let Inst{7-6} = lane{1-0}; 772 let Inst{4} = Rn{4}; 773} 774def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { 775 let Inst{7} = lane{0}; 776 let Inst{5} = Rn{4}; 777 let Inst{4} = Rn{4}; 778} 779 780def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; 781def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>; 782def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>; 783 784def : Pat<(vector_insert (v2f32 DPR:$src), 785 (f32 (load addrmode6:$addr)), imm:$lane), 786 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 787def : Pat<(vector_insert (v4f32 QPR:$src), 788 (f32 (load addrmode6:$addr)), imm:$lane), 789 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 790 791let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 792 793// ...with address register writeback: 794class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 795 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), 796 (ins addrmode6:$Rn, am6offset:$Rm, 797 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, 798 "\\{$Vd[$lane]\\}, $Rn$Rm", 799 "$src = $Vd, $Rn.addr = $wb", []> { 800 let DecoderMethod = "DecodeVLD1LN"; 801} 802 803def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { 804 let Inst{7-5} = lane{2-0}; 805} 806def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { 807 let Inst{7-6} = lane{1-0}; 808 let Inst{4} = Rn{4}; 809} 810def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { 811 let Inst{7} = lane{0}; 812 let Inst{5} = Rn{4}; 813 let Inst{4} = Rn{4}; 814} 815 816def 
VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 817def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 818def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 819 820// VLD2LN : Vector Load (single 2-element structure to one lane) 821class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 822 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), 823 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), 824 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", 825 "$src1 = $Vd, $src2 = $dst2", []> { 826 let Rm = 0b1111; 827 let Inst{4} = Rn{4}; 828 let DecoderMethod = "DecodeVLD2LN"; 829} 830 831def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { 832 let Inst{7-5} = lane{2-0}; 833} 834def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { 835 let Inst{7-6} = lane{1-0}; 836} 837def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { 838 let Inst{7} = lane{0}; 839} 840 841def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 842def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 843def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 844 845// ...with double-spaced registers: 846def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { 847 let Inst{7-6} = lane{1-0}; 848} 849def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { 850 let Inst{7} = lane{0}; 851} 852 853def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>; 854def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>; 855 856// ...with address register writeback: 857class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 858 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), 859 (ins addrmode6:$Rn, am6offset:$Rm, 860 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt, 861 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", 862 "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { 863 let Inst{4} = Rn{4}; 864 let DecoderMethod = "DecodeVLD2LN"; 865} 866 867def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { 868 let Inst{7-5} = lane{2-0}; 869} 870def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { 871 let 
Inst{7-6} = lane{1-0}; 872} 873def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { 874 let Inst{7} = lane{0}; 875} 876 877def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>; 878def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>; 879def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>; 880 881def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { 882 let Inst{7-6} = lane{1-0}; 883} 884def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { 885 let Inst{7} = lane{0}; 886} 887 888def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>; 889def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>; 890 891// VLD3LN : Vector Load (single 3-element structure to one lane) 892class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 893 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 894 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, 895 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, 896 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", 897 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> { 898 let Rm = 0b1111; 899 let DecoderMethod = "DecodeVLD3LN"; 900} 901 902def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { 903 let Inst{7-5} = lane{2-0}; 904} 905def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { 906 let Inst{7-6} = lane{1-0}; 907} 908def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { 909 let Inst{7} = lane{0}; 910} 911 912def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; 913def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; 914def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; 915 916// ...with double-spaced registers: 917def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { 918 let Inst{7-6} = lane{1-0}; 919} 920def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { 921 let Inst{7} = lane{0}; 922} 923 924def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>; 925def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>; 926 927// ...with address register writeback: 928class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 929 : NLdStLn<1, 0b10, op11_8, op7_4, 930 
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 931 (ins addrmode6:$Rn, am6offset:$Rm, 932 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), 933 IIC_VLD3lnu, "vld3", Dt, 934 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", 935 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", 936 []> { 937 let DecoderMethod = "DecodeVLD3LN"; 938} 939 940def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { 941 let Inst{7-5} = lane{2-0}; 942} 943def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { 944 let Inst{7-6} = lane{1-0}; 945} 946def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { 947 let Inst{7} = lane{0}; 948} 949 950def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; 951def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; 952def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; 953 954def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { 955 let Inst{7-6} = lane{1-0}; 956} 957def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { 958 let Inst{7} = lane{0}; 959} 960 961def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; 962def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; 963 964// VLD4LN : Vector Load (single 4-element structure to one lane) 965class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 966 : NLdStLn<1, 0b10, op11_8, op7_4, 967 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 968 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, 969 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, 970 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", 971 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { 972 let Rm = 0b1111; 973 let Inst{4} = Rn{4}; 974 let DecoderMethod = "DecodeVLD4LN"; 975} 976 977def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { 978 let Inst{7-5} = lane{2-0}; 979} 980def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { 981 let Inst{7-6} = lane{1-0}; 982} 983def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { 984 let Inst{7} = lane{0}; 985 let Inst{5} = Rn{5}; 986} 987 988def 
VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; 989def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; 990def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; 991 992// ...with double-spaced registers: 993def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { 994 let Inst{7-6} = lane{1-0}; 995} 996def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { 997 let Inst{7} = lane{0}; 998 let Inst{5} = Rn{5}; 999} 1000 1001def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>; 1002def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>; 1003 1004// ...with address register writeback: 1005class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1006 : NLdStLn<1, 0b10, op11_8, op7_4, 1007 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1008 (ins addrmode6:$Rn, am6offset:$Rm, 1009 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 1010 IIC_VLD4lnu, "vld4", Dt, 1011"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm", 1012"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", 1013 []> { 1014 let Inst{4} = Rn{4}; 1015 let DecoderMethod = "DecodeVLD4LN" ; 1016} 1017 1018def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { 1019 let Inst{7-5} = lane{2-0}; 1020} 1021def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { 1022 let Inst{7-6} = lane{1-0}; 1023} 1024def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { 1025 let Inst{7} = lane{0}; 1026 let Inst{5} = Rn{5}; 1027} 1028 1029def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>; 1030def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>; 1031def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>; 1032 1033def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { 1034 let Inst{7-6} = lane{1-0}; 1035} 1036def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { 1037 let Inst{7} = lane{0}; 1038 let Inst{5} = Rn{5}; 1039} 1040 1041def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; 1042def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; 1043 1044} // mayLoad = 1, neverHasSideEffects = 1, 
hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
//
// Loads one element and replicates it to every lane of $Vd (NEONvdup).
// Rm = 0b1111 selects the no-writeback encoding; Inst{4} carries the
// alignment bit taken from the addrmode6dup operand.
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins addrmode6dup:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
// Q-register dup-load pseudo: carries the QPR operand through register
// allocation; the Pattern mirrors VLD1DUP's for the 128-bit types.
class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
  let Pattern = [(set QPR:$dst,
                      (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
}

def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;

def VLD1DUPq8Pseudo  : VLD1QDUPPseudo<v16i8, extloadi8>;
def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;

// f32 dup-loads reuse the i32 instructions.  Use addrmode6dup on the
// result side as well: that is the operand class VLD1DUP declares for
// $Rn, and it matches the name bound on the source side of the pattern.
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6dup:$addr)>;
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32Pseudo addrmode6dup:$addr)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// Two-register (Q-sized) dup-load; assembly-only, no ISel pattern.
class VLD1QDUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd),
          (ins addrmode6dup:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8">;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;

// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs
VecListOneDAllLanes:$Vd, GPR:$wb), 1094 (ins addrmode6dup:$Rn), IIC_VLD1dupu, 1095 "vld1", Dt, "$Vd, $Rn!", 1096 "$Rn.addr = $wb", []> { 1097 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1098 let Inst{4} = Rn{4}; 1099 let DecoderMethod = "DecodeVLD1DupInstruction"; 1100 let AsmMatchConverter = "cvtVLDwbFixed"; 1101 } 1102 def _register : NLdSt<1, 0b10, 0b1100, op7_4, 1103 (outs VecListOneDAllLanes:$Vd, GPR:$wb), 1104 (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, 1105 "vld1", Dt, "$Vd, $Rn, $Rm", 1106 "$Rn.addr = $wb", []> { 1107 let Inst{4} = Rn{4}; 1108 let DecoderMethod = "DecodeVLD1DupInstruction"; 1109 let AsmMatchConverter = "cvtVLDwbRegister"; 1110 } 1111} 1112multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { 1113 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, 1114 (outs VecListTwoDAllLanes:$Vd, GPR:$wb), 1115 (ins addrmode6dup:$Rn), IIC_VLD1dupu, 1116 "vld1", Dt, "$Vd, $Rn!", 1117 "$Rn.addr = $wb", []> { 1118 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1119 let Inst{4} = Rn{4}; 1120 let DecoderMethod = "DecodeVLD1DupInstruction"; 1121 let AsmMatchConverter = "cvtVLDwbFixed"; 1122 } 1123 def _register : NLdSt<1, 0b10, 0b1100, op7_4, 1124 (outs VecListTwoDAllLanes:$Vd, GPR:$wb), 1125 (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, 1126 "vld1", Dt, "$Vd, $Rn, $Rm", 1127 "$Rn.addr = $wb", []> { 1128 let Inst{4} = Rn{4}; 1129 let DecoderMethod = "DecodeVLD1DupInstruction"; 1130 let AsmMatchConverter = "cvtVLDwbRegister"; 1131 } 1132} 1133 1134defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">; 1135defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">; 1136defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">; 1137 1138defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; 1139defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; 1140defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; 1141 1142def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; 1143def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; 1144def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; 1145def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; 1146def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; 1147def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; 1148 1149// VLD2DUP : Vector Load (single 2-element structure to all lanes) 1150class VLD2DUP<bits<4> op7_4, string Dt> 1151 : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2), 1152 (ins addrmode6dup:$Rn), IIC_VLD2dup, 1153 "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { 1154 let Rm = 0b1111; 1155 let Inst{4} = Rn{4}; 1156 let DecoderMethod = "DecodeVLD2DupInstruction"; 1157} 1158 1159def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">; 1160def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">; 1161def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">; 1162 1163def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>; 1164def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>; 1165def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>; 1166 1167// ...with double-spaced registers (not used for codegen): 
// Double-spaced variants of VLD2DUP (Inst{5} = 1, the 'T' bit in op7_4);
// assembly/disassembly only, not used for codegen:
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8">;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;

// ...with address register writeback:
class VLD2DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
          "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8_UPD  : VLD2DUPWB<{0,0,0,0}, "8">;
def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;

def VLD2DUPd8x2_UPD  : VLD2DUPWB<{0,0,1,0}, "8">;
def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;

def VLD2DUPd8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD2dupu>;
def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
//
// Note Inst{4} (the alignment 'a' bit) is hard-wired to 0 here, unlike
// VLD2DUP/VLD4DUP which take it from Rn{4}.
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPd8x2  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;

def VLD3DUPd8x2_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
// 32-bit form: Inst{6} (size bit shared with alignment) comes from Rn{5}.
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPd8x2  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8x2_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
1291class VSTQPseudo<InstrItinClass itin> 1292 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">; 1293class VSTQWBPseudo<InstrItinClass itin> 1294 : PseudoNLdSt<(outs GPR:$wb), 1295 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, 1296 "$addr.addr = $wb">; 1297class VSTQWBfixedPseudo<InstrItinClass itin> 1298 : PseudoNLdSt<(outs GPR:$wb), 1299 (ins addrmode6:$addr, QPR:$src), itin, 1300 "$addr.addr = $wb">; 1301class VSTQWBregisterPseudo<InstrItinClass itin> 1302 : PseudoNLdSt<(outs GPR:$wb), 1303 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, 1304 "$addr.addr = $wb">; 1305class VSTQQPseudo<InstrItinClass itin> 1306 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; 1307class VSTQQWBPseudo<InstrItinClass itin> 1308 : PseudoNLdSt<(outs GPR:$wb), 1309 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, 1310 "$addr.addr = $wb">; 1311class VSTQQQQPseudo<InstrItinClass itin> 1312 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; 1313class VSTQQQQWBPseudo<InstrItinClass itin> 1314 : PseudoNLdSt<(outs GPR:$wb), 1315 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 1316 "$addr.addr = $wb">; 1317 1318// VST1 : Vector Store (multiple single elements) 1319class VST1D<bits<4> op7_4, string Dt> 1320 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd), 1321 IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { 1322 let Rm = 0b1111; 1323 let Inst{4} = Rn{4}; 1324 let DecoderMethod = "DecodeVSTInstruction"; 1325} 1326class VST1Q<bits<4> op7_4, string Dt> 1327 : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListTwoD:$Vd), 1328 IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { 1329 let Rm = 0b1111; 1330 let Inst{5-4} = Rn{5-4}; 1331 let DecoderMethod = "DecodeVSTInstruction"; 1332} 1333 1334def VST1d8 : VST1D<{0,0,0,?}, "8">; 1335def VST1d16 : VST1D<{0,1,0,?}, "16">; 1336def VST1d32 : VST1D<{1,0,0,?}, "32">; 1337def VST1d64 : VST1D<{1,1,0,?}, "64">; 1338 1339def VST1q8 : 
VST1Q<{0,0,?,?}, "8">;
def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;

def VST1q8Pseudo  : VSTQPseudo<IIC_VST1x2>;
def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;

// ...with address register writeback:
//
// These are stores, so they use the IIC_VST1* store itineraries (matching
// the VST1*PseudoWB_* pseudos below), not the IIC_VLD1* load itineraries.
multiclass VST1DWB<bits<4> op7_4, string Dt> {
  // Fixed-increment form ("$Rn!"): Rm = 0b1101 encodes writeback by the
  // access size.
  def _fixed : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VST1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  // Register-increment form: the base is advanced by $Rm.
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VST1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListTwoD:$Vd), IIC_VST1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListTwoD:$Vd),
                        IIC_VST1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8">;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8">;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;

def VST1q8PseudoWB_fixed  : VSTQWBfixedPseudo<IIC_VST1x2u>;
def VST1q16PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>;
def VST1q32PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>;
def VST1q64PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>;
def VST1q8PseudoWB_register  : VSTQWBregisterPseudo<IIC_VST1x2u>;
def VST1q16PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;
def VST1q32PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;
def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins addrmode6:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VST1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VST1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8">;
def VST1d16T : VST1D3<{0,1,0,?}, "16">;
def VST1d32T : VST1D3<{1,0,0,?}, "32">;
def VST1d64T : VST1D3<{1,1,0,?}, "64">;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8">;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;

def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins addrmode6:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VST1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VST1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8">;
def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
def VST1d64Q : VST1D4<{1,1,?,?}, "64">;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8">;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;

def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x4u>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;

// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST2d8  : VST2<0b1000, {0,0,?,?}, "8",  VecListTwoD, IIC_VST2>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>;

def VST2q8  : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;

def VST2d8Pseudo  : VSTQPseudo<IIC_VST2>;
def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
def VST2d32Pseudo :
VSTQPseudo<IIC_VST2>; 1520 1521def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>; 1522def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; 1523def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; 1524 1525// ...with address register writeback: 1526class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> 1527 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1528 (ins addrmode6:$Rn, am6offset:$Rm, VdTy:$Vd), 1529 IIC_VST2u, "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> { 1530 let Inst{5-4} = Rn{5-4}; 1531 let DecoderMethod = "DecodeVSTInstruction"; 1532} 1533class VST2QWB<bits<4> op7_4, string Dt> 1534 : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1535 (ins addrmode6:$Rn, am6offset:$Rm, VecListFourD:$Vd), IIC_VST2x2u, 1536 "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> { 1537 let Inst{5-4} = Rn{5-4}; 1538 let DecoderMethod = "DecodeVSTInstruction"; 1539} 1540 1541def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; 1542def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; 1543def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; 1544 1545def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">; 1546def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">; 1547def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">; 1548 1549def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; 1550def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; 1551def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; 1552 1553def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; 1554def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; 1555def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; 1556 1557// ...with double-spaced registers 1558def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>; 1559def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>; 1560def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>; 1561def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; 1562def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; 1563def VST2b32_UPD : VST2DWB<0b1001, 
                        {1,0,?,?}, "32", VecListTwoQ>;

// VST3 : Vector Store (multiple 3-element structures)
// Stores three D registers to memory with 3-way de-interleaving; Dt is the
// element-size suffix printed in the assembly string ("8"/"16"/"32").
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  // Rm = 0b1111 selects the no-writeback form; the writeback class below
  // binds an am6offset:$Rm operand instead.
  let Rm = 0b1111;
  // Bit 4 is forwarded from the addrmode6 operand encoding — presumably the
  // alignment field; NOTE(review): confirm against the ARM ARM encoding.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

// Pseudos carrying the three D regs as one QQ super-register operand;
// expanded to the real instructions after register allocation.
def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;

// ...with double-spaced registers (op11_8 = 0b0101 instead of 0b0100):
def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// VST4 : Vector Store (multiple 4-element structures)
// Same scheme as VST3 above, with a fourth source register; two bits of the
// address operand are forwarded into the encoding here (Inst{5-4}).
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;

// ...with double-spaced registers (op11_8 = 0b0001 instead of 0b0000):
def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Q / QQ / QQQQ variants, with and without writeback (WB). The source
// registers are carried as a single super-register operand ($src) plus a
// lane index; expansion to real instructions happens post-RA.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
// StoreOp/ExtractOp parameterize the selection pattern: the element is
// extracted from lane $lane of $Vd and stored through $Rn.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
  // No-writeback form (writeback variant binds am6offset:$Rm below).
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
// 32-bit variant uses addrmode6oneL32, a distinct address operand for the
// single 32-bit element case.
class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
// Q-register form: handled as a pseudo whose pattern extracts from a QPR.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

// Lane number goes into different Inst bits depending on element size.
def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                      NEONvgetlaneu> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{5};
}

def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Float lane stores reuse the 32-bit integer instructions.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                    addrmode6:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                            NEONvgetlaneu> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{5};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{4} = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;

// ...with address register writeback:
// NOTE(review): this class names its operands $addr/$offset/$src1 while the
// sibling WB classes (VST3LNWB/VST4LNWB) use $Rn/$Rm/$Vd — inconsistent but
// functional; renaming would touch the encoding field bindings, so it is only
// flagged here.
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$addr, am6offset:$offset,
             DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
            "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
            "$addr.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VST3lnu, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;

// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111;
  // Bit 4 comes from the address operand encoding — presumably alignment;
  // NOTE(review): confirm against the ARM ARM.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VST4lnu, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1


//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// The divisor converts a lane index of the given element width into a D-reg
// index (8 x i8 / 4 x i16 / 2 x i32 / 1 x f64 lanes per D register).
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
2048def SubReg_i8_lane : SDNodeXForm<imm, [{ 2049 return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32); 2050}]>; 2051def SubReg_i16_lane : SDNodeXForm<imm, [{ 2052 return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32); 2053}]>; 2054def SubReg_i32_lane : SDNodeXForm<imm, [{ 2055 return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32); 2056}]>; 2057 2058//===----------------------------------------------------------------------===// 2059// Instruction Classes 2060//===----------------------------------------------------------------------===// 2061 2062// Basic 2-register operations: double- and quad-register. 2063class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2064 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2065 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2066 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2067 (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "", 2068 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>; 2069class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2070 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2071 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2072 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2073 (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "", 2074 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>; 2075 2076// Basic 2-register intrinsics, both double- and quad-register. 
2077class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2078 bits<2> op17_16, bits<5> op11_7, bit op4, 2079 InstrItinClass itin, string OpcodeStr, string Dt, 2080 ValueType ResTy, ValueType OpTy, Intrinsic IntOp> 2081 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2082 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2083 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 2084class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2085 bits<2> op17_16, bits<5> op11_7, bit op4, 2086 InstrItinClass itin, string OpcodeStr, string Dt, 2087 ValueType ResTy, ValueType OpTy, Intrinsic IntOp> 2088 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2089 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2090 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2091 2092// Narrow 2-register operations. 2093class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2094 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2095 InstrItinClass itin, string OpcodeStr, string Dt, 2096 ValueType TyD, ValueType TyQ, SDNode OpNode> 2097 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2098 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2099 [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>; 2100 2101// Narrow 2-register intrinsics. 2102class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2103 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2104 InstrItinClass itin, string OpcodeStr, string Dt, 2105 ValueType TyD, ValueType TyQ, Intrinsic IntOp> 2106 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2107 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2108 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>; 2109 2110// Long 2-register operations (currently only used for VMOVL). 
2111class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2112 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2113 InstrItinClass itin, string OpcodeStr, string Dt, 2114 ValueType TyQ, ValueType TyD, SDNode OpNode> 2115 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2116 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2117 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>; 2118 2119// Long 2-register intrinsics. 2120class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2121 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2122 InstrItinClass itin, string OpcodeStr, string Dt, 2123 ValueType TyQ, ValueType TyD, Intrinsic IntOp> 2124 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2125 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2126 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>; 2127 2128// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. 2129class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt> 2130 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm), 2131 (ins DPR:$src1, DPR:$src2), IIC_VPERMD, 2132 OpcodeStr, Dt, "$Vd, $Vm", 2133 "$src1 = $Vd, $src2 = $Vm", []>; 2134class N2VQShuffle<bits<2> op19_18, bits<5> op11_7, 2135 InstrItinClass itin, string OpcodeStr, string Dt> 2136 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm), 2137 (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm", 2138 "$src1 = $Vd, $src2 = $Vm", []>; 2139 2140// Basic 3-register operations: double- and quad-register. 
// Vd = OpNode(Vn, Vm); Commutable feeds isCommutable so ISel may swap the
// source operands.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  let isCommutable = Commutable;
}

// Scalar ("SL" = single lane) forms: the second source is one lane of Vm,
// splatted via NEONvduplane. The 32-bit form restricts Vm to DPR_VFP2 and
// the 16-bit form to DPR_8 (encoding-limited register ranges).
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // Lane operand makes the sources non-interchangeable.
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
// Intrinsic counterparts of the N3VD/N3VQ family; the Format parameter f
// lets users pick the instruction format class.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Sh" variant: source operands are swapped ($Vm, $Vn) in both the operand
// list and the assembly string.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// Vd = OpNode(src1, MulOp(Vn, Vm)); the accumulator $src1 is tied to $Vd.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                                   (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

// Scalar-lane forms: the multiplier is one lane of Vm (NEONvduplane).
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                           imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                           imm:$lane)))))))]>;

class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                                   (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                                    imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                                    imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
2384class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2385 InstrItinClass itin, string OpcodeStr, string Dt, 2386 ValueType Ty, Intrinsic IntOp, SDNode OpNode> 2387 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2388 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2389 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2390 [(set DPR:$Vd, (Ty (OpNode DPR:$src1, 2391 (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>; 2392class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2393 InstrItinClass itin, string OpcodeStr, string Dt, 2394 ValueType Ty, Intrinsic IntOp, SDNode OpNode> 2395 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2396 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2397 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2398 [(set QPR:$Vd, (Ty (OpNode QPR:$src1, 2399 (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>; 2400 2401// Neon 3-argument intrinsics, both double- and quad-register. 2402// The destination register is also used as the first source operand register. 
// Vd = IntOp(src1, Vn, Vm); the accumulator $src1 is tied to $Vd.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// Q-register accumulator, D-register multiplicands (widening multiply).
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                           imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                           imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Vd = OpNode(src1, ExtOp(IntOp(Vn, Vm))) — the D-sized intrinsic result is
// widened by ExtOp before being combined with the Q accumulator.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
// Long 3-argument intrinsic: Qd = IntOp(Qd, Dn, Dm), destination tied
// to the Q-register accumulator $src1.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// By-lane form, 32-bit lanes: the last operand is a duplicated lane of $Vm.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
// By-lane form for 16-bit lanes (scalar restricted to DPR_8).
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Narrowing 3-register intrinsic: Dd = IntOp(Qn, Qm).
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Qd = OpNode(Dn, Dm): result is twice as wide as the operands.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane form, 32-bit lanes.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn),
                             (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// By-lane form for 16-bit lanes (scalar restricted to DPR_8).
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn),
                             (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Long 3-register operation with explicit operand extension:
// Qd = OpNode(ExtOp(Dn), ExtOp(Dm)).  Both operands are widened first.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// Qd = ExtOp(IntOp(Dn, Dm)): the D-sized intrinsic result is widened.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Long 3-register intrinsic: Qd = IntOp(Dn, Dm).
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane form, 32-bit lanes.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
// By-lane form for 16-bit lanes (scalar restricted to DPR_8).
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Wide 3-register operations.
// Wide 3-register operation: Qd = OpNode(Qn, ExtOp(Dm)).
// Only the second operand is widened.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
// Dd = IntOp(Dm) (e.g. VPADDL-style pairwise widening).
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// Quad-register form of the above.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// Pairwise long accumulate, double-register: Dd = IntOp(Dd, Dm),
// destination tied to $src1 (e.g. VPADAL-style).
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Quad-register form of the above.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
// Dd/Qd = OpNode(Vm, #SIMM).
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
// Long shift by immediate: Qd = OpNode(Dm, #SIMM), widening the result.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
// Dd = OpNode(Qm, #SIMM), narrowing the result.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// Dd/Qd = src1 + ShOp(Vm, #SIMM); destination tied to the accumulator.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
// Shift-and-insert (VSLI/VSRI style): Dd = ShOp(src1, Vm, #SIMM),
// destination tied to $src1.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
// Quad-register form of the above.
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  // Float comparison: same encoding as the 32-bit integer form, with the
  // F bit (Inst{10}) forced to 1; result is an integer mask vector.
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}


// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently
specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// By-lane (scalar) forms, 16- and 32-bit elements.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// Same as N3VInt_HS, but using the "Sh" (shift-style, $Vm first) variants.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// By-lane (scalar) intrinsic forms, 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        Intrinsic IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         Intrinsic IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

// Long by-lane (scalar) forms, 16- and 32-bit elements.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

// Long operations with explicitly extended operands (e.g. VADDL/VSUBL).
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// Long by-lane (scalar) intrinsic forms, 16- and 32-bit elements.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
// Long 3-register intrinsics where the extension (sext/zext) of the
// intermediate result is expressed explicitly via ExtOp (used by VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// OpNode combines the accumulator with the "mul" product (e.g. add for VMLA).
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Scalar (by-lane) multiply-op forms, 16- and 32-bit elements only
// (the 8-bit element size has no by-scalar encoding).
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// OpNode combines the accumulator with the IntOp intrinsic's result.
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, Intrinsic IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// MulOp is the widening multiply; OpNode folds it into the accumulator.
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

// Scalar (by-lane) forms of the long multiply-op, 16/32-bit elements only.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// Scalar (by-lane) forms of the long 3-argument intrinsics.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
// Long 3-argument intrinsics with explicit extend plus accumulate op (VABAL):
// result = OpNode(acc, ExtOp(IntOp(a, b))).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
// Note the result type has half as many lanes, each double-width.
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
// The upper imm6 bits are forced per element size so the decoder can
// recover the element width from the immediate field.
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
// Right-shift variant: uses the shr_imm* operands, whose valid range is
// 1..element-size rather than 0..element-size-1.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
// Left-insert (VSLI) takes a plain i32imm; the data-type suffix is passed
// directly ("8".."64") since shift-insert has no signed/unsigned variants.
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                             // imm6 = xxxxxx
}
// Right-insert (VSRI): same structure with shr_imm* operands and NEONvsri.
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
// The imm1_* operands bound the shift to 1..element-size-1.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
// Note the Dt suffix names the *source* element size (16/32/64), while the
// result vector has half-width elements.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                    OpcodeStr, !strconcat(Dt, "16"),
                    v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// VADD : Vector Add (integer and floating-point)
defm VADD    : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                        add, 1>;
def  VADDfd  : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                    v2f32, v2f32, fadd, 1>;
def  VADDfq  : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                    v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs  : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                           "vaddl", "s", add, sext, 1>;
defm VADDLu  : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                           "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs  : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu  : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs  : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu  : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs  : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu  : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN  : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
                           int_arm_neon_vaddhn, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                           int_arm_neon_vraddhn, 1>;

// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL    : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                       IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd  : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                       "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq  : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                       "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd  : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                    v2f32, v2f32, fmul, 1>;
def  VMULfq  : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                    v4f32, v4f32, fmul, 1>;
defm VMULsl  : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;

// Fold a multiply by a duplicated Q-register lane into the by-scalar form,
// extracting the D subregister that holds the lane.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs  : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                        "vmull", "s", NEONvmulls, 1>;
defm VMULLu  : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                        "vmull", "u", NEONvmullu, 1>;
def  VMULLp  : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                       v8i16, v8i8, int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA    : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// FP multiply-accumulate is only selected when VMLx codegen is desirable
// for the target (UseFPVMLx).
def  VMLAfd  : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                         v2f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
def  VMLAfq  : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                         v4f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
defm VMLAsl  : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs  : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlal", "s", NEONvmulls, add>;
defm VMLALu  : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;

// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;

// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS    : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSfd  : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                         v2f32, fmul_su, fsub_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
def  VMLSfq  : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                         v4f32, fmul_su, fsub_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
defm VMLSsl  : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs  : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu  : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;

// Vector Subtract Operations.

// VSUB : Vector Subtract (integer and floating-point)
defm VSUB    : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                        "vsub", "i", sub, 0>;
def  VSUBfd  : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                    v2f32, v2f32, fsub, 0>;
def  VSUBfq  : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                    v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs  : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                           "vsubl", "s", sub, sext, 0>;
defm VSUBLu  : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                           "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs  : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu  : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs  : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu  : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs  : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu  : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN  : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
                           int_arm_neon_vsubhn, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                           int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.

// VCEQ     : Vector Compare Equal
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
// The f32 compares produce an integer mask vector (v2i32/v4i32 result from
// f32 operands).
def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                     NEONvceq, 1>;
def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                     NEONvceq, 1>;

// "z" variants compare each element against an immediate zero ("#0").
defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                            "$Vd, $Vm, #0", NEONvceqz>;

// VCGE     : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                     NEONvcge, 0>;
def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                     NEONvcge, 0>;

defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                            "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                            "$Vd, $Vm, #0", NEONvclez>;

// VCGT     : Vector Compare Greater Than
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                     NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                     NEONvcgt, 0>;

defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                            "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                            "$Vd, $Vm, #0", NEONvcltz>;

// VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                        "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                        "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                        "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                        "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST     : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// Vector Bitwise Operations.

// Bitwise NOT expressed as XOR with an all-ones vector, for use in the
// VBIC/VORN/VMVN selection patterns below.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND     : Vector Bitwise AND
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                      v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                      v4i32, v4i32, and, 1>;

// VEOR     : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                      v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                      v4i32, v4i32, xor, 1>;

// VORR     : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                      v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                      v4i32, v4i32, or, 1>;

// VORR with a modified immediate; destination is also a source
// ("$src = $Vd" ties them), and instruction bit 9 comes from the encoded
// immediate operand.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000,
{0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  // Instruction bits 10-9 are supplied by the encoded immediate operand.
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC     : Vector Bitwise Bit Clear (AND NOT)
// Computes Vn & ~Vm (see the vnotd/vnotq fragments in the patterns).
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                                (vnotd DPR:$Vm))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                                (vnotq QPR:$Vm))))]>;

// VBIC with a modified immediate; destination is tied to the extra source
// operand ("$src = $Vd").
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN     : Vector Bitwise OR NOT
// Computes Vn | ~Vm.
def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                               (vnotd DPR:$Vm))))]>;
def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                               (vnotq QPR:$Vm))))]>;

// VMVN     : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  // All four of instruction bits 11-8 come from the encoded immediate.
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd), (ins
nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN     : Vector Bitwise NOT
// Register form; patterns match the xor-with-all-ones fragments vnotd/vnotq.
def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

// VBSL     : Vector Bitwise Select
// Selects bits from $Vn where $src1 (tied to $Vd) has a 1 bit and from $Vm
// where it has a 0 bit.
def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VCNTiD,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set DPR:$Vd,
                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;

// Also match the expanded form (Vn & Vd) | (Vm & ~Vd).
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;

def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VCNTiQ,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set QPR:$Vd,
                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;

// VBIF     : Vector Bitwise Insert if False
//            like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT     : Vector Bitwise Insert if True
//            like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD     : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL    : Vector Absolute Difference Long (Q = | D - D |)
// Note: zext is used for both signed and unsigned variants here.
defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA     : Vector Absolute Difference and Accumulate
defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

// VMAX     : Vector Maximum
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "u", int_arm_neon_vmaxu, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f32",
                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f32",
                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMIN     : Vector Minimum
defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "u", int_arm_neon_vminu, 1>;
def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f32",
                        v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f32",
                        v4f32, v4f32, int_arm_neon_vmins, 1>;

// Vector Pairwise Operations.

// VPADD    : Vector Pairwise Add
// Pairwise forms are D-register only; there are no Q-register variants here.
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL   : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL   : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX    : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm,
                        IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN    : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE   : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS   : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;

// VRSQRTE  : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS  : Vector Reciprocal Square Root Step
def  VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def  VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrts, 1>;

// Vector Shifts.

// VSHL     : Vector Shift
defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                              IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                              "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                              IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                              "vshl", "u", int_arm_neon_vshiftu>;

// VSHL     : Vector Shift Left (Immediate)
defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR     : Vector Shift Right (Immediate)
defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>;
defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>;

// VSHLL    : Vector Shift Left Long
defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL    : Vector Shift Left Long (with maximum shift count)
// Fixes instruction bits 21-16 to encode the maximum-shift form and uses a
// dedicated decoder method.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, OpNode> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8, NEONvshlli>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16, NEONvshlli>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32, NEONvshlli>;

// VSHRN    : Vector Shift Right and Narrow
defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                           NEONvshrn>;

// VRSHL    : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR    : Vector Rounding Shift Right
defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>;
defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>;

// VRSHRN   : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrn>;

// VQSHL    : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL    : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;

// VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;

// VQSHRN   : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrns>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnu>;

// VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsu>;

// VQRSHL   : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

// VSRA     : Vector Shift Right and Accumulate
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA    : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI     : Vector Shift Left and Insert
defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI     : Vector Shift Right and Insert
defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.

// VABS     : Vector Absolute Value
defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
                           int_arm_neon_vabs>;
def  VABSfd   : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                        IIC_VUNAD, "vabs", "f32",
                        v2f32, v2f32, int_arm_neon_vabs>;
def  VABSfq   : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                        IIC_VUNAQ, "vabs", "f32",
                        v4f32, v4f32, int_arm_neon_vabs>;

// VQABS    : Vector Saturating Absolute Value
defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                           int_arm_neon_vqabs>;

// Vector Negate.

// Negation expressed as subtraction from an all-zeros vector, for the
// patterns and classes below.
def vnegd  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG     : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG     : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq  QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq  QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq  QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG    : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS     : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
// VCLZ     : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           int_arm_neon_vclz>;
// VCNT     : Vector Count One Bits
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, int_arm_neon_vcnt>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Swap
// Assembler/disassembler only; no selection patterns.
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                     "vswp", "$Vd, $Vm", "", []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                     "vswp", "$Vd, $Vm", "", []>;

// Vector Move Operations.

// VMOV     : Vector Move (Register)
// Register-to-register "vmov" is an alias for VORR with identical source
// operands.
def : InstAlias<"vmov${p} $Vd, $Vm",
                (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : InstAlias<"vmov${p} $Vd, $Vm",
                (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV     : Vector Move (Immediate)

let isReMaterializable = 1 in {
def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
  // Instruction bit 9 is supplied by the encoded immediate operand.
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable

// VMOV     : Vector Get Lane (move scalar to ARM core register)

// The lane number is spread across instruction bits 21 and 6-5 depending on
// the element size.
def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1},
0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                                    imm:$lane))]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Lane extraction from a Q register: extract the D subregister that holds
// the lane and use the D-register instruction on it.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV     : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                          IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                                       GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                                       GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                                   GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{0};
}
}
// Lane insertion into a Q register: set the lane within the relevant D
// subregister, then put that subregister back.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                         (DSubReg_i8_reg imm:$lane))),
                                  GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i16_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i32_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: start from IMPLICIT_DEF and write element 0.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP     : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS,
"vdup", Dt, "$V, $R", 4790 [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>; 4791 4792def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; 4793def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; 4794def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>; 4795def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; 4796def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; 4797def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; 4798 4799def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>; 4800def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; 4801 4802// VDUP : Vector Duplicate Lane (from scalar to all elements) 4803 4804class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, 4805 ValueType Ty, Operand IdxTy> 4806 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 4807 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane", 4808 [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>; 4809 4810class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt, 4811 ValueType ResTy, ValueType OpTy, Operand IdxTy> 4812 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 4813 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane", 4814 [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm), 4815 VectorIndex32:$lane)))]>; 4816 4817// Inst{19-16} is partially specified depending on the element size. 

// The lane index occupies the bits of Inst{19-16} above the element-size
// marker bit(s); width depends on the element size (3/2/1 bits).
def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

// f32 lane dups reuse the 32-bit integer encodings.
def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Duplicating a lane of a Q register: dup from the D subreg holding the lane.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

// Pseudos for dup-from-SPR; expanded after register allocation.
def  VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                           [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def  VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                           [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// VMOVN : Vector Narrowing Move
defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                         "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                            "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                            "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;

// Vector Conversions.

// VCVT : Vector Convert Between Floating-Point and Integers
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v2i32, v2f32, fp_to_sint>;
def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v2i32, v2f32, fp_to_uint>;
def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v2f32, v2i32, sint_to_fp>;
def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v2f32, v2i32, uint_to_fp>;

def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v4i32, v4f32, fp_to_sint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v4i32, v4f32, fp_to_uint>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v4f32, v4i32, sint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v4f32, v4i32, uint_to_fp>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
4908let DecoderMethod = "DecodeVCVTD" in { 4909def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 4910 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; 4911def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 4912 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; 4913def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 4914 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; 4915def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", 4916 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; 4917} 4918 4919let DecoderMethod = "DecodeVCVTQ" in { 4920def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 4921 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; 4922def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 4923 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; 4924def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 4925 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; 4926def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", 4927 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; 4928} 4929 4930// VCVT : Vector Convert Between Half-Precision and Single-Precision. 4931def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, 4932 IIC_VUNAQ, "vcvt", "f16.f32", 4933 v4i16, v4f32, int_arm_neon_vcvtfp2hf>, 4934 Requires<[HasNEON, HasFP16]>; 4935def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, 4936 IIC_VUNAQ, "vcvt", "f32.f16", 4937 v4f32, v4i16, int_arm_neon_vcvthf2fp>, 4938 Requires<[HasNEON, HasFP16]>; 4939 4940// Vector Reverse. 
4941 4942// VREV64 : Vector Reverse elements within 64-bit doublewords 4943 4944class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 4945 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd), 4946 (ins DPR:$Vm), IIC_VMOVD, 4947 OpcodeStr, Dt, "$Vd, $Vm", "", 4948 [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>; 4949class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 4950 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd), 4951 (ins QPR:$Vm), IIC_VMOVQ, 4952 OpcodeStr, Dt, "$Vd, $Vm", "", 4953 [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>; 4954 4955def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; 4956def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; 4957def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; 4958def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>; 4959 4960def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; 4961def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; 4962def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; 4963def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>; 4964 4965// VREV32 : Vector Reverse elements within 32-bit words 4966 4967class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 4968 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd), 4969 (ins DPR:$Vm), IIC_VMOVD, 4970 OpcodeStr, Dt, "$Vd, $Vm", "", 4971 [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>; 4972class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 4973 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd), 4974 (ins QPR:$Vm), IIC_VMOVQ, 4975 OpcodeStr, Dt, "$Vd, $Vm", "", 4976 [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>; 4977 4978def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>; 4979def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>; 4980 4981def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>; 4982def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>; 4983 4984// VREV16 : Vector Reverse 
// elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// Aligned extractions: really just dropping registers

class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
      : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
             (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;


// VEXT : Vector Extract

class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}

class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        // Use the immTy parameter rather than a hard-coded imm0_15 so each
        // instantiation (VEXTq8/16/32/64) gets its proper index range and the
        // assembler rejects out-of-range indices.
        (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}

// For element sizes wider than a byte, the low bits of the byte-granular
// index field are forced to zero.
def VEXTd8  : VEXTd<"vext", "8",  v8i8, imm0_7> {
  let Inst{11-8} = index{3-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                           (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

def VEXTq8  : VEXTq<"vext", "8",  v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11} = index{0};
  let Inst{10-8} = 0b000;
}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                           (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;

// VTRN : Vector Transpose

def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
def  VUZPd32  : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;

def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
def  VZIPd32  : N2VDShuffle<0b10, 0b00011, "vzip", "32">;

def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

// VTBL : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudos taking the whole table as one super-register; expanded later.
def  VTBL2Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
def  VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def  VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
        "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
        DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBX2Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
                IIC_VTBX2, "$orig = $dst", []>;
def  VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def  VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// These patterns run scalar f32 math through a NEON D-register instruction:
// each SPR operand is inserted into lane 0 of an undefined D register
// (constrained to DPR_VFP2 so S subregs exist) and the result is extracted
// from lane 0 again.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$a, ssub_0),
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
      (EXTRACT_SUBREG
       (v2f32 (COPY_TO_REGCLASS (Inst
         (INSERT_SUBREG
           (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
           SPR:$acc, ssub_0),
         (INSERT_SUBREG
           (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
           SPR:$a, ssub_0),
         (INSERT_SUBREG
           (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
           SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
def : N2VSPat<arm_ftosi, VCVTf2sd>;
def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert: all 64-bit vector types share D registers, so a bitcast is a
// no-op at the register level; likewise for 128-bit types in Q registers.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;


//===----------------------------------------------------------------------===//
// Assembler aliases
//

// Legacy VFP mnemonics for setting the high/low half of a D register.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;


// VADD two-operand aliases.
def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
                    (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
                    (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
                    (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
                    (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
                    (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
                    (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
                    (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
                    (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
                    (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
                    (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VSUB two-operand aliases.
// Two-operand aliases map "op Vd, Vm" onto the three-operand form
// "op Vd, Vd, Vm".
def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
                    (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
                    (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
                    (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
                    (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
                    (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
                    (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
                    (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
                    (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
                    (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
                    (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VADDW two-operand aliases.
def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm",
                    (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm",
                    (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm",
                    (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm",
                    (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm",
                    (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm",
                    (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
def : NEONInstAlias<"vand${p} $Vdn, $Vm",
                    (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vand${p} $Vdn, $Vm",
                    (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
                    (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
                    (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"veor${p} $Vdn, $Vm",
                    (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"veor${p} $Vdn, $Vm",
                    (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
                    (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
                    (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VMUL two-operand aliases.
def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm",
                    (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm",
                    (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm",
                    (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm",
                    (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;

def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm",
                    (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm",
                    (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm",
                    (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm",
                    (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm",
                    (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm",
                    (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;

// Lane forms: the multiplier register is restricted (DPR_8 / DPR_VFP2)
// because the encoding has fewer bits for it.
def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
                    (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
                                 VectorIndex16:$lane, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
                    (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
                                 VectorIndex16:$lane, pred:$p)>;

def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
                    (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
                                 VectorIndex32:$lane, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
                    (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
                                 VectorIndex32:$lane, pred:$p)>;
def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
                    (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
                     VectorIndex32:$lane, pred:$p)>;
def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
                    (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
                     VectorIndex32:$lane, pred:$p)>;

// VQADD (register) two-operand aliases: "vqadd Vd, Vm" for
// "vqadd Vd, Vd, Vm". D-register variants first, then Q-register.
def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
                    (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
                    (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
                    (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
                    (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
                    (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
                    (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
                    (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
                    (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
                    (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
                    (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
                    (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
                    (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
                    (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
                    (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
                    (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
                    (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VSHL (immediate) two-operand aliases. The immediate operand class
// bounds the shift amount per element size (imm0_7 ... imm0_63).
def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
                    (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
                    (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
                    (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
                    (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>;

def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
                    (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
                    (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
                    (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
                    (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>;

// VSHL (register) two-operand aliases.
def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
                    (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
                    (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
                    (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
                    (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
                    (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
                    (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
                    (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
                    (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
                    (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
                    (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
                    (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
                    (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
                    (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
                    (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
                    (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
                    (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VSHR (immediate) two-operand aliases.
def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
                    (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
                    (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
                    (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
                    (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;

def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
                    (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
                    (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
                    (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
                    (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;

// Unsigned variants; the shr_imm* operand classes bound the shift amount
// for right shifts (which start at 1, unlike VSHL's imm0_* classes).
def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
                    (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
                    (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
                    (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
                    (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;

def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
                    (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
                    (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
                    (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
                    (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;

// VLD1 single-lane pseudo-instructions.
// These need special handling for the lane index that an InstAlias
// can't handle, so we use these pseudos instead; the asm parser
// rewrites them to the real lane-indexed instructions.
//
// NOTE(review): the DT16/DT32 variants below reuse VecListOneDByteIndexed
// for $list, the same operand class as the DT8 variant. That presumably
// accepts byte-sized lane indices for the 16- and 32-bit element forms as
// well — TODO: confirm whether halfword/word-indexed list operand classes
// should be used instead.
defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;

// Post-increment (writeback) forms: fixed stride ("$addr!") ...
defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
// ... and register stride ("$addr, $Rm").
defm VLD1LNdWB_register_Asm :
        NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
defm VLD1LNdWB_register_Asm :
        NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
defm VLD1LNdWB_register_Asm :
        NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): as with the VLD1 lane pseudos, the DT16/DT32 variants
// reuse VecListOneDByteIndexed for $list — TODO: confirm whether
// halfword/word-indexed list operand classes should be used instead.
defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;

// Post-increment (writeback) forms: fixed stride ("$addr!") ...
defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
// ... and register stride ("$addr, $Rm").
defm VST1LNdWB_register_Asm :
        NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
defm VST1LNdWB_register_Asm :
        NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
defm VST1LNdWB_register_Asm :
        NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;

// VMOV takes an optional datatype suffix; implemented as VORR with both
// source operands the same register.
defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
// Note the swapped Dm/Dn order in the result: "vclt Dd, Dn, Dm" becomes
// "vcgt Dd, Dm, Dn".
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// Two-operand variants for VEXT. The imm0_* operand class bounds the
// byte/element position for each element size.
def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
                    (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>;
def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
                    (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>;
def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
                    (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>;

def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
                    (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>;
def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
                    (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>;
def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
                    (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>;
def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm",
                    (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>;

// Two-operand variants for VQDMULH
def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
                    (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
                    (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;

def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
                    (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
                    (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
// Each "<mnemonic>q" spelling is simply remapped to the plain mnemonic;
// operand register classes then select the D- or Q-register encoding.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;