ARMInstrNEON.td revision 4dedddce93ffb4476fb269caddb10da60a0a8d84
//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profiles shared by the vector compare nodes below.
//   SDTARMVCMP  - one result, two operands; the result is integer-typed and
//                 the two operands must have the same type as each other.
//   SDTARMVCMPZ - one result, one operand, no type constraints.
def SDTARMVCMP  : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;

// Two-operand nodes (SDTARMVCMP).
def NEONvceq  : SDNode<"ARMISD::VCEQ",  SDTARMVCMP>;
def NEONvcge  : SDNode<"ARMISD::VCGE",  SDTARMVCMP>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt  : SDNode<"ARMISD::VCGT",  SDTARMVCMP>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst  : SDNode<"ARMISD::VTST",  SDTARMVCMP>;

// Single-operand nodes (SDTARMVCMPZ).
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
// Shift-by-immediate type profiles.  In each of them the last operand is the
// i32 shift amount.
def SDTARMVSH
  : SDTypeProfile<1, 2,
                  [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def SDTARMVSHX
  : SDTypeProfile<1, 2,
                  [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, i32>]>;
def SDTARMVSHINS
  : SDTypeProfile<1, 3,
                  [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                   SDTCisVT<3, i32>]>;

// Plain shifts.
def NEONvshl      : SDNode<"ARMISD::VSHL",      SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs",     SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu",     SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs",    SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu",    SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi",    SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN",     SDTARMVSHX>;

// VRSHR* nodes.
def NEONvrshrs    : SDNode<"ARMISD::VRSHRs",    SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu",    SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN",    SDTARMVSHX>;

// VQSH* nodes.
def NEONvqshls    : SDNode<"ARMISD::VQSHLs",    SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu",    SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu",   SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs",   SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu",   SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu",  SDTARMVSHX>;

// VQRSHRN* nodes.
def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs",  SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu",  SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert nodes (three operands, see SDTARMVSHINS).
def NEONvsli      : SDNode<"ARMISD::VSLI",      SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI",      SDTARMVSHINS>;

// Lane extraction: i32 result, integer-typed source, i32 second operand.
def SDTARMVGETLN
  : SDTypeProfile<1, 2,
                  [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Vector result built from a single i32 operand.
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;

// Vector result, a vector source of the same type, and an i32 operand.
def SDTARMVORRIMM
  : SDTypeProfile<1, 2,
                  [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane
  : SDNode<"ARMISD::VDUPLANE",
           SDTypeProfile<1, 2,
                         [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i32>]>>;

// Two vector sources of the result type plus an i32 operand.
def SDTARMVEXT
  : SDTypeProfile<1, 3,
                  [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                   SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// One-in / one-out shuffles: source and result share a vector type.
def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-in / two-out shuffles: all four values share a vector type.
def SDTARMVSHUF2
  : SDTypeProfile<2, 2,
                  [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                   SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Integer result and integer sources; only the two sources are tied together.
def SDTARMVMULL
  : SDTypeProfile<1, 2,
                  [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

// f32 result with two f32 sources.
def SDTARMFMAX
  : SDTypeProfile<1, 2,
                  [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Matches a NEONvmovImm whose immediate decodes to 32-bit elements equal to 0.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ImmOp = cast<ConstantSDNode>(N->getOperand(0));
  unsigned ElemBits = 0;
  uint64_t ElemVal =
      ARM_AM::decodeNEONModImm(ImmOp->getZExtValue(), ElemBits);
  return ElemBits == 32 && ElemVal == 0;
}]>;

// Matches a NEONvmovImm whose immediate decodes to 8-bit elements equal to
// 0xff.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ImmOp = cast<ConstantSDNode>(N->getOperand(0));
  unsigned ElemBits = 0;
  uint64_t ElemVal =
      ARM_AM::decodeNEONModImm(ImmOp->getZExtValue(), ElemBits);
  return ElemBits == 8 && ElemVal == 0xff;
}]>;

//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//

// i32 operand printed through printNEONModImmOperand.
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
                             IIC_fpLoad_m, "",
                             [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
def VLDMQDB : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
                             IIC_fpLoad_m, "",
                             [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
                             IIC_fpStore_m, "",
                             [(store (v2f64 QPR:$src), GPR:$Rn)]>;
def VSTMQDB : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
                             IIC_fpStore_m, "",
                             [(store (v2f64 QPR:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
167class VLDQPseudo<InstrItinClass itin> 168 : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; 169class VLDQWBPseudo<InstrItinClass itin> 170 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 171 (ins addrmode6:$addr, am6offset:$offset), itin, 172 "$addr.addr = $wb">; 173class VLDQQPseudo<InstrItinClass itin> 174 : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; 175class VLDQQWBPseudo<InstrItinClass itin> 176 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 177 (ins addrmode6:$addr, am6offset:$offset), itin, 178 "$addr.addr = $wb">; 179class VLDQQQQWBPseudo<InstrItinClass itin> 180 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 181 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 182 "$addr.addr = $wb, $src = $dst">; 183 184let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 185 186// VLD1 : Vector Load (multiple single elements) 187class VLD1D<bits<4> op7_4, string Dt> 188 : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd), 189 (ins addrmode6:$Rn), IIC_VLD1, 190 "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> { 191 let Rm = 0b1111; 192 let Inst{4} = Rn{4}; 193} 194class VLD1Q<bits<4> op7_4, string Dt> 195 : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2), 196 (ins addrmode6:$Rn), IIC_VLD1x2, 197 "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { 198 let Rm = 0b1111; 199 let Inst{5-4} = Rn{5-4}; 200} 201 202def VLD1d8 : VLD1D<{0,0,0,?}, "8">; 203def VLD1d16 : VLD1D<{0,1,0,?}, "16">; 204def VLD1d32 : VLD1D<{1,0,0,?}, "32">; 205def VLD1d64 : VLD1D<{1,1,0,?}, "64">; 206 207def VLD1q8 : VLD1Q<{0,0,?,?}, "8">; 208def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; 209def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; 210def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; 211 212def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>; 213def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>; 214def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>; 215def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>; 216 217// ...with address register writeback: 218class VLD1DWB<bits<4> op7_4, string Dt> 219 : NLdSt<0,0b10,0b0111,op7_4, (outs 
DPR:$Vd, GPR:$wb), 220 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u, 221 "vld1", Dt, "\\{$Vd\\}, $Rn$Rm", 222 "$Rn.addr = $wb", []> { 223 let Inst{4} = Rn{4}; 224} 225class VLD1QWB<bits<4> op7_4, string Dt> 226 : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), 227 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u, 228 "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", 229 "$Rn.addr = $wb", []> { 230 let Inst{5-4} = Rn{5-4}; 231} 232 233def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">; 234def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">; 235def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">; 236def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">; 237 238def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">; 239def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">; 240def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">; 241def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">; 242 243def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; 244def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; 245def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; 246def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; 247 248// ...with 3 registers (some of these are only for the disassembler): 249class VLD1D3<bits<4> op7_4, string Dt> 250 : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 251 (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, 252 "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { 253 let Rm = 0b1111; 254 let Inst{4} = Rn{4}; 255} 256class VLD1D3WB<bits<4> op7_4, string Dt> 257 : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 258 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt, 259 "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { 260 let Inst{4} = Rn{4}; 261} 262 263def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; 264def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; 265def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; 266def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; 267 268def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">; 269def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">; 270def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">; 271def VLD1d64T_UPD : 
VLD1D3WB<{1,1,0,?}, "64">;

def VLD1d64TPseudo     : VLDQQPseudo<IIC_VLD1x3>;
def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;

// ...with 4 registers (some of these are only for the disassembler):
// VLD1D4 is the form without writeback: Rm is fixed to 0b1111 and
// Inst{5-4} is taken from bits 5-4 of the addrmode6 operand.
class VLD1D4<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
          "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}
// VLD1D4WB is the writeback form: it takes an am6offset operand in $Rm and
// ties the address to the extra $wb result.
// It is scheduled with IIC_VLD1x4u, the writeback counterpart of the
// IIC_VLD1x4 used by VLD1D4 above and the same itinerary that
// VLD1d64QPseudo_UPD below is given.  It previously used IIC_VLD4, the
// itinerary of the 4-element structure load.
class VLD1D4WB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0010,op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
          "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
          []> {
  let Inst{5-4} = Rn{5-4};
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8">;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;

def VLD1d8Q_UPD  : VLD1D4WB<{0,0,?,?}, "8">;
def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">;
def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">;
def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">;

def VLD1d64QPseudo     : VLDQQPseudo<IIC_VLD1x4>;
def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;

//   VLD2     : Vector Load (multiple 2-element structures)
// VLD2D loads two D registers; op11_8 is supplied by each def.
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
          (ins addrmode6:$Rn), IIC_VLD2,
          "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}
// VLD2Q loads four D registers with op11_8 fixed to 0b0011.
class VLD2Q<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0011, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD2x2,
          "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}

def VLD2d8   : VLD2D<0b1000, {0,0,?,?}, "8">;
def VLD2d16  : VLD2D<0b1000, {0,1,?,?}, "16">;
def
VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">; 326 327def VLD2q8 : VLD2Q<{0,0,?,?}, "8">; 328def VLD2q16 : VLD2Q<{0,1,?,?}, "16">; 329def VLD2q32 : VLD2Q<{1,0,?,?}, "32">; 330 331def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>; 332def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>; 333def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>; 334 335def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>; 336def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; 337def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; 338 339// ...with address register writeback: 340class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 341 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), 342 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u, 343 "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", 344 "$Rn.addr = $wb", []> { 345 let Inst{5-4} = Rn{5-4}; 346} 347class VLD2QWB<bits<4> op7_4, string Dt> 348 : NLdSt<0, 0b10, 0b0011, op7_4, 349 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 350 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u, 351 "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", 352 "$Rn.addr = $wb", []> { 353 let Inst{5-4} = Rn{5-4}; 354} 355 356def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">; 357def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">; 358def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">; 359 360def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">; 361def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">; 362def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">; 363 364def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; 365def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; 366def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; 367 368def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; 369def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; 370def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; 371 372// ...with double-spaced registers (for disassembly only): 373def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">; 374def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">; 375def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">; 376def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, 
"8">; 377def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">; 378def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">; 379 380// VLD3 : Vector Load (multiple 3-element structures) 381class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> 382 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 383 (ins addrmode6:$Rn), IIC_VLD3, 384 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { 385 let Rm = 0b1111; 386 let Inst{4} = Rn{4}; 387} 388 389def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; 390def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; 391def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; 392 393def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>; 394def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>; 395def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>; 396 397// ...with address register writeback: 398class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 399 : NLdSt<0, 0b10, op11_8, op7_4, 400 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 401 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, 402 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", 403 "$Rn.addr = $wb", []> { 404 let Inst{4} = Rn{4}; 405} 406 407def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; 408def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; 409def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; 410 411def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; 412def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; 413def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; 414 415// ...with double-spaced registers (non-updating versions for disassembly only): 416def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; 417def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; 418def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; 419def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; 420def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; 421def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; 422 423def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 424def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 425def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 426 427// 
...alternate versions to be allocated odd register numbers: 428def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 429def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 430def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 431 432// VLD4 : Vector Load (multiple 4-element structures) 433class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> 434 : NLdSt<0, 0b10, op11_8, op7_4, 435 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 436 (ins addrmode6:$Rn), IIC_VLD4, 437 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { 438 let Rm = 0b1111; 439 let Inst{5-4} = Rn{5-4}; 440} 441 442def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; 443def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; 444def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; 445 446def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>; 447def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>; 448def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>; 449 450// ...with address register writeback: 451class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 452 : NLdSt<0, 0b10, op11_8, op7_4, 453 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 454 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4, 455 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", 456 "$Rn.addr = $wb", []> { 457 let Inst{5-4} = Rn{5-4}; 458} 459 460def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; 461def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; 462def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; 463 464def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4>; 465def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4>; 466def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4>; 467 468// ...with double-spaced registers (non-updating versions for disassembly only): 469def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; 470def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; 471def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; 472def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; 473def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; 474def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; 475 476def 
VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>; 477def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>; 478def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>; 479 480// ...alternate versions to be allocated odd register numbers: 481def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>; 482def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>; 483def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>; 484 485} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 486 487// Classes for VLD*LN pseudo-instructions with multi-register operands. 488// These are expanded to real instructions after register allocation. 489class VLDQLNPseudo<InstrItinClass itin> 490 : PseudoNLdSt<(outs QPR:$dst), 491 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 492 itin, "$src = $dst">; 493class VLDQLNWBPseudo<InstrItinClass itin> 494 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 495 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 496 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 497class VLDQQLNPseudo<InstrItinClass itin> 498 : PseudoNLdSt<(outs QQPR:$dst), 499 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 500 itin, "$src = $dst">; 501class VLDQQLNWBPseudo<InstrItinClass itin> 502 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 503 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 504 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 505class VLDQQQQLNPseudo<InstrItinClass itin> 506 : PseudoNLdSt<(outs QQQQPR:$dst), 507 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 508 itin, "$src = $dst">; 509class VLDQQQQLNWBPseudo<InstrItinClass itin> 510 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 511 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 512 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 513 514// VLD1LN : Vector Load (single element to one lane) 515class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 516 PatFrag LoadOp> 517 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 518 (ins addrmode6:$Rn, DPR:$src, 
nohash_imm:$lane), 519 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 520 "$src = $Vd", 521 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 522 (i32 (LoadOp addrmode6:$Rn)), 523 imm:$lane))]> { 524 let Rm = 0b1111; 525} 526class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> { 527 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), 528 (i32 (LoadOp addrmode6:$addr)), 529 imm:$lane))]; 530} 531 532def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { 533 let Inst{7-5} = lane{2-0}; 534} 535def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { 536 let Inst{7-6} = lane{1-0}; 537 let Inst{4} = Rn{4}; 538} 539def VLD1LNd32 : VLD1LN<0b1000, {?,0,?,?}, "32", v2i32, load> { 540 let Inst{7} = lane{0}; 541 let Inst{5} = Rn{4}; 542 let Inst{4} = Rn{4}; 543} 544 545def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; 546def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>; 547def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>; 548 549def : Pat<(vector_insert (v2f32 DPR:$src), 550 (f32 (load addrmode6:$addr)), imm:$lane), 551 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 552def : Pat<(vector_insert (v4f32 QPR:$src), 553 (f32 (load addrmode6:$addr)), imm:$lane), 554 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 555 556let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 557 558// ...with address register writeback: 559class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 560 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), 561 (ins addrmode6:$Rn, am6offset:$Rm, 562 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, 563 "\\{$Vd[$lane]\\}, $Rn$Rm", 564 "$src = $Vd, $Rn.addr = $wb", []>; 565 566def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { 567 let Inst{7-5} = lane{2-0}; 568} 569def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { 570 let Inst{7-6} = lane{1-0}; 571 let Inst{4} = Rn{4}; 572} 573def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { 574 let Inst{7} 
= lane{0}; 575 let Inst{5} = Rn{4}; 576 let Inst{4} = Rn{4}; 577} 578 579def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 580def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 581def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 582 583// VLD2LN : Vector Load (single 2-element structure to one lane) 584class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 585 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), 586 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), 587 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", 588 "$src1 = $Vd, $src2 = $dst2", []> { 589 let Rm = 0b1111; 590 let Inst{4} = Rn{4}; 591} 592 593def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { 594 let Inst{7-5} = lane{2-0}; 595} 596def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { 597 let Inst{7-6} = lane{1-0}; 598} 599def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { 600 let Inst{7} = lane{0}; 601} 602 603def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 604def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 605def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 606 607// ...with double-spaced registers: 608def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { 609 let Inst{7-6} = lane{1-0}; 610} 611def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { 612 let Inst{7} = lane{0}; 613} 614 615def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>; 616def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>; 617 618// ...with address register writeback: 619class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 620 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), 621 (ins addrmode6:$Rn, am6offset:$Rm, 622 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt, 623 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", 624 "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { 625 let Inst{4} = Rn{4}; 626} 627 628def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { 629 let Inst{7-5} = lane{2-0}; 630} 631def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { 632 let 
Inst{7-6} = lane{1-0}; 633} 634def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { 635 let Inst{7} = lane{0}; 636} 637 638def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>; 639def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>; 640def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>; 641 642def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { 643 let Inst{7-6} = lane{1-0}; 644} 645def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { 646 let Inst{7} = lane{0}; 647} 648 649def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>; 650def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>; 651 652// VLD3LN : Vector Load (single 3-element structure to one lane) 653class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 654 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 655 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, 656 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, 657 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", 658 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> { 659 let Rm = 0b1111; 660} 661 662def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { 663 let Inst{7-5} = lane{2-0}; 664} 665def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { 666 let Inst{7-6} = lane{1-0}; 667} 668def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { 669 let Inst{7} = lane{0}; 670} 671 672def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; 673def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; 674def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; 675 676// ...with double-spaced registers: 677def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { 678 let Inst{7-6} = lane{1-0}; 679} 680def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { 681 let Inst{7} = lane{0}; 682} 683 684def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>; 685def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>; 686 687// ...with address register writeback: 688class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 689 : NLdStLn<1, 0b10, op11_8, op7_4, 690 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, 
GPR:$wb), 691 (ins addrmode6:$Rn, am6offset:$Rm, 692 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), 693 IIC_VLD3lnu, "vld3", Dt, 694 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", 695 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", 696 []>; 697 698def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { 699 let Inst{7-5} = lane{2-0}; 700} 701def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { 702 let Inst{7-6} = lane{1-0}; 703} 704def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { 705 let Inst{7} = lane{0}; 706} 707 708def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; 709def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; 710def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; 711 712def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { 713 let Inst{7-6} = lane{1-0}; 714} 715def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { 716 let Inst{7} = lane{0}; 717} 718 719def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; 720def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; 721 722// VLD4LN : Vector Load (single 4-element structure to one lane) 723class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 724 : NLdStLn<1, 0b10, op11_8, op7_4, 725 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 726 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, 727 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, 728 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", 729 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { 730 let Rm = 0b1111; 731 let Inst{4} = Rn{4}; 732} 733 734def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { 735 let Inst{7-5} = lane{2-0}; 736} 737def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { 738 let Inst{7-6} = lane{1-0}; 739} 740def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { 741 let Inst{7} = lane{0}; 742 let Inst{5} = Rn{5}; 743} 744 745def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; 746def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>; 747def VLD4LNd32Pseudo : 
VLDQQLNPseudo<IIC_VLD4ln>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;

// ...with address register writeback:
// Each of the four destination registers is tied to the matching $srcN input,
// and the address is tied to the extra $wb result.
// The itinerary is IIC_VLD4lnu, matching the VLD4LN*Pseudo_UPD defs below and
// the VLD1LNWB / VLD2LNWB / VLD3LNWB classes, which use IIC_VLD1lnu /
// IIC_VLD2lnu / IIC_VLD3lnu.  It previously used the non-writeback
// IIC_VLD4ln.
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VLD4lnu, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
          []> {
  let Inst{4} = Rn{4};
}

// The lane index is placed in the upper bits of Inst{7-4}; how many bits it
// takes depends on the element size.
def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

//   VLD1DUP  : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs
DPR:$Vd), (ins addrmode6dup:$Rn),
          IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
          [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}
// Q-register pseudo that carries the selection pattern for the two-register
// form of VLD1DUP.
class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
  let Pattern = [(set QPR:$dst,
                  (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
}

def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;

def VLD1DUPq8Pseudo  : VLD1QDUPPseudo<v16i8, extloadi8>;
def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;

// Floating-point duplicating loads select the 32-bit integer instructions.
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32Pseudo addrmode6:$addr)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// Two-D-register form of VLD1DUP.  It has no selection pattern of its own;
// the VLD1DUPq*Pseudo records above carry the patterns.
class VLD1QDUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
          (ins addrmode6dup:$Rn), IIC_VLD1dup,
          "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8">;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;

// ...with address register writeback:
class VLD1DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
          "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
}
class VLD1QDUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
          "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
}

def VLD1DUPd8_UPD  : VLD1DUPWB<{0,0,0,0}, "8">;
def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;

def VLD1DUPq8_UPD  : VLD1QDUPWB<{0,0,1,0}, "8">;
def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;

def VLD1DUPq8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD1dupu>;
def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
          (ins addrmode6dup:$Rn), IIC_VLD2dup,
          "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8">;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;

def VLD2DUPd8Pseudo  : VLDQPseudo<IIC_VLD2dup>;
def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;

// ...with double-spaced registers (not used for codegen):
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8">;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;

// ...with address register writeback:
class VLD2DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
          "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
}

def VLD2DUPd8_UPD  : VLD2DUPWB<{0,0,0,0}, "8">;
def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;

def VLD2DUPd8x2_UPD  : VLD2DUPWB<{0,0,1,0}, "8">;
def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;

def VLD2DUPd8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD2dupu>;
def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPd8x2  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;

def VLD3DUPd8x2_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

// The 32-bit forms additionally place Rn{5} in Inst{6}.
def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPd8x2  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8x2_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// The "WB" variants add a GPR:$wb result tied to the address operand and
// take an extra am6offset operand.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
// VST1D stores one D register and takes Inst{4} from the address operand;
// VST1Q stores two and takes Inst{5-4}.
class VST1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd),
          IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}
class VST1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b1010,op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
          "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}

def VST1d8  : VST1D<{0,0,0,?}, "8">;
def VST1d16 : VST1D<{0,1,0,?}, "16">;
def VST1d32 : VST1D<{1,0,0,?}, "32">;
def VST1d64 : VST1D<{1,1,0,?}, "64">;

def VST1q8  : VST1Q<{0,0,?,?}, "8">;
def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;

def VST1q8Pseudo  : VSTQPseudo<IIC_VST1x2>;
def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;

// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
          "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
}
class VST1QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
          IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
}

def VST1d8_UPD  : VST1DWB<{0,0,0,?}, "8">;
def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">;
def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">;
def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">;

def VST1q8_UPD  : VST1QWB<{0,0,?,?}, "8">;
def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">;
def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">;
def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">;

def VST1q8Pseudo_UPD  : VSTQWBPseudo<IIC_VST1x2u>;
def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;

// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
          IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}
class VST1D3WB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3),
          IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8">;
def VST1d16T : VST1D3<{0,1,0,?}, "16">;
def VST1d32T : VST1D3<{1,0,0,?}, "32">;
def VST1d64T : VST1D3<{1,1,0,?}, "64">;

def VST1d8T_UPD  : VST1D3WB<{0,0,0,?}, "8">;
def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;

def VST1d64TPseudo     : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;

// ...with 4 registers (some of these are only for the disassembler):
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
          []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}
class VST1D4WB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
          "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8">;
def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
def VST1d64Q : VST1D4<{1,1,?,?}, "64">;

def VST1d8Q_UPD  : VST1D4WB<{0,0,?,?}, "8">;
def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;

def VST1d64QPseudo     : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;

// VST2 : Vector Store (multiple 2-element structures)
// VST2D takes op11_8 as a parameter (0b1000 and 0b1001 are used below);
// VST2Q hard-codes 0b0011 and stores four D registers.
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
          IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}
class VST2Q<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}

def VST2d8  : VST2D<0b1000, {0,0,?,?}, "8">;
def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;

def VST2q8  : VST2Q<{0,0,?,?}, "8">;
def VST2q16 : VST2Q<{0,1,?,?}, "16">;
def VST2q32 : VST2Q<{1,0,?,?}, "32">;

def VST2d8Pseudo  : VSTQPseudo<IIC_VST2>;
def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;

// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
          IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
}
class VST2QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
          "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
}

def VST2d8_UPD  : VST2DWB<0b1000, {0,0,?,?}, "8">;
def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;

def VST2q8_UPD  : VST2QWB<{0,0,?,?}, "8">;
def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;

def VST2d8Pseudo_UPD  : VSTQWBPseudo<IIC_VST2u>;
def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;

def VST2q8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;

// ...with double-spaced registers (for disassembly only):
def VST2b8      : VST2D<0b1001, {0,0,?,?}, "8">;
def VST2b16     : VST2D<0b1001, {0,1,?,?}, "16">;
def VST2b32     : VST2D<0b1001, {1,0,?,?}, "32">;
def VST2b8_UPD  : VST2DWB<0b1001, {0,0,?,?}, "8">;
def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;

// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Each takes the lane number as a trailing nohash_imm operand.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
// The selection pattern stores the result of ExtractOp applied to lane $lane
// of $Vd.  This class leaves the lane and address bits of op7_4 to the
// individual defs below.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
  let Rm = 0b1111;
}
// Q-register pseudo carrying the same store-of-extracted-lane pattern.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                       NEONvgetlaneu> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{5};
}
def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Floating-point lane stores select the 32-bit integer instructions.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
          "\\{$Vd[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>;

def VST1LNd8_UPD  :
VST1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{5};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST1lnu>;
def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
          IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
          "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
// (These repeat the Inst{4} assignment that VST2LN already makes.)
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{4} = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;

// ...with address register writeback:
// Operands are named $Rn/$Rm/$Vd, like every other real load/store class in
// this file, so that "Inst{4} = Rn{4}" below refers to the address operand.
// Operand order and types are unchanged.
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
          "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
}

def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;

// VST3LN : Vector Store (single 3-element structure from one lane)
// Unlike VST2LN, this class assigns no Inst bit from the address operand;
// every def below fixes bit 0 of op7_4 to 0.
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  let Rm = 0b1111;
}

def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo :
VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VST3lnu, "vst3", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>;

def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;

// VST4LN : Vector Store (single 4-element structure from one lane)
// Inst{4} always comes from the address operand; the 32-bit defs below
// additionally place Rn{5} in Inst{5}.
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
           nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
          "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}

def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VST4lnu, "vst4", Dt,
  "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
}

def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

1627//===----------------------------------------------------------------------===// 1628// NEON pattern fragments 1629//===----------------------------------------------------------------------===// 1630 1631// Extract D sub-registers of Q registers. 1632def DSubReg_i8_reg : SDNodeXForm<imm, [{ 1633 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 1634 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32); 1635}]>; 1636def DSubReg_i16_reg : SDNodeXForm<imm, [{ 1637 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 1638 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32); 1639}]>; 1640def DSubReg_i32_reg : SDNodeXForm<imm, [{ 1641 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 1642 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32); 1643}]>; 1644def DSubReg_f64_reg : SDNodeXForm<imm, [{ 1645 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 1646 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32); 1647}]>; 1648 1649// Extract S sub-registers of Q/D registers. 1650def SSubReg_f32_reg : SDNodeXForm<imm, [{ 1651 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering"); 1652 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32); 1653}]>; 1654 1655// Translate lane numbers from Q registers to D subregs. 
// Each transform keeps only the low bits of the lane number (mask 7, 3 or 1
// for 8-, 16- and 32-bit elements respectively).
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// The two classes differ in register class (DPR vs. QPR), in the sixth N2V
// argument (0 vs. 1) and in itinerary (IIC_VUNAD vs. IIC_VUNAQ).
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Same shape as N2VD/N2VQ, but the operation is an Intrinsic and the
// itinerary is supplied by the instantiation: $Vd = IntOp($Vm).
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        itin,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        itin,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Narrow 2-register operations.
// The source is a QPR register of type TyQ and the result a DPR register of
// type TyD.  Unlike the basic classes, op6 is a template parameter here.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin,
           string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vm),
        itin,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
// Identical to N2VN except that the operation is an Intrinsic.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vm),
        itin,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
// The source is a DPR register of type TyD and the result a QPR register of
// type TyQ: $Vd = OpNode($Vm).
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin,
           string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vm),
        itin,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written: the constraint string ties $src1 to
// $Vd and $src2 to $Vm.  No selection pattern is attached.  The D form
// hard-codes IIC_VPERMD; the Q form takes its itinerary as a parameter.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7,
                  string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm",
        []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2),
        itin,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm",
        []>;

// Basic 3-register operations: single-, double- and quad-register.

// N3VS: all three operands use the DPR_VFP2 register class, the itinerary is
// IIC_VBIND, and no selection pattern is attached.
class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$Vd),
        (ins DPR_VFP2:$Vn, DPR_VFP2:$Vm),
        N3RegFrm, IIC_VBIND,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        []>;

// N3VD: $Vd = OpNode($Vn, $Vm) on DPR registers.  Commutable is forwarded to
// isCommutable.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode,
           bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
// N3VDX: D-register 3-operand form built on N3VX, which takes no Dt string.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin,
            string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode,
            bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd),
         (ins DPR:$Vn, DPR:$Vm),
         N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd,
               (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// By-lane ("SL") D-register forms: the second operand of ShOp is lane $lane
// of $Vm, replicated with NEONvduplane.  These are never commutable.
// N3VDSL takes $Vm from DPR_VFP2 and its itinerary from the instantiation;
// N3VDSL16 takes $Vm from DPR_8 and hard-codes IIC_VMULi16D.
// NOTE(review): the restricted register classes presumably reflect which
// registers the scalar encoding can address -- confirm against the ARM ARM.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt,
               ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, IIC_VMULi16D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}

// N3VQ: $Vd = OpNode($Vn, $Vm) on QPR registers.  Commutable is forwarded to
// isCommutable.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode,
           bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// N3VQX: as N3VQ, but built on N3VX, which takes no Dt string.
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin,
            string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode,
            bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd),
         (ins QPR:$Vn, QPR:$Vm),
         N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd,
               (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane Q-register forms.  $Vd and $Vn are QPR values of type ResTy; the
// lane source $Vm is a D register of type OpTy whose selected lane is
// replicated to a ResTy vector by NEONvduplane.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, IIC_VMULi16Q,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
// N3VDInt: $Vd = IntOp($Vn, $Vm) on DPR registers.  The instruction Format
// and itinerary come from the instantiation; Commutable is forwarded to
// isCommutable.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane D-register intrinsic forms: the second intrinsic operand is lane
// $lane of $Vm replicated with NEONvduplane.  $Vm is DPR_VFP2 in the plain
// form and DPR_8 in the "16" form.  Never commutable.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}
// N3VDIntSh: like N3VDInt, but the source operands are listed and printed in
// the order $Vm, $Vn, and $Vm is the first intrinsic operand.  Never
// commutable.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm, DPR:$Vn),
        f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd,
              (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let isCommutable = 0;
}

// N3VQInt: $Vd = IntOp($Vn, $Vm) on QPR registers.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane Q-register intrinsic forms: $Vd/$Vn are QPR values of type ResTy;
// the lane source $Vm is a D register of type OpTy.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
// N3VQIntSh: Q-register counterpart of N3VDIntSh ($Vm precedes $Vn).
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm, QPR:$Vn),
        f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd,
              (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: single-, double- and quad-register.
// In every class of this group the accumulator $src1 is tied to $Vd through
// the "$src1 = $Vd" constraint, and the selected DAG has the shape
//   $Vd = OpNode/ShOp($src1, MulOp($Vn, <second multiplicand>)).

// N3VSMulOp: DPR_VFP2 operands, no selection pattern.
class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$Vd),
        (ins DPR_VFP2:$src1, DPR_VFP2:$Vn, DPR_VFP2:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        []>;

// N3VDMulOp: D-register form; the second multiplicand is $Vm.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (Ty (OpNode DPR:$src1,
                          (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

// By-lane D-register forms: the second multiplicand is lane $lane of $Vm
// replicated with NEONvduplane.  $Vm is DPR_VFP2 in the plain form and DPR_8
// in the "16" form.  Note that the plain form takes SDPatternOperator
// operators while the "16" form takes SDNode.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                     imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty,
                    SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                     imm:$lane)))))))]>;

// N3VQMulOp: Q-register form; the second multiplicand is $Vm.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (Ty (OpNode QPR:$src1,
                          (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
// By-lane Q-register forms: accumulator and $Vn are QPR values of type
// ResTy; the lane source $Vm is a D register of type OpTy.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                        imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                   (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                        imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// The intrinsic result is combined with the tied accumulator:
//   $Vd = OpNode($src1, IntOp($Vn, $Vm)),  with "$src1 = $Vd".
// All values share the single type Ty.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (Ty (OpNode DPR:$src1,
                          (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (Ty (OpNode QPR:$src1,
                          (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// $Vd = IntOp($src1, $Vn, $Vm), with $src1 tied to $Vd.  All three intrinsic
// operands have type OpTy; the result has type ResTy.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (ResTy (IntOp (OpTy QPR:$src1),
                            (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// The multiplicands are D registers of type TyD; the product and the tied
// accumulator $src1 are Q registers of type TyQ:
//   $Vd = OpNode($src1, MulOp($Vn, $Vm)).
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD,
                SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (MulOp (TyD DPR:$Vn),
                                  (TyD DPR:$Vm)))))]>;
// By-lane variants: the second multiplicand is lane $lane of $Vm replicated
// with NEONvduplane ($Vm is DPR_VFP2, or DPR_8 for the "16" form).
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD,
                  SDNode MulOp, SDNode OpNode>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (MulOp (TyD DPR:$Vn),
                                  (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                     imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD,
                    SDNode MulOp, SDNode OpNode>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (MulOp (TyD DPR:$Vn),
                                  (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                     imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// The TyD intrinsic result is widened to TyQ by ExtOp before being combined
// with the tied accumulator:
//   $Vd = OpNode($src1, ExtOp(IntOp($Vn, $Vm))).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                   bit op4,
                   InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD,
                   Intrinsic IntOp, SDNode ExtOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                              (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// $Vd = IntOp($src1, $Vn, $Vm): the tied accumulator $src1 is a Q register,
// the two remaining operands are D registers.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1),
                          (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// By-lane variants: the last intrinsic operand is lane $lane of $Vm
// replicated with NEONvduplane ($Vm is DPR_VFP2, or DPR_8 for the "16"
// form).  Here ResTy names the Q type and OpTy the D type.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Two Q-register sources of type TyQ produce a D-register result of type
// TyD.  The itinerary is fixed to IIC_VBINi4D.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Two D-register sources of type TyD produce a Q-register result of type
// TyQ: $Vd = OpNode($Vn, $Vm).
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin,
           string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode,
           bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane variants: the second operand is lane $lane of $Vm replicated with
// NEONvduplane ($Vm is DPR_VFP2, or DPR_8 for the "16" form).
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vn),
                           (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                              imm:$lane)))))]>;
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vn),
                           (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                              imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Each D-register operand is widened from TyD to TyQ by ExtOp before OpNode
// is applied: $Vd = OpNode(ExtOp($Vn), ExtOp($Vm)).
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// The intrinsic runs on the TyD operands and its result is then widened to
// TyQ: $Vd = ExtOp(IntOp($Vn, $Vm)).
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD,
                 Intrinsic IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                      (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Two D-register sources of type TyD produce a Q-register result of type
// TyQ directly from the intrinsic: $Vd = IntOp($Vn, $Vm).
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane variants: the second intrinsic operand is lane $lane of $Vm
// replicated with NEONvduplane ($Vm is DPR_VFP2, or DPR_8 for the "16"
// form).  Here ResTy names the Q type and OpTy the D type.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;

// Wide 3-register operations.
// The first source is already a Q register of type TyQ; only the D-register
// second source is widened by ExtOp: $Vd = OpNode($Vn, ExtOp($Vm)).
// The itinerary is fixed to IIC_VSUBiD.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp,
           bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR:$Vm),
        N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$Vn),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
// $Vd = IntOp($Vm), with distinct operand (OpTy) and result (ResTy) types.
// NOTE(review): the Q form uses the same IIC_VSHLiD itinerary as the D form,
// whereas the accumulate variants N2VDPLInt2/N2VQPLInt2 use separate
// IIC_VPALiD/IIC_VPALiQ itineraries -- confirm this is intentional.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        IIC_VSHLiD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        IIC_VSHLiD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// $Vd = IntOp($src1, $Vm), with $src1 tied to $Vd.  The accumulator has the
// result type ResTy; $Vm has type OpTy.  Itineraries: IIC_VPALiD for the D
// form, IIC_VPALiQ for the Q form.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vm),
        IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vm),
        IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
// $Vd = OpNode($Vm, $SIMM), where $SIMM is an i32 immediate operand.  Built
// on N2VImm; Format and itinerary come from the instantiation.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vm, i32imm:$SIMM),
           f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vm, i32imm:$SIMM),
           f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
// D-register source of type OpTy, Q-register result of type ResTy.  Format
// is fixed to N2RegVShLFrm and the itinerary to IIC_VSHLiD.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd),
           (ins DPR:$Vm, i32imm:$SIMM),
           N2RegVShLFrm, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (OpNode (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
// Q-register source of type OpTy, D-register result of type ResTy.  Format
// is fixed to N2RegVShRFrm.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd),
           (ins QPR:$Vm, i32imm:$SIMM),
           N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (OpNode (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// $Vd = add($src1, ShOp($Vm, $SIMM)), with $src1 tied to $Vd.
// NOTE(review): both the D and the Q form use IIC_VPALiD; an IIC_VPALiQ
// itinerary is used by N2VQPLInt2 -- confirm the Q form is intentional.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
// Shift-and-insert classes.  $src1 is tied to $Vd and is passed to ShOp as its
// first operand together with $Vm and the immediate.  The instruction format
// `f` is chosen by the instantiation.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Format f, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Format f, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
// The immediate operand uses the neon_vcvt_imm32 operand class; the result and
// source value types are independent (ResTy / OpTy).
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector comparisons (one source register).
// Every def carries a selection pattern, so these are not disassembly-only.

// First with only element sizes of 8, 16 and 32 bits:
// (plus f32 variants, which reuse the 32-bit encoding and set Inst{10}).
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  // A compare yields an integer mask vector (cf. SDTCisInt<0> on the
  // two-operand SDTARMVCMP profile), so the f32 form produces v2i32, not
  // v2f32.  With a float result type the pattern could never be selected.
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  // Same reasoning as v2f32 above: the result is an integer mask (v4i32).
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}

// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
// The 8- and 16-bit variants share one itinerary (itinD16 / itinQ16); the
// 32-bit variants use itinD32 / itinQ32.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// "SL" variants for 16- and 32-bit elements.  The Q-register forms take an
// additional D-register-sized value type (v4i16 / v2i32).
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
                       v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
                     v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
                       v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
// Only one itinerary per register width is taken here; it is forwarded to
// N3V_QHS for both the 16- and 32-bit slots.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits:
// Defs are named after the (narrow) result type, while the data-type suffix
// is the source element width.
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector operations (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
// Defs are named after the (wide) result type; the data-type suffix is the
// source element width.
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
// One itinerary per (register width, element width) pair is supplied by the
// instantiation.
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// Same shape as N3VInt_HS, but built on the N3VDIntSh / N3VQIntSh classes and
// without a Commutable parameter.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// "SL" intrinsic variants for 16- and 32-bit elements.  The Q-register forms
// take an additional D-register-sized value type (v4i16 / v2i32).
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The 8-bit variants reuse the 16-bit itineraries.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        Intrinsic IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
// The 64-bit variants reuse the 32-bit itineraries.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         Intrinsic IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// Defs are named after the narrow result type; the data-type suffix is the
// source element width.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

// Defs are named after the wide result type; the data-type suffix is the
// source element width.  8- and 16-bit sources share itin16.
multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

// Unlike N3VL_QHS, the defs here are named after the source type.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

// Long operations that additionally take the extension node (ExtOp).
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// Defs here are named after the source type (v4i16 / v2i32).
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
  // A single itinerary is shared by all three element widths.
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits:
// (The operation is passed as an SDNode together with an extension node; no
// itinerary parameter is taken.)
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// The multiply node is always `mul`; OpNode is the second operation handed to
// the N3VDMulOp / N3VQMulOp classes.
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// "SL" multiply-op variants for 16- and 32-bit elements.  The Q-register forms
// take an additional D-register-sized value type (v4i16 / v2i32).
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// One itinerary per register width, shared by all element widths.
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, Intrinsic IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// Defs are named after the wide result type; both the multiply node (MulOp)
// and the second operation (OpNode) come from the instantiation.
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

// Itineraries are fixed here (IIC_VMACi16D / IIC_VMACi32D) rather than being
// parameters; defs are named after the source type.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.
// First with only element sizes of 16 and 32 bits:
// Defs are named after the wide result type; the data-type suffix is the
// source element width.
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// Itineraries are fixed here (IIC_VMACi16D / IIC_VMACi32D); defs are named
// after the source type.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The 8-bit variant reuses the 16-bit itinerary.
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
  // A single itinerary is shared by all three element widths.
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Defs are named after the source type; the result type has half as many
// elements of twice the width (e.g. v8i8 -> v4i16).
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Same type layout as N2VPLInt_QHS, built on N2VDPLInt2 / N2VQPLInt2.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// The element size is encoded by pinning the leading bits of Inst{21-16}
// (imm6).  The literal 0/1 argument is 1 only for the 64-bit variants, which
// leave imm6 unconstrained.
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode, Format f> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
// Same imm6 scheme as N2VSh_QHSD; the literal 0/1 argument is the op7
// parameter of N2VDShAdd / N2VQShAdd.
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}


// Neon Shift-Insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// There is no Dt parameter: the data-type suffix is the bare element width.
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp,
                         Format f> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "8", v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "16", v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "32", v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
                        f, OpcodeStr, "64", v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "8", v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "16", v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "32", v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
                        f, OpcodeStr, "64", v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits:
// Defs are named after the wide result type; the data-type suffix and the
// imm6 prefix follow the source element width.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits:
// Defs are named after the narrow result type and the imm6 prefix follows the
// result element width, while the data-type suffix is the source width.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

//   VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0,
                         IIC_VBINiD, IIC_VBINiQ,
                         "vadd", "i", add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0,
                     IIC_VBIND,
                     "vadd", "f32", v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0,
                     IIC_VBINQ,
                     "vadd", "f32", v4f32, v4f32, fadd, 1>;
//   VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0,
                            IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0,
                            IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zext, 1>;
//   VADDW    : Vector Add Wide (Q = Q + D)
//   (declared non-commutable: the trailing argument is 0)
defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
//   VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
//   VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
//   VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vqadd", "s", int_arm_neon_vqadds, 1>; 3178defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, 3179 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3180 "vqadd", "u", int_arm_neon_vqaddu, 1>; 3181// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) 3182defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", 3183 int_arm_neon_vaddhn, 1>; 3184// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) 3185defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", 3186 int_arm_neon_vraddhn, 1>; 3187 3188// Vector Multiply Operations. 3189 3190// VMUL : Vector Multiply (integer, polynomial and floating-point) 3191defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, 3192 IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; 3193def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul", 3194 "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; 3195def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul", 3196 "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; 3197def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", 3198 v2f32, v2f32, fmul, 1>; 3199def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", 3200 v4f32, v4f32, fmul, 1>; 3201defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; 3202def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; 3203def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, 3204 v2f32, fmul>; 3205 3206def : Pat<(v8i16 (mul (v8i16 QPR:$src1), 3207 (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), 3208 (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), 3209 (v4i16 (EXTRACT_SUBREG QPR:$src2, 3210 (DSubReg_i16_reg imm:$lane))), 3211 (SubReg_i16_lane imm:$lane)))>; 3212def : Pat<(v4i32 (mul (v4i32 QPR:$src1), 3213 (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), 3214 (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), 3215 (v2i32 (EXTRACT_SUBREG QPR:$src2, 3216 (DSubReg_i32_reg imm:$lane))), 3217 (SubReg_i32_lane imm:$lane)))>; 3218def : Pat<(v4f32 
(fmul (v4f32 QPR:$src1), 3219 (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))), 3220 (v4f32 (VMULslfq (v4f32 QPR:$src1), 3221 (v2f32 (EXTRACT_SUBREG QPR:$src2, 3222 (DSubReg_i32_reg imm:$lane))), 3223 (SubReg_i32_lane imm:$lane)))>; 3224 3225// VQDMULH : Vector Saturating Doubling Multiply Returning High Half 3226defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, 3227 IIC_VMULi16Q, IIC_VMULi32Q, 3228 "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; 3229defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, 3230 IIC_VMULi16Q, IIC_VMULi32Q, 3231 "vqdmulh", "s", int_arm_neon_vqdmulh>; 3232def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), 3233 (v8i16 (NEONvduplane (v8i16 QPR:$src2), 3234 imm:$lane)))), 3235 (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), 3236 (v4i16 (EXTRACT_SUBREG QPR:$src2, 3237 (DSubReg_i16_reg imm:$lane))), 3238 (SubReg_i16_lane imm:$lane)))>; 3239def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), 3240 (v4i32 (NEONvduplane (v4i32 QPR:$src2), 3241 imm:$lane)))), 3242 (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), 3243 (v2i32 (EXTRACT_SUBREG QPR:$src2, 3244 (DSubReg_i32_reg imm:$lane))), 3245 (SubReg_i32_lane imm:$lane)))>; 3246 3247// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half 3248defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm, 3249 IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q, 3250 "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; 3251defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, 3252 IIC_VMULi16Q, IIC_VMULi32Q, 3253 "vqrdmulh", "s", int_arm_neon_vqrdmulh>; 3254def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), 3255 (v8i16 (NEONvduplane (v8i16 QPR:$src2), 3256 imm:$lane)))), 3257 (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), 3258 (v4i16 (EXTRACT_SUBREG QPR:$src2, 3259 (DSubReg_i16_reg imm:$lane))), 3260 (SubReg_i16_lane imm:$lane)))>; 3261def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), 3262 (v4i32 (NEONvduplane (v4i32 QPR:$src2), 3263 
imm:$lane)))), 3264 (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), 3265 (v2i32 (EXTRACT_SUBREG QPR:$src2, 3266 (DSubReg_i32_reg imm:$lane))), 3267 (SubReg_i32_lane imm:$lane)))>; 3268 3269// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) 3270defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, 3271 "vmull", "s", NEONvmulls, 1>; 3272defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, 3273 "vmull", "u", NEONvmullu, 1>; 3274def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", 3275 v8i16, v8i8, int_arm_neon_vmullp, 1>; 3276defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; 3277defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; 3278 3279// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) 3280defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, 3281 "vqdmull", "s", int_arm_neon_vqdmull, 1>; 3282defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, 3283 "vqdmull", "s", int_arm_neon_vqdmull>; 3284 3285// Vector Multiply-Accumulate and Multiply-Subtract Operations. 
// VMLA : Vector Multiply Accumulate (integer and floating-point)
// The integer records are produced by the N3VMulOp_QHS multiclass; the f32
// records are individual defs gated on [HasNEON, UseFPVMLx].
defm VMLA : N3VMulOp_QHS<
    0, 0, 0b1001, 0,
    IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q,
    "vmla", "i", add>;
def VMLAfd : N3VDMulOp<
    0, 0, 0b00, 0b1101, 1, IIC_VMACD,
    "vmla", "f32", v2f32, fmul_su, fadd_mlx>,
  Requires<[HasNEON, UseFPVMLx]>;
def VMLAfq : N3VQMulOp<
    0, 0, 0b00, 0b1101, 1, IIC_VMACQ,
    "vmla", "f32", v4f32, fmul_su, fadd_mlx>,
  Requires<[HasNEON, UseFPVMLx]>;

// By-scalar ("sl") forms of VMLA.
defm VMLAsl : N3VMulOpSL_HS<
    0b0000,
    IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q,
    "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<
    0b10, 0b0001, IIC_VMACD,
    "vmla", "f32", v2f32, fmul_su, fadd_mlx>,
  Requires<[HasNEON, UseFPVMLx]>;
def VMLAslfq : N3VQMulOpSL<
    0b10, 0b0001, IIC_VMACQ,
    "vmla", "f32", v4f32, v2f32, fmul_su, fadd_mlx>,
  Requires<[HasNEON, UseFPVMLx]>;

// Select the by-scalar VMLA when the second multiplicand is a lane of a Q
// register duplicated with NEONvduplane.  The Q register is narrowed with
// EXTRACT_SUBREG and the lane operand is rewritten through the
// DSubReg_*_reg / SubReg_*_lane transforms (defined elsewhere in this file).
def : Pat<
  (v8i16 (add (v8i16 QPR:$src1),
              (mul (v8i16 QPR:$src2),
                   (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
  (v8i16 (VMLAslv8i16
            (v8i16 QPR:$src1),
            (v8i16 QPR:$src2),
            (v4i16 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane)))>;

def : Pat<
  (v4i32 (add (v4i32 QPR:$src1),
              (mul (v4i32 QPR:$src2),
                   (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
  (v4i32 (VMLAslv4i32
            (v4i32 QPR:$src1),
            (v4i32 QPR:$src2),
            (v2i32 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i32_reg imm:$lane))),
            (SubReg_i32_lane imm:$lane)))>;

def : Pat<
  (v4f32 (fadd_mlx (v4f32 QPR:$src1),
                   (fmul_su (v4f32 QPR:$src2),
                            (v4f32 (NEONvduplane (v4f32 QPR:$src3),
                                                 imm:$lane))))),
  (v4f32 (VMLAslfq
            (v4f32 QPR:$src1),
            (v4f32 QPR:$src2),
            (v2f32 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i32_reg imm:$lane))),
            (SubReg_i32_lane imm:$lane)))>,
  Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs : N3VLMulOp_QHS<
    0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
    "vmlal", "s", NEONvmulls, add>;
defm VMLALu : N3VLMulOp_QHS<
    1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
    "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;

// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<
    0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
    "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;

// VMLS : Vector Multiply Subtract (integer and floating-point)
// Mirrors the VMLA group above with sub / fsub_mlx as the accumulate operator.
defm VMLS : N3VMulOp_QHS<
    1, 0, 0b1001, 0,
    IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q,
    "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<
    0, 0, 0b10, 0b1101, 1, IIC_VMACD,
    "vmls", "f32", v2f32, fmul_su, fsub_mlx>,
  Requires<[HasNEON, UseFPVMLx]>;
def VMLSfq : N3VQMulOp<
    0, 0, 0b10, 0b1101, 1, IIC_VMACQ,
    "vmls", "f32", v4f32, fmul_su, fsub_mlx>,
  Requires<[HasNEON, UseFPVMLx]>;

// By-scalar ("sl") forms of VMLS.
defm VMLSsl : N3VMulOpSL_HS<
    0b0100,
    IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q,
    "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<
    0b10, 0b0101, IIC_VMACD,
    "vmls", "f32", v2f32, fmul_su, fsub_mlx>,
  Requires<[HasNEON, UseFPVMLx]>;
def VMLSslfq : N3VQMulOpSL<
    0b10, 0b0101, IIC_VMACQ,
    "vmls", "f32", v4f32, v2f32, fmul_su, fsub_mlx>,
  Requires<[HasNEON, UseFPVMLx]>;

// Same lane-duplicate selection as for VMLA, targeting the VMLS by-scalar
// records.
def : Pat<
  (v8i16 (sub (v8i16 QPR:$src1),
              (mul (v8i16 QPR:$src2),
                   (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
  (v8i16 (VMLSslv8i16
            (v8i16 QPR:$src1),
            (v8i16 QPR:$src2),
            (v4i16 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane)))>;

def : Pat<
  (v4i32 (sub (v4i32 QPR:$src1),
              (mul (v4i32 QPR:$src2),
                   (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
  (v4i32 (VMLSslv4i32
            (v4i32 QPR:$src1),
            (v4i32 QPR:$src2),
            (v2i32 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i32_reg imm:$lane))),
            (SubReg_i32_lane imm:$lane)))>;

def : Pat<
  (v4f32 (fsub_mlx (v4f32 QPR:$src1),
                   (fmul_su (v4f32 QPR:$src2),
                            (v4f32 (NEONvduplane (v4f32 QPR:$src3),
                                                 imm:$lane))))),
  (v4f32 (VMLSslfq
            (v4f32 QPR:$src1),
            (v4f32 QPR:$src2),
            (v2f32 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i32_reg imm:$lane))),
            (SubReg_i32_lane imm:$lane)))>,
  Requires<[HasNEON, UseFPVMLx]>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs : N3VLMulOp_QHS<
    0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
    "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu : N3VLMulOp_QHS<
    1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
    "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<
    0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
    "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// The opcode argument is spelled as a full 4-bit literal (0b0111, same value
// as the former 0b111) so it matches the width used by every sibling
// by-scalar instantiation in this group (0b0011, 0b0010, 0b0110).
// NOTE(review): presumably the parameter is bits<4> -- confirm against the
// N3VLInt3SL_HS declaration.
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;

// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
defm VSUB : N3V_QHSD<
    1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
    "vsub", "i", sub, 0>;
def VSUBfd : N3VD<
    0, 0, 0b10, 0b1101, 0, IIC_VBIND,
    "vsub", "f32", v2f32, v2f32, fsub, 0>;
def VSUBfq : N3VQ<
    0, 0, 0b10, 0b1101, 0, IIC_VBINQ,
    "vsub", "f32", v4f32, v4f32, fsub, 0>;

// VSUBL : Vector Subtract Long (Q = D - D)
// The signed/unsigned variants differ in the extension node (sext / zext).
defm VSUBLs : N3VLExt_QHS<
    0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
    "vsubl", "s", sub, sext, 0>;
defm VSUBLu : N3VLExt_QHS<
    1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
    "vsubl", "u", sub, zext, 0>;

// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;

// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<
    0, 0, 0b0010, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu : N3VInt_QHS<
    1, 0, 0b0010, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vhsub", "u", int_arm_neon_vhsubu, 0>;

// VQSUB : Vector Saturating Subtract
defm VQSUBs : N3VInt_QHSD<
    0, 0, 0b0010, 1, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu : N3VInt_QHSD<
    1, 0, 0b0010, 1, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vqsub", "u", int_arm_neon_vqsubu, 0>;

// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN : N3VNInt_HSD<
    0,1,0b0110,0, "vsubhn", "i",
    int_arm_neon_vsubhn, 0>;

// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<
    1,1,0b0110,0, "vrsubhn", "i",
    int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.
// VCEQ : Vector Compare Equal
// The f32 records produce an integer vector result (v2i32 / v4i32) from
// floating-point operands (v2f32 / v4f32).
defm VCEQ : N3V_QHS<
    1, 0, 0b1000, 1,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vceq", "i", NEONvceq, 1>;
def VCEQfd : N3VD<
    0,0,0b00,0b1110,0, IIC_VBIND,
    "vceq", "f32", v2i32, v2f32, NEONvceq, 1>;
def VCEQfq : N3VQ<
    0,0,0b00,0b1110,0, IIC_VBINQ,
    "vceq", "f32", v4i32, v4f32, NEONvceq, 1>;

// Compare-against-zero form; the assembly operand string carries a literal #0.
defm VCEQz : N2V_QHS_cmp<
    0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
    "$Vd, $Vm, #0", NEONvceqz>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<
    0, 0, 0b0011, 1,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vcge", "s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<
    1, 0, 0b0011, 1,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vcge", "u", NEONvcgeu, 0>;
def VCGEfd : N3VD<
    1,0,0b00,0b1110,0, IIC_VBIND,
    "vcge", "f32", v2i32, v2f32, NEONvcge, 0>;
def VCGEfq : N3VQ<
    1,0,0b00,0b1110,0, IIC_VBINQ,
    "vcge", "f32", v4i32, v4f32, NEONvcge, 0>;

defm VCGEz : N2V_QHS_cmp<
    0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
    "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<
    0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
    "$Vd, $Vm, #0", NEONvclez>;

// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<
    0, 0, 0b0011, 0,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vcgt", "s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<
    1, 0, 0b0011, 0,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vcgt", "u", NEONvcgtu, 0>;
def VCGTfd : N3VD<
    1,0,0b10,0b1110,0, IIC_VBIND,
    "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>;
def VCGTfq : N3VQ<
    1,0,0b10,0b1110,0, IIC_VBINQ,
    "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>;

defm VCGTz : N2V_QHS_cmp<
    0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
    "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<
    0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
    "$Vd, $Vm, #0", NEONvcltz>;

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<
    1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND,
    "vacge", "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def VACGEq : N3VQInt<
    1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ,
    "vacge", "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;

// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def VACGTd : N3VDInt<
    1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND,
    "vacgt", "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def VACGTq : N3VQInt<
    1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ,
    "vacgt", "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;

// VTST : Vector Test Bits
defm VTST : N3V_QHS<
    0, 0, 0b1000, 1,
    IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
    "vtst", "", NEONvtst, 1>;

// Vector Bitwise Operations.

// Bitwise NOT expressed as XOR with an all-ones vector (bitconverted from
// v8i8 for the D form, v16i8 for the Q form).
def vnotd : PatFrag<
    (ops node:$in),
    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<
    (ops node:$in),
    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND : Vector Bitwise AND
def VANDd : N3VDX<
    0, 0, 0b00, 0b0001, 1, IIC_VBINiD,
    "vand", v2i32, v2i32, and, 1>;
def VANDq : N3VQX<
    0, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
    "vand", v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def VEORd : N3VDX<
    1, 0, 0b00, 0b0001, 1, IIC_VBINiD,
    "veor", v2i32, v2i32, xor, 1>;
def VEORq : N3VQX<
    1, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
    "veor", v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def VORRd : N3VDX<
    0, 0, 0b10, 0b0001, 1, IIC_VBINiD,
    "vorr", v2i32, v2i32, or, 1>;
def VORRq : N3VQX<
    0, 0, 0b10, 0b0001, 1, IIC_VBINiQ,
    "vorr", v4i32, v4i32, or, 1>;

// VORR (immediate).  $src is tied to $Vd.  The '?' entries in the 4-bit
// literal are the bits supplied from $SIMM by the `let Inst{...}` in each body
// (presumably the literal maps onto Inst{11-8} -- confirm against N1ModImm).
def VORRiv4i16 : N1ModImm<
    1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
    (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
    IIC_VMOVImm,
    "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
    [(set DPR:$Vd, (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<
    1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
    (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
    IIC_VMOVImm,
    "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
    [(set DPR:$Vd, (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<
    1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
    (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
    IIC_VMOVImm,
    "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
    [(set QPR:$Vd, (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<
    1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
    (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
    IIC_VMOVImm,
    "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
    [(set QPR:$Vd, (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
def VBICd : N3VX<
    0, 0, 0b01, 0b0001, 0, 1,
    (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
    N3RegFrm, IIC_VBINiD,
    "vbic", "$Vd, $Vn, $Vm", "",
    [(set DPR:$Vd, (v2i32 (and DPR:$Vn, (vnotd DPR:$Vm))))]>;
def VBICq : N3VX<
    0, 0, 0b01, 0b0001, 1, 1,
    (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
    N3RegFrm, IIC_VBINiQ,
    "vbic", "$Vd, $Vn, $Vm", "",
    [(set QPR:$Vd, (v4i32 (and QPR:$Vn, (vnotq QPR:$Vm))))]>;

// VBIC (immediate).  Same operand layout as the VORR immediate records.
def VBICiv4i16 : N1ModImm<
    1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
    (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
    IIC_VMOVImm,
    "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
    [(set DPR:$Vd, (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<
    1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
    (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
    IIC_VMOVImm,
    "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
    [(set DPR:$Vd, (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<
    1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
    (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
    IIC_VMOVImm,
    "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
    [(set QPR:$Vd, (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<
    1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
    (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
    IIC_VMOVImm,
    "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
    [(set QPR:$Vd, (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def VORNd : N3VX<
    0, 0, 0b11, 0b0001, 0, 1,
    (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
    N3RegFrm, IIC_VBINiD,
    "vorn", "$Vd, $Vn, $Vm", "",
    [(set DPR:$Vd, (v2i32 (or DPR:$Vn, (vnotd DPR:$Vm))))]>;
def VORNq : N3VX<
    0, 0, 0b11, 0b0001, 1, 1,
    (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
    N3RegFrm, IIC_VBINiQ,
    "vorn", "$Vd, $Vn, $Vm", "",
    [(set QPR:$Vd, (v4i32 (or QPR:$Vn, (vnotq QPR:$Vm))))]>;

// VMVN : Vector Bitwise NOT (Immediate)
// All four immediate records are marked rematerializable.

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<
    1, 0b000, {1,0,?,0}, 0, 0, 1, 1,
    (outs DPR:$Vd), (ins nModImm:$SIMM),
    IIC_VMOVImm,
    "vmvn", "i16", "$Vd, $SIMM", "",
    [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<
    1, 0b000, {1,0,?,0}, 0, 1, 1, 1,
    (outs QPR:$Vd), (ins nModImm:$SIMM),
    IIC_VMOVImm,
    "vmvn", "i16", "$Vd, $SIMM", "",
    [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<
    1, 0b000, {?,?,?,?}, 0, 0, 1, 1,
    (outs DPR:$Vd), (ins nModImm:$SIMM),
    IIC_VMOVImm,
    "vmvn", "i32", "$Vd, $SIMM", "",
    [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<
    1, 0b000, {?,?,?,?}, 0, 1, 1, 1,
    (outs QPR:$Vd), (ins nModImm:$SIMM),
    IIC_VMOVImm,
    "vmvn", "i32", "$Vd, $SIMM", "",
    [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
// NOTE(review): VMVNq operates on QPR but uses the D-form itinerary
// IIC_VSUBiD, same as VMVNd -- confirm whether a Q-form itinerary was
// intended.
def VMVNd : N2VX<
    0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
    "vmvn", "$Vd, $Vm", "",
    [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def VMVNq : N2VX<
    0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
    "vmvn", "$Vd, $Vm", "",
    [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

// VBSL : Vector Bitwise Select
// $src1 is tied to $Vd and acts as the mask in the selection pattern:
// (Vn & src1) | (Vm & ~src1).
def VBSLd : N3VX<
    1, 0, 0b01, 0b0001, 0, 1,
    (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
    N3RegFrm, IIC_VCNTiD,
    "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
    [(set DPR:$Vd,
          (v2i32 (or (and DPR:$Vn, DPR:$src1),
                     (and DPR:$Vm, (vnotd DPR:$src1)))))]>;
def VBSLq : N3VX<
    1, 0, 0b01, 0b0001, 1, 1,
    (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
    N3RegFrm, IIC_VCNTiQ,
    "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
    [(set QPR:$Vd,
          (v4i32 (or (and QPR:$Vn, QPR:$src1),
                     (and QPR:$Vm, (vnotq QPR:$src1)))))]>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
// VBIFd / VBIFq carry no selection pattern (disassembly only); $src1 is tied
// to $Vd.
def VBIFd : N3VX<
    1, 0, 0b11, 0b0001, 0, 1,
    (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
    N3RegFrm, IIC_VBINiD,
    "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
    [/* For disassembly only; pattern left blank */]>;
def VBIFq : N3VX<
    1, 0, 0b11, 0b0001, 1, 1,
    (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
    N3RegFrm, IIC_VBINiQ,
    "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
    [/* For disassembly only; pattern left blank */]>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
// NOTE(review): the "like VBSL" line above names $dst/$src2/$src3, which the
// defs below do not declare (they use $Vd, $Vn, $Vm and a tied $src1) --
// the comment looks stale; confirm and update.
def VBITd : N3VX<
    1, 0, 0b10, 0b0001, 0, 1,
    (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
    N3RegFrm, IIC_VBINiD,
    "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
    [/* For disassembly only; pattern left blank */]>;
def VBITq : N3VX<
    1, 0, 0b10, 0b0001, 1, 1,
    (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
    N3RegFrm, IIC_VBINiQ,
    "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
    [/* For disassembly only; pattern left blank */]>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.
// VABD : Vector Absolute Difference
// The f32 records reuse the int_arm_neon_vabds intrinsic with v2f32 / v4f32.
defm VABDs : N3VInt_QHS<
    0, 0, 0b0111, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<
    1, 0, 0b0111, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<
    1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
    "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<
    1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
    "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
// Both the signed and the unsigned variant widen with zext.
defm VABDLs : N3VLIntExt_QHS<
    0,1,0b0111,0, IIC_VSUBi4Q,
    "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<
    1,1,0b0111,0, IIC_VSUBi4Q,
    "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<
    0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
    "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<
    1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
    "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<
    0,1,0b0101,0, IIC_VABAD,
    "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<
    1,1,0b0101,0, IIC_VABAD,
    "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.
// VMAX : Vector Maximum
// The f32 records reuse the signed intrinsic (int_arm_neon_vmaxs).
defm VMAXs : N3VInt_QHS<
    0, 0, 0b0110, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<
    1, 0, 0b0110, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vmax", "u", int_arm_neon_vmaxu, 1>;
def VMAXfd : N3VDInt<
    0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
    "vmax", "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def VMAXfq : N3VQInt<
    0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
    "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMIN : Vector Minimum
// The f32 records reuse the signed intrinsic (int_arm_neon_vmins).
defm VMINs : N3VInt_QHS<
    0, 0, 0b0110, 1, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<
    1, 0, 0b0110, 1, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vmin", "u", int_arm_neon_vminu, 1>;
def VMINfd : N3VDInt<
    0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
    "vmin", "f32", v2f32, v2f32, int_arm_neon_vmins, 1>;
def VMINfq : N3VQInt<
    0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
    "vmin", "f32", v4f32, v4f32, int_arm_neon_vmins, 1>;

// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
// Only D-register records are defined here; integer and f32 forms share the
// int_arm_neon_vpadd intrinsic.
def VPADDi8 : N3VDInt<
    0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
    "vpadd", "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>;
def VPADDi16 : N3VDInt<
    0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
    "vpadd", "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>;
def VPADDi32 : N3VDInt<
    0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
    "vpadd", "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>;
def VPADDf : N3VDInt<
    1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VPBIND,
    "vpadd", "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<
    0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
    int_arm_neon_vpaddls>;
defm VPADDLu : N2VPLInt_QHS<
    0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
    int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs : N2VPLInt2_QHS<
    0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
    int_arm_neon_vpadals>;
defm VPADALu : N2VPLInt2_QHS<
    0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
    int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
// The f32 record reuses the signed intrinsic (int_arm_neon_vpmaxs).
def VPMAXs8 : N3VDInt<
    0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def VPMAXs16 : N3VDInt<
    0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def VPMAXs32 : N3VDInt<
    0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def VPMAXu8 : N3VDInt<
    1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def VPMAXu16 : N3VDInt<
    1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def VPMAXu32 : N3VDInt<
    1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def VPMAXf : N3VDInt<
    1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND,
    "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN : Vector Pairwise Minimum
// The f32 record reuses the signed intrinsic (int_arm_neon_vpmins).
def VPMINs8 : N3VDInt<
    0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def VPMINs16 : N3VDInt<
    0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def VPMINs32 : N3VDInt<
    0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def VPMINu8 : N3VDInt<
    1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def VPMINu16 : N3VDInt<
    1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def VPMINu32 : N3VDInt<
    1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def VPMINf : N3VDInt<
    1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND,
    "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
3836 3837// VRECPE : Vector Reciprocal Estimate 3838def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 3839 IIC_VUNAD, "vrecpe", "u32", 3840 v2i32, v2i32, int_arm_neon_vrecpe>; 3841def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 3842 IIC_VUNAQ, "vrecpe", "u32", 3843 v4i32, v4i32, int_arm_neon_vrecpe>; 3844def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 3845 IIC_VUNAD, "vrecpe", "f32", 3846 v2f32, v2f32, int_arm_neon_vrecpe>; 3847def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 3848 IIC_VUNAQ, "vrecpe", "f32", 3849 v4f32, v4f32, int_arm_neon_vrecpe>; 3850 3851// VRECPS : Vector Reciprocal Step 3852def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 3853 IIC_VRECSD, "vrecps", "f32", 3854 v2f32, v2f32, int_arm_neon_vrecps, 1>; 3855def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 3856 IIC_VRECSQ, "vrecps", "f32", 3857 v4f32, v4f32, int_arm_neon_vrecps, 1>; 3858 3859// VRSQRTE : Vector Reciprocal Square Root Estimate 3860def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 3861 IIC_VUNAD, "vrsqrte", "u32", 3862 v2i32, v2i32, int_arm_neon_vrsqrte>; 3863def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 3864 IIC_VUNAQ, "vrsqrte", "u32", 3865 v4i32, v4i32, int_arm_neon_vrsqrte>; 3866def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 3867 IIC_VUNAD, "vrsqrte", "f32", 3868 v2f32, v2f32, int_arm_neon_vrsqrte>; 3869def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 3870 IIC_VUNAQ, "vrsqrte", "f32", 3871 v4f32, v4f32, int_arm_neon_vrsqrte>; 3872 3873// VRSQRTS : Vector Reciprocal Square Root Step 3874def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 3875 IIC_VRECSD, "vrsqrts", "f32", 3876 v2f32, v2f32, int_arm_neon_vrsqrts, 1>; 3877def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 3878 IIC_VRECSQ, "vrsqrts", "f32", 3879 v4f32, v4f32, int_arm_neon_vrsqrts, 1>; 3880 3881// Vector Shifts. 
// VSHL: Vector Shift (three-register form, selected from the
// int_arm_neon_vshifts / int_arm_neon_vshiftu intrinsics).
defm VSHLs :
  N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu :
  N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                "vshl", "u", int_arm_neon_vshiftu>;
// VSHL: Vector Shift Left (Immediate), selected from NEONvshl.
defm VSHLi :
  N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
             N2RegVShLFrm>;
// VSHR: Vector Shift Right (Immediate), signed and unsigned.
defm VSHRs :
  N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs,
             N2RegVShRFrm>;
defm VSHRu :
  N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru,
             N2RegVShRFrm>;

// VSHLL: Vector Shift Left Long.
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL: Vector Shift Left Long (with maximum shift count).
// N2VLShMax forwards everything to N2VLSh and additionally fixes
// Inst{21-16} to the op21_16 argument.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
                bit op7, bit op6, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4,
           OpcodeStr, Dt, ResTy, OpTy, OpNode> {
  let Inst{21-16} = op21_16;
}
def VSHLLi8 :
  N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
            v8i16, v8i8, NEONvshlli>;
def VSHLLi16 :
  N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
            v4i32, v4i16, NEONvshlli>;
def VSHLLi32 :
  N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
            v2i64, v2i32, NEONvshlli>;

// VSHRN: Vector Shift Right and Narrow.
defm VSHRN :
  N2VNSh_HSD<0, 1, 0b1000, 0, 0, 1, IIC_VSHLiD, "vshrn", "i",
             NEONvshrn>;

// VRSHL: Vector Rounding Shift.
defm VRSHLs :
  N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu :
  N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR: Vector Rounding Shift Right.
defm VRSHRs :
  N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
             N2RegVShRFrm>;
defm VRSHRu :
  N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru,
             N2RegVShRFrm>;

// VRSHRN: Vector Rounding Shift Right and Narrow.
defm VRSHRN :
  N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
             NEONvrshrn>;

// VQSHL: Vector Saturating Shift.
defm VQSHLs :
  N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu :
  N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL: Vector Saturating Shift Left (Immediate).
defm VQSHLsi :
  N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls,
             N2RegVShLFrm>;
defm VQSHLui :
  N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu,
             N2RegVShLFrm>;
// VQSHLU: Vector Saturating Shift Left (Immediate, Unsigned).
defm VQSHLsu :
  N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu,
             N2RegVShLFrm>;

// VQSHRN: Vector Saturating Shift Right and Narrow.
defm VQSHRNs :
  N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
             NEONvqshrns>;
defm VQSHRNu :
  N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
             NEONvqshrnu>;

// VQSHRUN: Vector Saturating Shift Right and Narrow (Unsigned).
defm VQSHRUN :
  N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
             NEONvqshrnsu>;

// VQRSHL: Vector Saturating Rounding Shift.
defm VQRSHLs :
  N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu :
  N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN: Vector Saturating Rounding Shift Right and Narrow.
defm VQRSHRNs :
  N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
             NEONvqrshrns>;
defm VQRSHRNu :
  N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
             NEONvqrshrnu>;

// VQRSHRUN: Vector Saturating Rounding Shift Right and Narrow (Unsigned).
defm VQRSHRUN :
  N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
             NEONvqrshrnsu>;

// VSRA: Vector Shift Right and Accumulate (built on the plain
// shift-right nodes NEONvshrs / NEONvshru).
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA: Vector Rounding Shift Right and Accumulate.
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI: Vector Shift Left and Insert.
defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>;
// VSRI: Vector Shift Right and Insert.
defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;

// Vector Absolute and Saturating Absolute.
// VABS: Vector Absolute Value.
// Integer forms come from the N2VInt_QHS multiclass; the f32 forms are
// spelled out for v2f32 and v4f32. All select int_arm_neon_vabs.
defm VABS :
  N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
             IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", int_arm_neon_vabs>;
def VABSfd :
  N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, IIC_VUNAD,
          "vabs", "f32", v2f32, v2f32, int_arm_neon_vabs>;
def VABSfq :
  N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, IIC_VUNAQ,
          "vabs", "f32", v4f32, v4f32, int_arm_neon_vabs>;

// VQABS: Vector Saturating Absolute Value.
defm VQABS :
  N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
             IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", int_arm_neon_vqabs>;

// Vector Negate.

// Integer negation is matched as (0 - x): a subtraction whose left
// operand is a bitconverted all-zeros vector (v2i32 for the D-register
// fragment, v4i32 for the Q-register fragment).
def vnegd :
  PatFrag<(ops node:$in),
          (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq :
  PatFrag<(ops node:$in),
          (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

// Instruction templates for integer VNEG on DPR and QPR operands.
// `size` is passed through to N2V; `Ty` is the vector type being matched.
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG: Vector Negate (integer).
def VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG: Vector Negate (floating-point), matched directly from fneg.
def VNEGfd :
  N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
      (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
      "vneg", "f32", "$Vd, $Vm", "",
      [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def VNEGf32q :
  N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
      (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
      "vneg", "f32", "$Vd, $Vm", "",
      [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

// Stand-alone selection patterns mapping each integer vector type's
// negate fragment onto the matching VNEG record.
def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG: Vector Saturating Negate.
defm VQNEG :
  N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
             IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s", int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS: Vector Count Leading Sign Bits.
defm VCLS :
  N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
             IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s", int_arm_neon_vcls>;
// VCLZ: Vector Count Leading Zeros.
defm VCLZ :
  N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
             IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", int_arm_neon_vclz>;
// VCNT: Vector Count One Bits (byte elements only: v8i8 and v16i8).
def VCNTd :
  N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiD,
          "vcnt", "8", v8i8, v8i8, int_arm_neon_vcnt>;
def VCNTq :
  N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ,
          "vcnt", "8", v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Swap -- for disassembly only (both records carry an empty
// selection pattern).
def VSWPd :
  N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
       (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
       "vswp", "$Vd, $Vm", "", []>;
def VSWPq :
  N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
       (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
       "vswp", "$Vd, $Vm", "", []>;

// Vector Move Operations.
// VMOV: Vector Move (Register).

let neverHasSideEffects = 1 in {
// Both register moves copy the Vm field into Vn as well, so the
// instruction is encoded with Vn == Vm.
def VMOVDneon :
  N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$Vm),
       N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
  let Vn{4-0} = Vm{4-0};
}
def VMOVQ :
  N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$Vm),
       N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
  let Vn{4-0} = Vm{4-0};
}

// Pseudo vector move instructions for QQ and QQQQ registers. These should
// be expanded after register allocation is completed.
def VMOVQQ :
  PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), NoItinerary, []>;

def VMOVQQQQ :
  PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), NoItinerary, []>;
} // neverHasSideEffects

// VMOV: Vector Move (Immediate).
// Every form takes an nModImm operand and selects NEONvmovImm. Where the
// fourth template argument has unknown ('?') bits, the record fills them
// from the corresponding bits of $SIMM.

let isReMaterializable = 1 in {
def VMOVv8i8 :
  N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1,
           (outs DPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm,
           "vmov", "i8", "$Vd, $SIMM", "",
           [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 :
  N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1,
           (outs QPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm,
           "vmov", "i8", "$Vd, $SIMM", "",
           [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv4i16 :
  N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1,
           (outs DPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm,
           "vmov", "i16", "$Vd, $SIMM", "",
           [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 :
  N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1,
           (outs QPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm,
           "vmov", "i16", "$Vd, $SIMM", "",
           [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 :
  N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1,
           (outs DPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm,
           "vmov", "i32", "$Vd, $SIMM", "",
           [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 :
  N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1,
           (outs QPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm,
           "vmov", "i32", "$Vd, $SIMM", "",
           [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 :
  N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1,
           (outs DPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm,
           "vmov", "i64", "$Vd, $SIMM", "",
           [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 :
  N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1,
           (outs QPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm,
           "vmov", "i64", "$Vd, $SIMM", "",
           [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
} // isReMaterializable

// VMOV: Vector Get Lane (move scalar to ARM core register).
// The lane index is spread over Inst{21} and Inst{6-5}; narrower
// elements use more of those bits.

def VGETLNs8 :
  NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
            (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
            IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]",
            [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V), imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 :
  NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
            (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
            IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]",
            [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V), imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VGETLNu8 :
  NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
            (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
            IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]",
            [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V), imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 :
  NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
            (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
            IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]",
            [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V), imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VGETLNi32 :
  NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
            (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
            IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]",
            [(set GPR:$R, (extractelt (v2i32 DPR:$V), imm:$lane))]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

// Lane reads from a Q register: extract the D sub-register chosen by
// the DSubReg_*_reg transform, then read the lane given by the
// SubReg_*_lane transform with the D-register instruction.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32
            (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))),
            (SubReg_i32_lane imm:$lane))>;
// f32 and f64 element extraction is a pure sub-register extract; the
// f32 forms first copy the source into the DPR_VFP2 / QPR_VFP2 class.
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
            (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)),
            (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
            (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)),
            (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV: Vector Set Lane (move ARM core register to scalar).
// The destination is tied to $src1, so the lanes that are not written
// keep their previous contents.

let Constraints = "$src1 = $V" in {
def VSETLNi8 :
  NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
            (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
            IIC_VMOVISL, "vmov", "8", "$V[$lane], $R",
            [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                         GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 :
  NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
            (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
            IIC_VMOVISL, "vmov", "16", "$V[$lane], $R",
            [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                         GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VSETLNi32 :
  NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
            (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
            IIC_VMOVISL, "vmov", "32", "$V[$lane], $R",
            [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                     GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{0};
}
}

// Lane writes into a Q register: extract the affected D sub-register,
// set the lane there, and insert the result back at the same
// sub-register index.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                   (v8i8 (VSETLNi8
                           (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                   (DSubReg_i8_reg imm:$lane))),
                           GPR:$src2, (SubReg_i8_lane imm:$lane))),
                   (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                   (v4i16 (VSETLNi16
                            (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i16_reg imm:$lane))),
                            GPR:$src2, (SubReg_i16_lane imm:$lane))),
                   (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                   (v2i32 (VSETLNi32
                            (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i32_reg imm:$lane))),
                            GPR:$src2, (SubReg_i32_lane imm:$lane))),
                   (DSubReg_i32_reg imm:$lane)))>;

// f32 element insertion is a sub-register insert after copying the
// vector into the DPR_VFP2 / QPR_VFP2 register class.
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector for floating-point scalars: place the scalar in
// sub-register 0 of an IMPLICIT_DEF vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

// scalar_to_vector for core-register scalars (64-bit vectors): set
// lane 0 of an IMPLICIT_DEF vector.
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

// Same for 128-bit vectors: build the low D register as above and
// insert it as dsub_0 of an IMPLICIT_DEF Q register.
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v16i8 (IMPLICIT_DEF)),
            (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v8i16 (IMPLICIT_DEF)),
            (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v4i32 (IMPLICIT_DEF)),
            (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            dsub_0)>;

// VDUP: Vector Duplicate (from ARM core register to all elements).

// Templates for the integer forms; `Ty` is the result vector type and
// the source is always an i32 core register.
class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

def VDUP8d  : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
def VDUP8q  : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// f32 forms: same encodings as VDUP32d / VDUP32q, matching a core
// register that is bitconverted to f32.
def VDUPfd :
  NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$V), (ins GPR:$R),
        IIC_VMOVIS, "vdup", "32", "$V, $R",
        [(set DPR:$V, (v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))))]>;
def VDUPfq :
  NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$V), (ins GPR:$R),
        IIC_VMOVIS, "vdup", "32", "$V, $R",
        [(set QPR:$V, (v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))))]>;

// VDUP: Vector Duplicate Lane (from scalar to all elements).

// D-register result; source and result share the type `Ty`.
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, ValueType Ty>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;

// Q-register result produced from a D-register source, hence the
// separate result (`ResTy`) and operand (`OpTy`) types.
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
                                                  imm:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.
// Lane-duplicate records. The fixed low bits of the first template
// argument differ per element size, and the lane index is written into
// the remaining upper bits of Inst{19-16}.
def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> {
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> {
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> {
  let Inst{19} = lane{0};
}
def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32> {
  let Inst{19} = lane{0};
}
def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> {
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> {
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> {
  let Inst{19} = lane{0};
}
def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32> {
  let Inst{19} = lane{0};
}

// Lane duplication whose source is a Q register: extract the D
// sub-register holding the lane and duplicate from there.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q
                   (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                   (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q
                   (v4i16 (EXTRACT_SUBREG QPR:$src,
                            (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q
                   (v2i32 (EXTRACT_SUBREG QPR:$src,
                            (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLNfq
                   (v2f32 (EXTRACT_SUBREG QPR:$src,
                            (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

// Pseudo-instructions duplicating an f32 held in an SPR register.
// Both use the IIC_VMOVD itinerary.
def VDUPfdf :
  PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
              [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def VDUPfqf :
  PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
              [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// VMOVN: Vector Narrowing Move (selected from trunc).
defm VMOVN :
  N2VN_HSD<0b11, 0b11, 0b10, 0b00100, 0, 0, IIC_VMOVN,
           "vmovn", "i", trunc>;
// VQMOVN: Vector Saturating Narrowing Move.
defm VQMOVNs :
  N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 0, 0, IIC_VQUNAiD,
              "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu :
  N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 1, 0, IIC_VQUNAiD,
              "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu :
  N2VNInt_HSD<0b11, 0b11, 0b10, 0b00100, 1, 0, IIC_VQUNAiD,
              "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL: Vector Lengthening Move (selected from sext / zext).
defm VMOVLs : N2VL_QHS<0b01, 0b10100, 0, 1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11, 0b10100, 0, 1, "vmovl", "u", zext>;

// Vector Conversions.

// VCVT: Vector Convert Between Floating-Point and Integers.
// Selected from the generic fp_to_sint / fp_to_uint / sint_to_fp /
// uint_to_fp nodes, on v2 (N2VD) and v4 (N2VQ) 32-bit element vectors.
def VCVTf2sd :
  N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
       v2i32, v2f32, fp_to_sint>;
def VCVTf2ud :
  N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
       v2i32, v2f32, fp_to_uint>;
def VCVTs2fd :
  N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
       v2f32, v2i32, sint_to_fp>;
def VCVTu2fd :
  N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
       v2f32, v2i32, uint_to_fp>;

def VCVTf2sq :
  N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
       v4i32, v4f32, fp_to_sint>;
def VCVTf2uq :
  N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
       v4i32, v4f32, fp_to_uint>;
def VCVTs2fq :
  N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
       v4f32, v4i32, sint_to_fp>;
def VCVTu2fq :
  N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
       v4f32, v4i32, uint_to_fp>;

// VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
// Fixed-point conversions, selected from the int_arm_neon_vcvt*
// intrinsics. D-class (N2VCvtD) records first, then Q-class (N2VCvtQ).
def VCVTf2xsd :
  N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
          v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud :
  N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
          v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd :
  N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
          v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd :
  N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
          v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;

def VCVTf2xsq :
  N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
          v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq :
  N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
          v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq :
  N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
          v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq :
  N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
          v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;

// Vector Reverse.

// VREV64: Vector Reverse elements within 64-bit doublewords.
// The D and Q templates differ in register class, itinerary, and the
// sixth N2V argument (0 for D, 1 for Q).

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
// The f32 records reuse the 32-bit integer encoding and data type.
def VREV64df  : VREV64D<0b10, "vrev64", "32", v2f32>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def VREV64qf  : VREV64Q<0b10, "vrev64", "32", v4f32>;

// VREV32: Vector Reverse elements within 32-bit words.

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16: Vector Reverse elements within 16-bit halfwords.

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.
// VEXT: Vector Extract.
//
// Both templates declare a 4-bit `index` field and, by default, place it
// unscaled in Inst{11-8}. Each record below overrides that placement so
// the element index lands in the upper bits of Inst{11-8} and the low
// bits are zero, i.e. the encoded value is the index scaled by the
// element size in bytes.
// NOTE(review): this assumes $index is expressed in elements, which is
// what the per-size overrides of the integer records imply -- confirm
// against the code that creates NEONvext nodes.

class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0, 1, 0b11, {?,?,?,?}, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}

class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0, 1, 0b11, {?,?,?,?}, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}

def VEXTd8 : VEXTd<"vext", "8", v8i8> {
  let Inst{11-8} = index{3-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16> {
  let Inst{11-9} = index{2-0};
  let Inst{8} = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8} = 0b00;
}
// f32 elements are 32 bits wide and this record prints the same
// "vext.32" assembly as VEXTd32, so it must encode the index the same
// way. The previous `Inst{11} = index{0}` scaled the index by 8 bytes
// instead of 4.
def VEXTdf : VEXTd<"vext", "32", v2f32> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8} = 0b00;
}

def VEXTq8 : VEXTq<"vext", "8", v16i8> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16> {
  let Inst{11-9} = index{2-0};
  let Inst{8} = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8} = 0b00;
}
// Same correction as VEXTdf: a v4f32 index ranges over 0..3, so both
// index bits are needed; encoding only index{0} in Inst{11} dropped
// index{1} and used the wrong scale.
def VEXTqf : VEXTq<"vext", "32", v4f32> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8} = 0b00;
}

// VTRN: Vector Transpose.

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP: Vector Unzip (Deinterleave).

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP: Vector Zip (Interleave).

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

// VTBL : Vector Table Lookup
//
// VTBL1 is selected directly from the int_arm_neon_vtbl1 intrinsic.  VTBL2-4
// list every table register as a separate DPR operand and carry no selection
// pattern of their own.
def VTBL1
  : N3V<1, 1, 0b11, 0b1000, 0, 0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>;

let hasExtraSrcRegAllocReq = 1 in {
  def VTBL2
    : N3V<1, 1, 0b11, 0b1001, 0, 0,
          (outs DPR:$Vd),
          (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm),
          NVTBLFrm, IIC_VTB2,
          "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
  def VTBL3
    : N3V<1, 1, 0b11, 0b1010, 0, 0,
          (outs DPR:$Vd),
          (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
          NVTBLFrm, IIC_VTB3,
          "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
  def VTBL4
    : N3V<1, 1, 0b11, 0b1011, 0, 0,
          (outs DPR:$Vd),
          (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
          NVTBLFrm, IIC_VTB4,
          "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudo forms that take the whole table as one Q or QQ register operand.
// NOTE(review): presumably expanded to VTBL2-4 by a later pass; the expansion
// is not visible in this part of the file.
def VTBL2Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src),
                IIC_VTB2, "", []>;
def VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src),
                IIC_VTB3, "", []>;
def VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src),
                IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
//
// Same operand layout as VTBL plus an extra $orig input that is tied to the
// destination through the "$orig = $Vd" constraint.
def VTBX1
  : N3V<1, 1, 0b11, 0b1000, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$orig, DPR:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, DPR:$Vn, DPR:$Vm)))]>;

let hasExtraSrcRegAllocReq = 1 in {
  def VTBX2
    : N3V<1, 1, 0b11, 0b1001, 1, 0,
          (outs DPR:$Vd),
          (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm),
          NVTBLFrm, IIC_VTBX2,
          "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
  def VTBX3
    : N3V<1, 1, 0b11, 0b1010, 1, 0,
          (outs DPR:$Vd),
          (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
          NVTBLFrm, IIC_VTBX3,
          "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
          "$orig = $Vd", []>;
  def VTBX4
    : N3V<1, 1, 0b11, 0b1011, 1, 0,
          (outs DPR:$Vd),
          (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
          NVTBLFrm, IIC_VTBX4,
          "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
          "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudo forms of VTBX taking the table as one Q or QQ register operand; the
// $orig input stays tied to the result.
def VTBX2Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
                IIC_VTBX2, "$orig = $dst", []>;
def VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// N2VSPat: select a one-operand scalar node (OpNode SPR:$a) of type ResTy
// with the two-register NEON instruction Inst.  $a is inserted into lane
// ssub_0 of an undefined OpTy value constrained to DPR_VFP2, Inst is applied,
// and lane ssub_0 of the (DPR_VFP2-constrained) result is extracted.
class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
  : NEONFPPat<
      (ResTy (OpNode SPR:$a)),
      (EXTRACT_SUBREG
        (OpTy (COPY_TO_REGCLASS
                (OpTy (Inst
                        (INSERT_SUBREG
                          (OpTy (COPY_TO_REGCLASS (OpTy (IMPLICIT_DEF)),
                                                  DPR_VFP2)),
                          SPR:$a, ssub_0))),
                DPR_VFP2)),
        ssub_0)>;

// N3VSPat: select a two-operand f32 node with the three-register NEON
// instruction Inst.  Each scalar is inserted into lane ssub_0 of an undefined
// v2f32 and the result is read back from lane ssub_0.
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<
      (f32 (OpNode SPR:$a, SPR:$b)),
      (EXTRACT_SUBREG
        (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, ssub_0),
                     (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, ssub_0))),
        ssub_0)>;

// N3VSMulOpPat: select OpNode($acc, MulNode($a, $b)) on f32 with the
// accumulating NEON instruction Inst, using the same lane-0 insert/extract
// scheme as N3VSPat.
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<
      (f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
      (EXTRACT_SUBREG
        (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, ssub_0),
              (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, ssub_0),
              (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, ssub_0)),
        ssub_0)>;

// These need separate instructions because they must use DPR_VFP2
// register class, which has SPR sub-registers.

// Vector Add Operations used for single-precision FP
let neverHasSideEffects = 1 in {
  def VADDfd_sfp : N3VS<0, 0, 0b00, 0b1101, 0, "vadd", "f32">;
}
def : N3VSPat<fadd, VADDfd_sfp>;

// Vector Sub Operations used for single-precision FP
let neverHasSideEffects = 1 in {
  def VSUBfd_sfp : N3VS<0, 0, 0b10, 0b1101, 0, "vsub", "f32">;
}
def : N3VSPat<fsub, VSUBfd_sfp>;

// Vector Multiply Operations used for single-precision FP
let neverHasSideEffects = 1 in {
  def VMULfd_sfp : N3VS<1, 0, 0b00, 0b1101, 1, "vmul", "f32">;
}
def : N3VSPat<fmul, VMULfd_sfp>;

// Vector Multiply-Accumulate/Subtract used for single-precision FP
// vml[as].f32 can cause 4-8 cycle stalls in the ASIMD instructions that
// follow (e.g. alternating vmla/vadd), so we want to avoid them for now.
// The patterns are therefore additionally gated on UseFPVMLx.

let neverHasSideEffects = 1 in {
  def VMLAfd_sfp
    : N3VSMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32">;
}
def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;

let neverHasSideEffects = 1 in {
  def VMLSfd_sfp
    : N3VSMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32">;
}
def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;

// Vector Absolute used for single-precision FP
def : N2VSPat<fabs, f32, v2f32, VABSfd>;

// Vector Negate used for single-precision FP
def : N2VSPat<fneg, f32, v2f32, VNEGfd>;

// Vector Maximum used for single-precision FP
let neverHasSideEffects = 1 in {
  def VMAXfd_sfp
    : N3V<0, 0, 0b00, 0b1111, 0, 0,
          (outs DPR_VFP2:$Vd),
          (ins DPR_VFP2:$Vn, DPR_VFP2:$Vm),
          N3RegFrm, IIC_VBIND,
          "vmax", "f32", "$Vd, $Vn, $Vm", "", []>;
}
def : N3VSPat<NEONfmax, VMAXfd_sfp>;

// Vector Minimum used for single-precision FP
let neverHasSideEffects = 1 in {
  def VMINfd_sfp
    : N3V<0, 0, 0b10, 0b1111, 0, 0,
          (outs DPR_VFP2:$Vd),
          (ins DPR_VFP2:$Vn, DPR_VFP2:$Vm),
          N3RegFrm, IIC_VBIND,
          "vmin", "f32", "$Vd, $Vn, $Vm", "", []>;
}
def : N3VSPat<NEONfmin, VMINfd_sfp>;

// Vector Convert between single-precision FP and integer
def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd>;
def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud>;
def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd>;
def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
//
// Each pattern maps a bitconvert between two types held in the same register
// class onto the source register itself, retyped; no instruction is named in
// the result.

// D register: results of type v1i64.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;

// D register: results of type v2i32.
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;

// D register: results of type v4i16.
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;

// D register: results of type v8i8.
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;

// D register: results of type f64.
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;

// D register: results of type v2f32.
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

// Q register bitconverts: as for the D register patterns, the result is the
// source register retyped, with no instruction named in the output.

// Q register: results of type v2i64.
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;

// Q register: results of type v4i32.
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;

// Q register: results of type v8i16.
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;

// Q register: results of type v16i8.
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;

// Q register: results of type v4f32.
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;

// Q register: results of type v2f64.
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;