ARMInstrNEON.td revision 2d88e9b93d3df7e98ad2494c5500637237f8faef
//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profile shared by the vector compare nodes: one integer result and
// two operands that must have the same type as each other.
def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;

// Vector compare / test nodes, all using the SDTARMVCMP profile.
def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction: i32 result from an integer-typed source operand and an
// i32 second operand.
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Vector result produced from a single i32 operand.
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;

def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// One-result shuffles: vector result with the same type as the operand.
def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles: both results and both operands share one vector type.
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Integer result from two integer operands of the same type as each other;
// the result type is not tied to the operand type.
def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

// f32 result and two f32 operands.
def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Matches a VMOVIMM whose modified-immediate operand decodes to 32-bit
// elements with value 0.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Matches a VMOVIMM whose modified-immediate operand decodes to 8-bit
// elements with value 0xff.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//

// i32 operand printed with printNEONModImmOperand.
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQ
  : PseudoVFPLdStM<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoad_m, "",
                   [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQ
  : PseudoVFPLdStM<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStore_m, "",
                   [(store (v2f64 QPR:$src), addrmode4:$addr)]>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
// The QQQQ writeback pseudo also takes the destination super-register as a
// tied input ($src = $dst).
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

//   VLD1     : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
          (ins addrmode6:$addr), IIC_VLD1,
          "vld1", Dt, "\\{$dst\\}, $addr", "", []>;
class VLD1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr), IIC_VLD1x2,
          "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;

def  VLD1d8   : VLD1D<0b0000, "8">;
def  VLD1d16  : VLD1D<0b0100, "16">;
def  VLD1d32  : VLD1D<0b1000, "32">;
def  VLD1d64  : VLD1D<0b1100, "64">;

def  VLD1q8   : VLD1Q<0b0000, "8">;
def  VLD1q16  : VLD1Q<0b0100, "16">;
def  VLD1q32  : VLD1Q<0b1000, "32">;
def  VLD1q64  : VLD1Q<0b1100, "64">;

def  VLD1q8Pseudo  : VLDQPseudo<IIC_VLD1x2>;
def  VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
def  VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
def  VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;

// ...with address register writeback:
class VLD1DWB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1u,
          "vld1", Dt, "\\{$dst\\}, $addr$offset",
          "$addr.addr = $wb", []>;
class VLD1QWB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1x2u,
          "vld1", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VLD1d8_UPD  : VLD1DWB<0b0000, "8">;
def VLD1d16_UPD : VLD1DWB<0b0100, "16">;
def VLD1d32_UPD : VLD1DWB<0b1000, "32">;
def VLD1d64_UPD : VLD1DWB<0b1100, "64">;

def VLD1q8_UPD  : VLD1QWB<0b0000, "8">;
def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;

def VLD1q8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD1x2u>;
def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;

// ...with 3 registers (some of these are only for the disassembler):
class VLD1D3<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr), IIC_VLD1x3, "vld1", Dt,
          "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
class VLD1D3WB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1x3u, "vld1", Dt,
          "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>;

def VLD1d8T      : VLD1D3<0b0000, "8">;
def VLD1d16T     : VLD1D3<0b0100, "16">;
def VLD1d32T     : VLD1D3<0b1000, "32">;
def VLD1d64T     : VLD1D3<0b1100, "64">;

def VLD1d8T_UPD  : VLD1D3WB<0b0000, "8">;
def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;

def VLD1d64TPseudo     : VLDQQPseudo<IIC_VLD1x3>;
def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;

// ...with 4 registers (some of these are only for the disassembler):
class VLD1D4<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD1x4, "vld1", Dt,
          "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
// The writeback form uses the updating 4-register VLD1 itinerary, the same
// one used by VLD1d64QPseudo_UPD, so the pseudo and the real instruction
// are scheduled alike.
class VLD1D4WB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0010,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1x4u, "vld1", Dt,
          "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb",
          []>;

def VLD1d8Q      : VLD1D4<0b0000, "8">;
def VLD1d16Q     : VLD1D4<0b0100, "16">;
def VLD1d32Q     : VLD1D4<0b1000, "32">;
def VLD1d64Q     : VLD1D4<0b1100, "64">;

def VLD1d8Q_UPD  : VLD1D4WB<0b0000, "8">;
def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;

def VLD1d64QPseudo     : VLDQQPseudo<IIC_VLD1x4>;
def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;

//   VLD2     : Vector Load (multiple 2-element structures)
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr), IIC_VLD2,
          "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
class VLD2Q<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0011, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD2x2,
          "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;

def  VLD2d8   : VLD2D<0b1000, 0b0000, "8">;
def  VLD2d16  : VLD2D<0b1000, 0b0100, "16">;
def  VLD2d32  : VLD2D<0b1000, 0b1000, "32">;

def  VLD2q8   : VLD2Q<0b0000, "8">;
def  VLD2q16  : VLD2Q<0b0100, "16">;
def  VLD2q32  : VLD2Q<0b1000, "32">;

def  VLD2d8Pseudo  : VLDQPseudo<IIC_VLD2>;
def  VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
def  VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;

def  VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
def  VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def  VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;

// ...with address register writeback:
class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2u,
          "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
          "$addr.addr = $wb", []>;
class VLD2QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0011, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2x2u,
          "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VLD2d8_UPD  : VLD2DWB<0b1000, 0b0000, "8">;
def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;

def VLD2q8_UPD  : VLD2QWB<0b0000, "8">;
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
def VLD2q32_UPD : VLD2QWB<0b1000, "32">;

def VLD2d8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD2u>;
def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;

def VLD2q8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD2x2u>;
def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;

// ...with double-spaced registers (for disassembly only):
def VLD2b8      : VLD2D<0b1001, 0b0000, "8">;
def VLD2b16     : VLD2D<0b1001, 0b0100, "16">;
def VLD2b32     : VLD2D<0b1001, 0b1000, "32">;
def VLD2b8_UPD  : VLD2DWB<0b1001, 0b0000, "8">;
def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">;
def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">;

//   VLD3     : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr), IIC_VLD3,
          "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;

def  VLD3d8   : VLD3D<0b0100, 0b0000, "8">;
def  VLD3d16  : VLD3D<0b0100, 0b0100, "16">;
def  VLD3d32  : VLD3D<0b0100, 0b1000, "32">;

def  VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
def  VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
def  VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3u,
          "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VLD3d8_UPD  : VLD3DWB<0b0100, 0b0000, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD3q8      : VLD3D<0b0101, 0b0000, "8">;
def VLD3q16     : VLD3D<0b0101, 0b0100, "16">;
def VLD3q32     : VLD3D<0b0101, 0b1000, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, 0b0000, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

//   VLD4     : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD4,
          "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;

def  VLD4d8   : VLD4D<0b0000, 0b0000, "8">;
def  VLD4d16  : VLD4D<0b0000, 0b0100, "16">;
def  VLD4d32  : VLD4D<0b0000, 0b1000, "32">;

def  VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
def  VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
def  VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;

// ...with address register writeback:
// NOTE(review): unlike VLD2DWB / VLD3DWB (IIC_VLD2u / IIC_VLD3u), the VLD4
// writeback class and its pseudos below use the non-updating IIC_VLD4
// itinerary.  Confirm whether a dedicated updating itinerary is intended.
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4,
          "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VLD4d8_UPD  : VLD4DWB<0b0000, 0b0000, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4>;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD4q8      : VLD4D<0b0001, 0b0000, "8">;
def VLD4q16     : VLD4D<0b0001, 0b0100, "16">;
def VLD4q32     : VLD4D<0b0001, 0b1000, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, 0b0000, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>;

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Each lane-load pseudo takes the destination register as a tied input
// ($src = $dst) together with a lane-index operand; the WB variants also
// define $wb, tied to the address operand.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;

//   VLD1LN   : Vector Load (single element to one lane)
//   FIXME: Not yet implemented.

//   VLD2LN   : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VLD2ln, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
          "$src1 = $dst1, $src2 = $dst2", []>;

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8">;
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
          "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
          "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8">;
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;

//   VLD3LN   : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
          nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8">;
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VLD3lnu, "vld3", Dt,
          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset",
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
          []>;

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8">;
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;

//   VLD4LN   : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
          nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8">;
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;

// ...with address register writeback:
// The writeback form uses the updating lane itinerary IIC_VLD4lnu, the same
// one used by the VLD4LN*Pseudo_UPD definitions below and parallel to
// VLD2LNWB / VLD3LNWB (IIC_VLD2lnu / IIC_VLD3lnu).
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VLD4lnu, "vld4", Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
          []>;

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;

//   VLD1DUP  : Vector Load (single element to all lanes)
//   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
//   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
//   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
//   FIXME: Not yet implemented.
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

//   VST1     : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src),
          IIC_VST1, "vst1", Dt, "\\{$src\\}, $addr", "", []>;
class VST1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b1010,op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST1x2,
          "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;

def  VST1d8   : VST1D<0b0000, "8">;
def  VST1d16  : VST1D<0b0100, "16">;
def  VST1d32  : VST1D<0b1000, "32">;
def  VST1d64  : VST1D<0b1100, "64">;

def  VST1q8   : VST1Q<0b0000, "8">;
def  VST1q16  : VST1Q<0b0100, "16">;
def  VST1q32  : VST1Q<0b1000, "32">;
def  VST1q64  : VST1Q<0b1100, "64">;

def  VST1q8Pseudo  : VSTQPseudo<IIC_VST1x2>;
def  VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
def  VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
def  VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;

// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST1u,
          "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>;
class VST1QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2),
          IIC_VST1x2u, "vst1", Dt, "\\{$src1, $src2\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST1d8_UPD  : VST1DWB<0b0000, "8">;
def VST1d16_UPD : VST1DWB<0b0100, "16">;
def VST1d32_UPD : VST1DWB<0b1000, "32">;
def VST1d64_UPD : VST1DWB<0b1100, "64">;

def VST1q8_UPD  : VST1QWB<0b0000, "8">;
def VST1q16_UPD : VST1QWB<0b0100, "16">;
def VST1q32_UPD : VST1QWB<0b1000, "32">;
def VST1q64_UPD : VST1QWB<0b1100, "64">;

def VST1q8Pseudo_UPD  : VSTQWBPseudo<IIC_VST1x2u>;
def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;

// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
          IIC_VST1x3, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
class VST1D3WB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3),
          IIC_VST1x3u, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST1d8T      : VST1D3<0b0000, "8">;
def VST1d16T     : VST1D3<0b0100, "16">;
def VST1d32T     : VST1D3<0b1000, "32">;
def VST1d64T     : VST1D3<0b1100, "64">;

def VST1d8T_UPD  : VST1D3WB<0b0000, "8">;
def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
def VST1d64T_UPD : VST1D3WB<0b1100, "64">;

def VST1d64TPseudo     : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;

// ...with 4 registers (some of these are only for the disassembler):
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST1x4, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
          []>;
class VST1D4WB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
          "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST1d8Q      : VST1D4<0b0000, "8">;
def VST1d16Q     : VST1D4<0b0100, "16">;
def VST1d32Q     : VST1D4<0b1000, "32">;
def VST1d64Q     : VST1D4<0b1100, "64">;

def VST1d8Q_UPD  : VST1D4WB<0b0000, "8">;
def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;

def VST1d64QPseudo     : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;

//   VST2     : Vector Store (multiple 2-element structures)
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2),
          IIC_VST2, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
class VST2Q<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST2x2, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
          "", []>;

def  VST2d8   : VST2D<0b1000, 0b0000, "8">;
def  VST2d16  : VST2D<0b1000, 0b0100, "16">;
def  VST2d32  : VST2D<0b1000, 0b1000, "32">;

def  VST2q8   : VST2Q<0b0000, "8">;
def  VST2q16  : VST2Q<0b0100, "16">;
def  VST2q32  : VST2Q<0b1000, "32">;

def  VST2d8Pseudo  : VSTQPseudo<IIC_VST2>;
def  VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
def  VST2d32Pseudo : VSTQPseudo<IIC_VST2>;

def  VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def  VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def  VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;

// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2),
          IIC_VST2u, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset",
          "$addr.addr = $wb", []>;
class VST2QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
          "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST2d8_UPD  : VST2DWB<0b1000, 0b0000, "8">;
def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;

def VST2q8_UPD  : VST2QWB<0b0000, "8">;
def VST2q16_UPD : VST2QWB<0b0100, "16">;
def VST2q32_UPD : VST2QWB<0b1000, "32">;

def VST2d8Pseudo_UPD  : VSTQWBPseudo<IIC_VST2u>;
def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;

def VST2q8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;

// ...with double-spaced registers (for disassembly only):
def VST2b8      : VST2D<0b1001, 0b0000, "8">;
def VST2b16     : VST2D<0b1001, 0b0100, "16">;
def VST2b32     : VST2D<0b1001, 0b1000, "32">;
def VST2b8_UPD  : VST2DWB<0b1001, 0b0000, "8">;
def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">;
def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">;

//   VST3     : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;

def  VST3d8   : VST3D<0b0100, 0b0000, "8">;
def  VST3d16  : VST3D<0b0100, 0b0100, "16">;
def  VST3d32  : VST3D<0b0100, 0b1000, "32">;

def  VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def  VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def  VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST3d8_UPD  : VST3DWB<0b0100, 0b0000, "8">;
def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VST3q8      : VST3D<0b0101, 0b0000, "8">;
def VST3q16     : VST3D<0b0101, 0b0100, "16">;
def VST3q32     : VST3D<0b0101, 0b1000, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, 0b0000, "8">;
def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;

// Writeback pseudos for the double-spaced VST3 forms; each takes the
// VSTQQQQWBPseudo class with the IIC_VST3u itinerary.
def VST3q8Pseudo_UPD     : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD    : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD    : VSTQQQQWBPseudo<IIC_VST3u>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// VST4 : Vector Store (multiple 4-element structures)
// VST4D stores four D registers through $addr and has no outputs.
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$addr,
               DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt,
          "\\{$src1, $src2, $src3, $src4\\}, $addr",
          "", []>;

def VST4d8               : VST4D<0b0000, 0b0000, "8">;
def VST4d16              : VST4D<0b0000, 0b0100, "16">;
def VST4d32              : VST4D<0b0000, 0b1000, "32">;

def VST4d8Pseudo         : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo        : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo        : VSTQQPseudo<IIC_VST4>;

// ...with address register writeback:
// Adds the $offset operand and a GPR:$wb result tied to $addr.addr.
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
               DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4u, "vst4", Dt,
          "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST4d8_UPD           : VST4DWB<0b0000, 0b0000, "8">;
def VST4d16_UPD          : VST4DWB<0b0000, 0b0100, "16">;
def VST4d32_UPD          : VST4DWB<0b0000, 0b1000, "32">;

def VST4d8Pseudo_UPD     : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD    : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD    : VSTQQWBPseudo<IIC_VST4u>;

// ...with double-spaced registers (non-updating versions for disassembly only):
// These reuse VST4D/VST4DWB with op11_8 = 0b0001 instead of 0b0000.
def VST4q8               : VST4D<0b0001, 0b0000, "8">;
def VST4q16              : VST4D<0b0001, 0b0100, "16">;
def VST4q32              : VST4D<0b0001, 0b1000, "32">;
def VST4q8_UPD           : VST4DWB<0b0001, 0b0000, "8">;
def VST4q16_UPD          : VST4DWB<0b0001, 0b0100, "16">;
def VST4q32_UPD          : VST4DWB<0b0001, 0b1000, "32">;

def VST4q8Pseudo_UPD     : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD    : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD    : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// The three pairs differ only in the register class of $src (QPR, QQPR,
// QQQQPR); each WB variant adds $offset plus a GPR:$wb result tied to
// $addr.addr.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                     nohash_imm:$lane),
                itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                     nohash_imm:$lane),
                itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                     nohash_imm:$lane),
                itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
// FIXME: Not yet implemented.

// VST2LN : Vector Store (single 2-element structure from one lane)
// The lane index is an extra nohash_imm operand printed as [$lane] after each
// register. Records pass op7_4 as a bit list in which '?' leaves a bit unset.
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VST2ln, "vst2", Dt,
          "\\{$src1[$lane], $src2[$lane]\\}, $addr",
          "", []>;

def VST2LNd8             : VST2LN<0b0001, {?,?,?,?}, "8">;
def VST2LNd16            : VST2LN<0b0101, {?,?,0,?}, "16">;
def VST2LNd32            : VST2LN<0b1001, {?,0,?,?}, "32">;

def VST2LNd8Pseudo       : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo      : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo      : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
def VST2LNq16            : VST2LN<0b0101, {?,?,1,?}, "16">;
def VST2LNq32            : VST2LN<0b1001, {?,1,?,?}, "32">;

def VST2LNq16Pseudo      : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo      : VSTQQLNPseudo<IIC_VST2ln>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
               DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VST2lnu, "vst2", Dt,
          "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST2LNd8_UPD         : VST2LNWB<0b0001, {?,?,?,?}, "8">;
def VST2LNd16_UPD        : VST2LNWB<0b0101, {?,?,0,?}, "16">;
def VST2LNd32_UPD        : VST2LNWB<0b1001, {?,0,?,?}, "32">;

def VST2LNd8Pseudo_UPD   : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;

def VST2LNq16_UPD        : VST2LNWB<0b0101, {?,?,1,?}, "16">;
def VST2LNq32_UPD        : VST2LNWB<0b1001, {?,1,?,?}, "32">;

def VST2LNq16Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST2lnu>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
               nohash_imm:$lane),
          IIC_VST3ln, "vst3", Dt,
          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr",
          "", []>;

def VST3LNd8             : VST3LN<0b0010, {?,?,?,0}, "8">;
def VST3LNd16            : VST3LN<0b0110, {?,?,0,0}, "16">;
def VST3LNd32            : VST3LN<0b1010, {?,0,0,0}, "32">;

def VST3LNd8Pseudo       : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo      : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo      : VSTQQLNPseudo<IIC_VST3ln>;

// ...with double-spaced registers:
def VST3LNq16            : VST3LN<0b0110, {?,?,1,0}, "16">;
def VST3LNq32            : VST3LN<0b1010, {?,1,0,0}, "32">;

def VST3LNq16Pseudo      : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo      : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
               DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VST3lnu, "vst3", Dt,
          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST3LNd8_UPD         : VST3LNWB<0b0010, {?,?,?,0}, "8">;
def VST3LNd16_UPD        : VST3LNWB<0b0110, {?,?,0,0}, "16">;
def VST3LNd32_UPD        : VST3LNWB<0b1010, {?,0,0,0}, "32">;

def VST3LNd8Pseudo_UPD   : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;

def VST3LNq16_UPD        : VST3LNWB<0b0110, {?,?,1,0}, "16">;
def VST3LNq32_UPD        : VST3LNWB<0b1010, {?,1,0,0}, "32">;

def VST3LNq16Pseudo_UPD  : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD  : VSTQQQQLNWBPseudo<IIC_VST3lnu>;

// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
               nohash_imm:$lane),
          IIC_VST4ln, "vst4", Dt,
          "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
          "", []>;

def VST4LNd8             : VST4LN<0b0011, {?,?,?,?}, "8">;
def VST4LNd16            : VST4LN<0b0111, {?,?,0,?}, "16">;
def VST4LNd32            : VST4LN<0b1011, {?,0,?,?}, "32">;

def VST4LNd8Pseudo       : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo      : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo      : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16            : VST4LN<0b0111, {?,?,1,?}, "16">;
def VST4LNq32            : VST4LN<0b1011, {?,1,?,?}, "32">;

def VST4LNq16Pseudo      : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo      : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
               DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VST4lnu, "vst4", Dt,
  "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST4LNd8_UPD         : VST4LNWB<0b0011, {?,?,?,?}, "8">;
def VST4LNd16_UPD        : VST4LNWB<0b0111, {?,?,0,?}, "16">;
def VST4LNd32_UPD        : VST4LNWB<0b1011, {?,0,?,?}, "32">;

def VST4LNd8Pseudo_UPD   : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;

def VST4LNq16_UPD        : VST4LNWB<0b0111, {?,?,1,?}, "16">;
def VST4LNq32_UPD        : VST4LNWB<0b1011, {?,1,?,?}, "32">;

def VST4LNq16Pseudo_UPD  : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD  : VSTQQQQLNWBPseudo<IIC_VST4lnu>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1


//===----------------------------------------------------------------------===//
// NEON pattern fragments
1050//===----------------------------------------------------------------------===// 1051 1052// Extract D sub-registers of Q registers. 1053def DSubReg_i8_reg : SDNodeXForm<imm, [{ 1054 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 1055 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32); 1056}]>; 1057def DSubReg_i16_reg : SDNodeXForm<imm, [{ 1058 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 1059 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32); 1060}]>; 1061def DSubReg_i32_reg : SDNodeXForm<imm, [{ 1062 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 1063 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32); 1064}]>; 1065def DSubReg_f64_reg : SDNodeXForm<imm, [{ 1066 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 1067 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32); 1068}]>; 1069 1070// Extract S sub-registers of Q/D registers. 1071def SSubReg_f32_reg : SDNodeXForm<imm, [{ 1072 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering"); 1073 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32); 1074}]>; 1075 1076// Translate lane numbers from Q registers to D subregs. 1077def SubReg_i8_lane : SDNodeXForm<imm, [{ 1078 return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32); 1079}]>; 1080def SubReg_i16_lane : SDNodeXForm<imm, [{ 1081 return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32); 1082}]>; 1083def SubReg_i32_lane : SDNodeXForm<imm, [{ 1084 return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32); 1085}]>; 1086 1087//===----------------------------------------------------------------------===// 1088// Instruction Classes 1089//===----------------------------------------------------------------------===// 1090 1091// Basic 2-register operations: single-, double- and quad-register. 
// N2VS: DPR_VFP2 operands, IIC_VUNAD itinerary and an empty pattern list.
// ResTy, OpTy and OpNode are accepted but not referenced in this class.
class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src),
        IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
        []>;
// N2VD: DPR operands; selects (ResTy (OpNode (OpTy $src))).
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
// N2VQ: QPR operands; passes 1 where N2VD passes 0 and uses IIC_VUNAQ.
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// N2VDInt / N2VQInt: like a plain 2-register class, but the itinerary is a
// parameter and the pattern applies an Intrinsic (IntOp) instead of an SDNode.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Narrow 2-register operations.
// QPR source, DPR result; op6 is supplied by the record.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$dst),
        (ins QPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$dst),
        (ins QPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;

// Long 2-register operations (currently only used for VMOVL).
// N2VL: DPR source, QPR result; selects (TyQ (OpNode (TyD $src))).
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$dst),
        (ins DPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written: each $srcN is tied to $dstN by the
// constraint string, and only the $dst names appear in the assembly string.
// No selection pattern is attached.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7,
                  string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$dst1, DPR:$dst2),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD, OpcodeStr, Dt, "$dst1, $dst2",
        "$src1 = $dst1, $src2 = $dst2", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$dst1, QPR:$dst2),
        (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$dst1, $dst2",
        "$src1 = $dst1, $src2 = $dst2", []>;

// Basic 3-register operations: single-, double- and quad-register.
// N3VS: DPR_VFP2 operands, IIC_VBIND itinerary and an empty pattern list.
// ResTy, OpTy and OpNode are accepted but not referenced in this class.
class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode,
           bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2),
        N3RegFrm, IIC_VBIND,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        []> {
  let isCommutable = Commutable;
}

// N3VD: DPR operands named $Vd/$Vn/$Vm; selects (OpNode $Vn, $Vm).
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode,
           bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode,
            bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$dst),
         (ins DPR:$src1, DPR:$src2),
         N3RegFrm, itin,
         OpcodeStr, "$dst, $src1, $src2", "",
         [(set DPR:$dst,
               (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// N3VDSL: second source is one lane of a DPR_VFP2 register; the pattern
// matches ShOp applied to $src1 and (NEONvduplane $src2, $lane).
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}
// N3VDSL16: as N3VDSL, but the lane register is DPR_8 and the itinerary is
// fixed to IIC_VMULi16D.
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt,
               ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, IIC_VMULi16D,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_8:$src2),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}

// N3VQ: quad-register counterpart of N3VD.
// NOTE(review): operands here are named $Qd/$Qn/$Qm, while N3VQInt below
// names its QPR operands $Vd/$Vn/$Vm. Confirm against the N3V format class
// that both spellings are intended.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode,
           bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Qd),
        (ins QPR:$Qn, QPR:$Qm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Qd, $Qn, $Qm", "",
        [(set QPR:$Qd, (ResTy (OpNode (OpTy QPR:$Qn), (OpTy QPR:$Qm))))]> {
  let isCommutable = Commutable;
}
// N3VQX: quad-register counterpart of N3VDX (no Dt argument).
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode,
            bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$dst),
         (ins QPR:$src1, QPR:$src2),
         N3RegFrm, itin,
         OpcodeStr, "$dst, $src1, $src2", "",
         [(set QPR:$dst,
               (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
// N3VQSL: QPR result and first source; the lane operand stays a D register
// (DPR_VFP2) typed OpTy, and the duplicated value is typed ResTy.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}
// N3VQSL16: as N3VQSL, with a DPR_8 lane register and IIC_VMULi16Q.
class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, IIC_VMULi16Q,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
// These take the instruction Format (f) as a parameter and match an
// Intrinsic instead of an SDNode.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_8:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: single-, double- and quad-register.
// In every class below the accumulator input $src1 is tied to the result by
// the constraint string and is left out of the assembly string.

// N3VSMulOp: DPR_VFP2 operands and an empty pattern list. Ty, MulOp and
// OpNode are accepted but not referenced in this class.
class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        []>;

// N3VDMulOp: selects (OpNode $src1, (MulOp $Vn, $Vm)) on D registers.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

// N3VDMulOpSL: the multiplier is one lane of a DPR_VFP2 register,
// duplicated with NEONvduplane.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
                                                     imm:$lane)))))))]>;
// N3VDMulOpSL16: as N3VDMulOpSL with a DPR_8 lane register; this class names
// its operands $Vd/$Vn/$Vm.
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                     imm:$lane)))))))]>;

// N3VQMulOp: quad-register counterpart of N3VDMulOp.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
// N3VQMulOpSL / N3VQMulOpSL16: Q-register accumulator and multiplicand; the
// lane operand is a D register (DPR_VFP2 or DPR_8) typed OpTy.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                        imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                   (ResTy (NEONvduplane (OpTy DPR_8:$src3),
                                                        imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// N3VDIntOp / N3VQIntOp: select (OpNode $src1, (IntOp $Vn, $Vm)). The
// accumulator $src1 is tied to $Vd by the constraint string and is left out
// of the assembly string.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// N3VDInt3 / N3VQInt3: the intrinsic receives all three sources; $src1 is
// tied to $dst and is left out of the assembly string.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR:$src3),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
                                      (OpTy DPR:$src2),
                                      (OpTy DPR:$src3))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, QPR:$src3),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
                                      (OpTy QPR:$src2),
                                      (OpTy QPR:$src3))))]>;

// Long Multiply-Add/Sub operations.
// Q-register accumulator (tied to the result), D-register multiplicands.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD,
                SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                                (TyQ (MulOp (TyD DPR:$Vn),
                                            (TyD DPR:$Vm)))))]>;
// Lane forms: the second multiplicand is one lane of a DPR_VFP2 (SL) or
// DPR_8 (SL16) register, duplicated with NEONvduplane.
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD,
                  SDNode MulOp, SDNode OpNode>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set QPR:$dst,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$src2),
                              (TyD (NEONvduplane (TyD DPR_VFP2:$src3),
                                                 imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD,
                    SDNode MulOp, SDNode OpNode>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set QPR:$dst,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$src2),
                              (TyD (NEONvduplane (TyD DPR_8:$src3),
                                                 imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// The D-sized intrinsic result is widened with ExtOp before OpNode combines
// it with the Q-register accumulator.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                   bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD,
                   Intrinsic IntOp, SDNode ExtOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                        (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1),
                          (TyD DPR:$Vn),
                          (TyD DPR:$Vm))))]>;
// Lane form: the last intrinsic argument is NEONvduplane of $src3 at $lane,
// with $src3 taken from DPR_VFP2.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                imm:$lane)))))]>;
// Same as N3VLInt3SL, but $src3 is taken from DPR_8.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src3),
                                                imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        N3RegFrm, IIC_VBINi4D, OpcodeStr, Dt,
        "$dst, $src1, $src2", "",
        [(set DPR:$dst,
              (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Two DPR sources feed OpNode; the result is a QPR value of type TyQ.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$dst, $src1, $src2", "",
        [(set QPR:$dst,
              (TyQ (OpNode (TyD DPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// Lane form: the second operand is NEONvduplane of $src2 at $lane,
// with $src2 taken from DPR_VFP2.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$dst, $src1, $src2[$lane]", "",
        [(set QPR:$dst,
              (TyQ (OpNode (TyD DPR:$src1),
                           (TyD (NEONvduplane (TyD DPR_VFP2:$src2),
                                              imm:$lane)))))]>;
// Same as N3VLSL, but $src2 is taken from DPR_8.
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$dst, $src1, $src2[$lane]", "",
        [(set QPR:$dst,
              (TyQ (OpNode (TyD DPR:$src1),
                           (TyD (NEONvduplane (TyD DPR_8:$src2),
                                              imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Qd),
        (ins DPR:$Dn, DPR:$Dm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Qd, $Dn, $Dm", "",
        // Each DPR source goes through ExtOp before OpNode is applied.
        [(set QPR:$Qd,
              (OpNode (TyQ (ExtOp (TyD DPR:$Dn))),
                      (TyQ (ExtOp (TyD DPR:$Dm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// Here ExtOp is applied to the TyD result of IntOp, not to its inputs.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$dst, $src1, $src2", "",
        [(set QPR:$dst,
              (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1),
                                      (TyD DPR:$src2))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$dst, $src1, $src2", "",
        [(set QPR:$dst,
              (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// Lane form: the second intrinsic argument is NEONvduplane of $src2 at
// $lane, with $src2 taken from DPR_VFP2.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]>;
// Same as N3VLIntSL, but $src2 is taken from DPR_8.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]>;

// Wide 3-register operations.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Qd),
        (ins QPR:$Qn, DPR:$Dm),
        N3RegFrm, IIC_VSUBiD, OpcodeStr, Dt,
        "$Qd, $Qn, $Dm", "",
        // Only the DPR operand goes through ExtOp; $Qn is used as-is.
        [(set QPR:$Qd,
              (OpNode (TyQ QPR:$Qn),
                      (TyQ (ExtOp (TyD DPR:$Dm)))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPALiD, OpcodeStr, Dt,
        "$dst, $src2", "$src1 = $dst",
        [(set DPR:$dst,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        IIC_VPALiQ, OpcodeStr, Dt,
        "$dst, $src2", "$src1 = $dst",
        [(set QPR:$dst,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;

// Shift by immediate,
// both double- and quad-register.
// The shift amount is the i32 immediate operand $SIMM.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           f, itin, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst,
                 (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           f, itin, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst,
                 (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           N2RegVShLFrm, IIC_VSHLiD, OpcodeStr, Dt,
           "$dst, $src, $SIMM", "",
           [(set QPR:$dst,
                 (ResTy (OpNode (OpTy DPR:$src), (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           N2RegVShRFrm, itin, OpcodeStr, Dt,
           "$dst, $src, $SIMM", "",
           [(set DPR:$dst,
                 (ResTy (OpNode (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// Pattern: add($src1, ShOp($src2, $SIMM)); $src1 is tied to $dst.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           N2RegVShRFrm, IIC_VPALiD, OpcodeStr, Dt,
           "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set DPR:$dst,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           N2RegVShRFrm, IIC_VPALiD, OpcodeStr, Dt,
           "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set QPR:$dst,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           f, IIC_VSHLiD, OpcodeStr, Dt,
           "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set DPR:$dst,
                 (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           f, IIC_VSHLiQ, OpcodeStr, Dt,
           "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set QPR:$dst,
                 (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           NVCVTFrm, IIC_VUNAD, OpcodeStr, Dt,
           "$dst, $src, $SIMM", "",
           [(set DPR:$dst,
                 (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           NVCVTFrm, IIC_VUNAQ, OpcodeStr, Dt,
           "$dst, $src, $SIMM", "",
           [(set QPR:$dst,
                 (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations -- for disassembly only.

// First with only element sizes of 8, 16 and 32 bits:
// All defs below carry an empty selection pattern ([]) and NoItinerary.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "8"), asm, "", []>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "16"), asm, "", []>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "32"), asm, "", []>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src),
                  NoItinerary, opc, "f32", asm, "", []> {
    let Inst{10} = 1;        // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "8"), asm, "", []>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "16"), asm, "", []>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "32"), asm, "", []>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src),
                  NoItinerary, opc, "f32", asm, "", []> {
    let Inst{10} = 1;        // overwrite F = 1
  }
}

// Neon 3-register vector operations.
// First with only element sizes of 8, 16 and 32 bits:
// The 8- and 16-bit defs share the "16" itineraries; 32-bit defs use the
// "32" itineraries.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16, OpcodeStr,
                   !strconcat(Dt, "8"), v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16, OpcodeStr,
                   !strconcat(Dt, "16"), v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32, OpcodeStr,
                   !strconcat(Dt, "32"), v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16, OpcodeStr,
                   !strconcat(Dt, "8"), v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16, OpcodeStr,
                   !strconcat(Dt, "16"), v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32, OpcodeStr,
                   !strconcat(Dt, "32"), v4i32, v4i32, OpNode, Commutable>;
}

// Lane ("SL") forms for 16- and 32-bit elements only.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr,
                       !strconcat(Dt, "16"), v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr,
                     !strconcat(Dt, "32"), v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr,
                       !strconcat(Dt, "16"), v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr,
                     !strconcat(Dt, "32"), v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
// itinD / itinQ are passed to N3V_QHS for both its 16- and 32-bit slots.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD, OpcodeStr,
                   !strconcat(Dt, "64"), v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ, OpcodeStr,
                   !strconcat(Dt, "64"), v2i64, v2i64, OpNode, Commutable>;
}


// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, itin,
                   OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, itin,
                   OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, itin,
                   OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, itin,
                      OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, itin,
                      OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, itin,
                      OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16, OpcodeStr,
                      !strconcat(Dt, "16"), v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32, OpcodeStr,
                      !strconcat(Dt, "32"), v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16, OpcodeStr,
                      !strconcat(Dt, "16"), v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32, OpcodeStr,
                      !strconcat(Dt, "32"), v4i32, v4i32, IntOp, Commutable>;
}

// Lane ("SL") intrinsic forms for 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, OpcodeStr,
                          !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, OpcodeStr,
                          !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, OpcodeStr,
                        !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The 8-bit defs reuse the 16-bit itineraries.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16, OpcodeStr,
                      !strconcat(Dt, "8"), v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16, OpcodeStr,
                      !strconcat(Dt, "8"), v16i8, v16i8, IntOp, Commutable>;
}

// ....then also with element size of 64 bits:
// The 64-bit defs reuse the 32-bit itineraries.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32, OpcodeStr,
                      !strconcat(Dt, "64"), v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32, OpcodeStr,
                      !strconcat(Dt, "64"), v2i64, v2i64, IntOp, Commutable>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4, OpcodeStr,
                      !strconcat(Dt, "16"), v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4, OpcodeStr,
                      !strconcat(Dt, "32"), v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4, OpcodeStr,
                      !strconcat(Dt, "64"), v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.
// Source element sizes of 8, 16 and 32 bits; the 8- and 16-bit defs both use
// itin16.
multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                   !strconcat(Dt, "8"), v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                   !strconcat(Dt, "16"), v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                   !strconcat(Dt, "32"), v2i64, v2i32, OpNode, Commutable>;
}

// Lane ("SL") forms; defs are named after the source (TyD) vector type.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"),
                       v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"),
                     v2i64, v2i32, OpNode>;
}

// Variant built on N3VLExt, which takes an additional ExtOp node.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                      !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                      !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                      !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                      !strconcat(Dt, "16"), v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                      !strconcat(Dt, "32"), v2i64, v2i32, IntOp, Commutable>;
}

// Lane ("SL") forms; defs are named after the source (OpTy) vector type.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                          !strconcat(Dt, "16"),
                          v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin, OpcodeStr,
                        !strconcat(Dt, "32"),
                        v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The 8-bit def reuses itin16.
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                      !strconcat(Dt, "8"), v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin, OpcodeStr,
                         !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin, OpcodeStr,
                         !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin, OpcodeStr,
                         !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4, OpcodeStr,
                   !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4, OpcodeStr,
                   !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4, OpcodeStr,
                   !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// The multiply node is fixed to `mul`; OpNode is supplied by the user.
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16, OpcodeStr,
                        !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16, OpcodeStr,
                        !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32, OpcodeStr,
                        !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Lane ("SL") multiply-op forms for 16- and 32-bit elements.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, OpcodeStr,
                            !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, OpcodeStr,
                          !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, OpcodeStr,
                            !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, OpcodeStr,
                          !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, Intrinsic IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD, OpcodeStr,
                        !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD, OpcodeStr,
                        !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD, OpcodeStr,
                        !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ, OpcodeStr,
                        !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ, OpcodeStr,
                        !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ, OpcodeStr,
                        !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD, OpcodeStr,
                       !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD, OpcodeStr,
                       !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD, OpcodeStr,
                       !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ, OpcodeStr,
                       !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ, OpcodeStr,
                       !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ, OpcodeStr,
                       !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"),
                        v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"),
                        v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"),
                        v2i64, v2i32, MulOp, OpNode>;
}

// Lane ("SL") long multiply-op forms; the itineraries are fixed here rather
// than passed in.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt, "16"),
                            v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"),
                          v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.
// Long 3-argument intrinsic multiclasses.
//
// N3VLInt3_HS: halfword and word element sizes only.  The size field is
// 0b01 for the "16" variant and 0b10 for the "32" variant; each variant is
// given its own itinerary (itin16 / itin32).
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                       !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// N3VLInt3SL_HS: built on the N3VLInt3SL16 / N3VLInt3SL classes (the "SL"
// forms).  The itineraries are fixed here (IIC_VMACi16D / IIC_VMACi32D)
// rather than passed in, and the defs are named after the second vector type
// argument (v4i16 / v2i32).
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                           !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                         !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// N3VLInt3_QHS: everything N3VLInt3_HS defines, plus the byte-element
// variant (size 0b00), which uses the itin16 itinerary.
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32,
                OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                       !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
// Long intrinsic-with-extend-and-op multiclass: one def per element size
// (8, 16, 32 bits), all sharing a single itinerary.
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin, OpcodeStr,
                           !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin, OpcodeStr,
                           !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin, OpcodeStr,
                           !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits.
// D-register defs use itinD, Q-register defs use itinQ.
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinD,
                      OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinD,
                      OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinD,
                      OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits.
// Each def is named after its second (narrower-element) vector type; the
// first vector type has half as many elements of twice the width.
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits.
// Same shape as N2VPLInt_QHS, built on the N2VDPLInt2 / N2VQPLInt2 classes.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits.
// The 8/16/32-bit defs pin the leading bits of the imm6 field (Inst{21-19},
// Inst{21-20}, Inst{21}); the 64-bit defs leave imm6 free and pass 1 instead
// of 0 as the fourth template argument.
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode, Format f> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  // imm6 = xxxxxx
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "64"), v1i64, OpNode>;

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  // imm6 = xxxxxx
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "64"), v2i64, OpNode>;
}

// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits.
// imm6 handling mirrors N2VSh_QHSD.
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  // imm6 = xxxxxx
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, OpcodeStr,
                        !strconcat(Dt, "64"), v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  // imm6 = xxxxxx
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, OpcodeStr,
                        !strconcat(Dt, "64"), v2i64, ShOp>;
}


// Neon Shift-Insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits.
// Unlike the other shift multiclasses there is no Dt prefix parameter: the
// data-type strings are the bare sizes "8", "16", "32" and "64".
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp,
                         Format f> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, f,
                        OpcodeStr, "8", v8i8, ShOp> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, f,
                        OpcodeStr, "16", v4i16, ShOp> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, f,
                        OpcodeStr, "32", v2i32, ShOp> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  // imm6 = xxxxxx
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, f,
                        OpcodeStr, "64", v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, f,
                        OpcodeStr, "8", v16i8, ShOp> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, f,
                        OpcodeStr, "16", v8i16, ShOp> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, f,
                        OpcodeStr, "32", v4i32, ShOp> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  // imm6 = xxxxxx
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, f,
                        OpcodeStr, "64", v2i64, ShOp>;
}

// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits.
// Defs are named after the first (wider-element) vector type argument.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits.
// Defs are named after the first (narrower-element) vector type argument,
// while the data-type suffix is the wider element size.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "16"), v8i8, v8i16, OpNode> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v4i16, v4i32, OpNode> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "64"), v2i32, v2i64, OpNode> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// VADD: Vector Add (integer and floating-point).
defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ,
                     "vadd", "i", add, 1>;
def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND,
                  "vadd", "f32", v2f32, v2f32, fadd, 1>;
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ,
                  "vadd", "f32", v4f32, v4f32, fadd, 1>;

// VADDL: Vector Add Long (Q = D + D).
defm VADDLs : N3VLExt_QHS<0, 1, 0b0000, 0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "s", add, sext, 1>;
defm VADDLu : N3VLExt_QHS<1, 1, 0b0000, 0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "u", add, zext, 1>;

// VADDW: Vector Add Wide (Q = Q + D).
defm VADDWs : N3VW_QHS<0, 1, 0b0001, 0, "vaddw", "s", add, sext, 0>;
defm VADDWu : N3VW_QHS<1, 1, 0b0001, 0, "vaddw", "u", add, zext, 0>;

// VHADD: Vector Halving Add.
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "u", int_arm_neon_vhaddu, 1>;

// VRHADD: Vector Rounding Halving Add.
defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "u", int_arm_neon_vrhaddu, 1>;

// VQADD: Vector Saturating Add.
defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "u", int_arm_neon_vqaddu, 1>;

// VADDHN: Vector Add and Narrow Returning High Half (D = Q + Q).
defm VADDHN : N3VNInt_HSD<0, 1, 0b0100, 0, "vaddhn", "i",
                          int_arm_neon_vaddhn, 1>;

// VRADDHN: Vector Rounding Add and Narrow Returning High Half (D = Q + Q).
defm VRADDHN : N3VNInt_HSD<1, 1, 0b0100, 0, "vraddhn", "i",
                           int_arm_neon_vraddhn, 1>;

// Vector Multiply Operations.

// VMUL: Vector Multiply (integer, polynomial and floating-point).
defm VMUL : N3V_QHS<0, 0, 0b1001, 1,
                    IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q,
                    "vmul", "i", mul, 1>;
def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D,
                     "vmul", "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q,
                     "vmul", "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD,
                  "vmul", "f32", v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ,
                  "vmul", "f32", v4f32, v4f32, fmul, 1>;
defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32",
                      v4f32, v2f32, fmul>;

// Multiplying a Q register by a lane duplicated out of another Q register is
// selected to the corresponding "sl" instruction, fed with the D subregister
// and lane index that DSubReg_*_reg / SubReg_*_lane derive from $lane.
def : Pat<
  (v8i16 (mul (v8i16 QPR:$src1),
              (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
  (v8i16 (VMULslv8i16
           (v8i16 QPR:$src1),
           (v4i16 (EXTRACT_SUBREG QPR:$src2, (DSubReg_i16_reg imm:$lane))),
           (SubReg_i16_lane imm:$lane)))>;
def : Pat<
  (v4i32 (mul (v4i32 QPR:$src1),
              (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
  (v4i32 (VMULslv4i32
           (v4i32 QPR:$src1),
           (v2i32 (EXTRACT_SUBREG QPR:$src2, (DSubReg_i32_reg imm:$lane))),
           (SubReg_i32_lane imm:$lane)))>;
def : Pat<
  (v4f32 (fmul (v4f32 QPR:$src1),
               (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
  (v4f32 (VMULslfq
           (v4f32 QPR:$src1),
           (v2f32 (EXTRACT_SUBREG QPR:$src2, (DSubReg_i32_reg imm:$lane))),
           (SubReg_i32_lane imm:$lane)))>;

// VQDMULH: Vector Saturating Doubling Multiply Returning High Half.
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm,
                         IIC_VMULi16D, IIC_VMULi32D,
                         IIC_VMULi16Q, IIC_VMULi32Q,
                         "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl : N3VIntSL_HS<0b1100,
                             IIC_VMULi16D, IIC_VMULi32D,
                             IIC_VMULi16Q, IIC_VMULi32Q,
                             "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<
  (v8i16 (int_arm_neon_vqdmulh
           (v8i16 QPR:$src1),
           (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
  (v8i16 (VQDMULHslv8i16
           (v8i16 QPR:$src1),
           (v4i16 (EXTRACT_SUBREG QPR:$src2, (DSubReg_i16_reg imm:$lane))),
           (SubReg_i16_lane imm:$lane)))>;
def : Pat<
  (v4i32 (int_arm_neon_vqdmulh
           (v4i32 QPR:$src1),
           (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
  (v4i32 (VQDMULHslv4i32
           (v4i32 QPR:$src1),
           (v2i32 (EXTRACT_SUBREG QPR:$src2, (DSubReg_i32_reg imm:$lane))),
           (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH: Vector Rounding Saturating Doubling Multiply Returning High Half.
defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                          IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101,
                              IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<
  (v8i16 (int_arm_neon_vqrdmulh
           (v8i16 QPR:$src1),
           (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
  (v8i16 (VQRDMULHslv8i16
           (v8i16 QPR:$src1),
           (v4i16 (EXTRACT_SUBREG QPR:$src2, (DSubReg_i16_reg imm:$lane))),
           (SubReg_i16_lane imm:$lane)))>;
def : Pat<
  (v4i32 (int_arm_neon_vqrdmulh
           (v4i32 QPR:$src1),
           (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
  (v4i32 (VQRDMULHslv4i32
           (v4i32 QPR:$src1),
           (v2i32 (EXTRACT_SUBREG QPR:$src2, (DSubReg_i32_reg imm:$lane))),
           (SubReg_i32_lane imm:$lane)))>;

// VMULL: Vector Multiply Long (integer and polynomial) (Q = D * D).
defm VMULLs : N3VL_QHS<0, 1, 0b1100, 0, IIC_VMULi16D, IIC_VMULi32D,
                       "vmull", "s", NEONvmulls, 1>;
defm VMULLu : N3VL_QHS<1, 1, 0b1100, 0, IIC_VMULi16D, IIC_VMULi32D,
                       "vmull", "u", NEONvmullu, 1>;
def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D,
                     "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D,
                          "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D,
                          "vmull", "u", NEONvmullu>;

// VQDMULL: Vector Saturating Doubling Multiply Long (Q = D * D).
defm VQDMULL : N3VLInt_HS<0, 1, 0b1101, 0, IIC_VMULi16D, IIC_VMULi32D,
                          "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl : N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                              "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.
//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul, fadd>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul, fadd>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul, fadd>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul, fadd>;

// Q-register multiply-accumulate where the multiplier is a lane duplicated
// from a Q register: select the "sl" instruction, passing the D subregister
// and lane index that DSubReg_*_reg / SubReg_*_lane derive from $lane.
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
                  (fmul (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", NEONvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;

//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;

//   VMLS     : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                          v2f32, fmul, fsub>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                          v4f32, fmul, fsub>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul, fsub>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul, fsub>;

// Same lane-duplicate selection as the VMLA patterns, for multiply-subtract.
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
                  (fmul (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// op11_8 is a bits<4> parameter of N3VLInt3SL_HS, so all four bits are
// written out (same value as the former 3-bit literal, and matching the
// 0b0011 used by VQDMLALsl).
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;

// Vector Subtract Operations.

//   VSUB     : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                     v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                     v4f32, v4f32, fsub, 0>;
//   VSUBL    : Vector Subtract Long (Q = D - D)
defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", sub, sext, 0>;
defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", sub, zext, 0>;
//   VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
//   VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
//   VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
//   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
                            int_arm_neon_vsubhn, 0>;
//   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                            int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.

//   VCEQ     : Vector Compare Equal
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                     NEONvceq, 1>;
def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                     NEONvceq, 1>;
// For disassembly only.
defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                            "$dst, $src, #0">;

//   VCGE     : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                     NEONvcge, 0>;
def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                     NEONvcge, 0>;
// For disassembly only.
// FIXME: This instruction's encoding MAY NOT BE correct.
defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                            "$dst, $src, #0">;
// For disassembly only.
// FIXME: This instruction's encoding MAY NOT BE correct.
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0,
                         "vcle", "s", "$dst, $src, #0">;

// VCGT: Vector Compare Greater Than.
// Signed, unsigned and f32 forms; the f32 defs take float vectors and
// produce integer vectors (v2i32 / v4i32).
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0,
                     IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                     "vcgt", "s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0,
                     IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                     "vcgt", "u", NEONvcgtu, 0>;
def VCGTfd : N3VD<1, 0, 0b10, 0b1110, 0, IIC_VBIND,
                  "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>;
def VCGTfq : N3VQ<1, 0, 0b10, 0b1110, 0, IIC_VBINQ,
                  "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>;

// Compare-against-zero forms, for disassembly only.
// FIXME: This instruction's encoding MAY NOT BE correct.
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0,
                         "vcgt", "s", "$dst, $src, #0">;
// For disassembly only.
// FIXME: This instruction's encoding MAY NOT BE correct.
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0,
                         "vclt", "s", "$dst, $src, #0">;

// VACGE: Vector Absolute Compare Greater Than or Equal (aka VCAGE).
// The D and Q forms are bound to separate intrinsics (…vacged / …vacgeq).
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND,
                     "vacge", "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                     "vacge", "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;

// VACGT: Vector Absolute Compare Greater Than (aka VCAGT).
def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND,
                     "vacgt", "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                     "vacgt", "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;

// VTST: Vector Test Bits.  The data-type prefix is the empty string, so the
// suffix is just the element size.
defm VTST : N3V_QHS<0, 0, 0b1000, 1,
                    IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                    "vtst", "", NEONvtst, 1>;

// Vector Bitwise Operations.
// Bitwise NOT fragments: XOR of the operand with an all-ones vector, written
// as a bitconvert of a v8i8 (D-register) or v16i8 (Q-register) all-ones node.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND: Vector Bitwise AND.
def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                  "vand", v2i32, v2i32, and, 1>;
def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                  "vand", v4i32, v4i32, and, 1>;

// VEOR: Vector Bitwise Exclusive OR.
def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                  "veor", v2i32, v2i32, xor, 1>;
def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                  "veor", v4i32, v4i32, xor, 1>;

// VORR: Vector Bitwise OR.
def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD,
                  "vorr", v2i32, v2i32, or, 1>;
def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ,
                  "vorr", v4i32, v4i32, or, 1>;

// VBIC: Vector Bitwise Bit Clear (AND NOT): $src1 & ~$src2.
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1,
                 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                 N3RegFrm, IIC_VBINiD,
                 "vbic", "$dst, $src1, $src2", "",
                 [(set DPR:$dst,
                       (v2i32 (and DPR:$src1, (vnotd DPR:$src2))))]>;
def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1,
                 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                 N3RegFrm, IIC_VBINiQ,
                 "vbic", "$dst, $src1, $src2", "",
                 [(set QPR:$dst,
                       (v4i32 (and QPR:$src1, (vnotq QPR:$src2))))]>;

// VORN: Vector Bitwise OR NOT: $src1 | ~$src2.
def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1,
                 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                 N3RegFrm, IIC_VBINiD,
                 "vorn", "$dst, $src1, $src2", "",
                 [(set DPR:$dst,
                       (v2i32 (or DPR:$src1, (vnotd DPR:$src2))))]>;
def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1,
                 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                 N3RegFrm, IIC_VBINiQ,
                 "vorn", "$dst, $src1, $src2", "",
                 [(set QPR:$dst,
                       (v4i32 (or QPR:$src1, (vnotq QPR:$src2))))]>;

// VMVN: Vector Bitwise NOT (Immediate).
// All four immediate forms are marked rematerializable.

let isReMaterializable = 1 in {
// FIXME: This instruction's encoding MAY NOT BE correct.
def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1,
                         (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$dst, $SIMM", "",
                         [(set DPR:$dst,
                               (v4i16 (NEONvmvnImm timm:$SIMM)))]>;
// FIXME: This instruction's encoding MAY NOT BE correct.
def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1,
                         (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$dst, $SIMM", "",
                         [(set QPR:$dst,
                               (v8i16 (NEONvmvnImm timm:$SIMM)))]>;
// FIXME: This instruction's encoding MAY NOT BE correct.
def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1,
                         (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$dst, $SIMM", "",
                         [(set DPR:$dst,
                               (v2i32 (NEONvmvnImm timm:$SIMM)))]>;
// FIXME: This instruction's encoding MAY NOT BE correct.
def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1,
                         (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$dst, $SIMM", "",
                         [(set QPR:$dst,
                               (v4i32 (NEONvmvnImm timm:$SIMM)))]>;
}

// VMVN: Vector Bitwise NOT (register form).
// NOTE(review): VMVNq is given the D-register itinerary IIC_VSUBiD, the same
// as VMVNd -- confirm whether a Q-register itinerary was intended.
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                 (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD,
                 "vmvn", "$dst, $src", "",
                 [(set DPR:$dst, (v2i32 (vnotd DPR:$src)))]>;
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                 (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD,
                 "vmvn", "$dst, $src", "",
                 [(set QPR:$dst, (v4i32 (vnotq QPR:$src)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

// VBSL: Vector Bitwise Select.
// $src1 is tied to the destination $Vd and acts as the mask:
// result = ($Vn & $src1) | ($Vm & ~$src1).
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1,
                 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VCNTiD,
                 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 [(set DPR:$Vd,
                       (v2i32 (or (and DPR:$Vn, DPR:$src1),
                                  (and DPR:$Vm, (vnotd DPR:$src1)))))]>;
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1,
                 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VCNTiQ,
                 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 [(set QPR:$Vd,
                       (v4i32 (or (and QPR:$Vn, QPR:$src1),
                                  (and QPR:$Vm, (vnotq QPR:$src1)))))]>;

// VBIF: Vector Bitwise Insert if False.
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// NOTE(review): the operand names in the line above do not match the defs
// below, which use $Vd/$Vn/$Vm with "$src1 = $Vd" exactly as VBSL does.
// FIXME: This instruction's encoding MAY NOT BE correct.
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VBINiD,
                 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 [/* For disassembly only; pattern left blank */]>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VBINiQ,
                 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 [/* For disassembly only; pattern left blank */]>;

// VBIT: Vector Bitwise Insert if True.
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// NOTE(review): as for VBIF, the operand names in the line above do not
// match the defs below.
// FIXME: This instruction's encoding MAY NOT BE correct.
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VBINiD,
                 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 [/* For disassembly only; pattern left blank */]>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VBINiQ,
                 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 [/* For disassembly only; pattern left blank */]>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
// Signed/unsigned integer forms come from the intrinsic multiclass; the f32
// forms reuse the signed intrinsic.
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
    "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
    "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
// Both the signed and unsigned variants widen the result with zext.
defm VABDLs : N3VLIntExt_QHS<0, 1, 0b0111, 0, IIC_VSUBi4Q,
    "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1, 1, 0b0111, 0, IIC_VSUBi4Q,
    "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0, 0, 0b0111, 1, IIC_VABAD, IIC_VABAQ,
    "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<1, 0, 0b0111, 1, IIC_VABAD, IIC_VABAQ,
    "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0, 1, 0b0101, 0, IIC_VABAD,
    "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1, 1, 0b0101, 0, IIC_VABAD,
    "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

// VMAX : Vector Maximum
// The f32 forms are selected from the same intrinsic as the signed integer
// forms.
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vmax", "u", int_arm_neon_vmaxu, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
    "vmax", "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
    "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
    IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
    "vmin", "u", int_arm_neon_vminu, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
    "vmin", "f32", v2f32, v2f32, int_arm_neon_vmins, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
    "vmin", "f32", v4f32, v4f32, int_arm_neon_vmins, 1>;

// Vector Pairwise Operations.

// VPADD : Vector Pairwise Add
// Only D-register forms are defined here.
def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
    "vpadd", "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>;
def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
    "vpadd", "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>;
def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
    "vpadd", "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>;
def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VPBIND,
    "vpadd", "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0,
    "vpaddl", "s", int_arm_neon_vpaddls>;
defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0,
    "vpaddl", "u", int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0,
    "vpadal", "s", int_arm_neon_vpadals>;
defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0,
    "vpadal", "u", int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
    "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND,
    "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN : Vector Pairwise Minimum
def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
    "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND,
    "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE : Vector Reciprocal Estimate
// Defined for u32 and f32 elements, in D- and Q-register forms.
def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
    IIC_VUNAD, "vrecpe", "u32", v2i32, v2i32, int_arm_neon_vrecpe>;
def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
    IIC_VUNAQ, "vrecpe", "u32", v4i32, v4i32, int_arm_neon_vrecpe>;
def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
    IIC_VUNAD, "vrecpe", "f32", v2f32, v2f32, int_arm_neon_vrecpe>;
def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
    IIC_VUNAQ, "vrecpe", "f32", v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS : Vector Reciprocal Step
def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
    IIC_VRECSD, "vrecps", "f32", v2f32, v2f32, int_arm_neon_vrecps, 1>;
def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
    IIC_VRECSQ, "vrecps", "f32", v4f32, v4f32, int_arm_neon_vrecps, 1>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
    IIC_VUNAD, "vrsqrte", "u32", v2i32, v2i32, int_arm_neon_vrsqrte>;
def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
    IIC_VUNAQ, "vrsqrte", "u32", v4i32, v4i32, int_arm_neon_vrsqrte>;
def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
    IIC_VUNAD, "vrsqrte", "f32", v2f32, v2f32, int_arm_neon_vrsqrte>;
def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
    IIC_VUNAQ, "vrsqrte", "f32", v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
    IIC_VRECSD, "vrsqrts", "f32", v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
    IIC_VRECSQ, "vrsqrts", "f32", v4f32, v4f32, int_arm_neon_vrsqrts, 1>;

// Vector Shifts.

// VSHL : Vector Shift
// Register-shift forms, selected from the vshifts/vshiftu intrinsics.
defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm,
    IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
    "vshl", "s", int_arm_neon_vshifts, 0>;
defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm,
    IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
    "vshl", "u", int_arm_neon_vshiftu, 0>;
// VSHL : Vector Shift Left (Immediate)
defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD,
    "vshl", "i", NEONvshl, N2RegVShLFrm>;
// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD,
    "vshr", "s", NEONvshrs, N2RegVShRFrm>;
defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD,
    "vshr", "u", NEONvshru, N2RegVShRFrm>;

// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// Same as N2VLSh, except that Inst{21-16} is fixed to the op21_16 argument.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
                bit op7, bit op6, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDNode OpNode>
    : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
             ResTy, OpTy, OpNode> {
  let Inst{21-16} = op21_16;
}
def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0,
    "vshll", "i8", v8i16, v8i8, NEONvshlli>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0,
    "vshll", "i16", v4i32, v4i16, NEONvshlli>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0,
    "vshll", "i32", v2i64, v2i32, NEONvshlli>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 0, 1, IIC_VSHLiD,
    "vshrn", "i", NEONvshrn>;

// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm,
    IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
    "vrshl", "s", int_arm_neon_vrshifts, 0>;
// Unsigned counterpart of the rounding register shift.
defm VRSHLu : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm,
    IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
    "vrshl", "u", int_arm_neon_vrshiftu, 0>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D,
    "vrshr", "s", NEONvrshrs, N2RegVShRFrm>;
defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D,
    "vrshr", "u", NEONvrshru, N2RegVShRFrm>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D,
    "vrshrn", "i", NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm,
    IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
    "vqshl", "s", int_arm_neon_vqshifts, 0>;
defm VQSHLu : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm,
    IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
    "vqshl", "u", int_arm_neon_vqshiftu, 0>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D,
    "vqshl", "s", NEONvqshls, N2RegVShLFrm>;
defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D,
    "vqshl", "u", NEONvqshlu, N2RegVShLFrm>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D,
    "vqshlu", "s", NEONvqshlsu, N2RegVShLFrm>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D,
    "vqshrn", "s", NEONvqshrns>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D,
    "vqshrn", "u", NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D,
    "vqshrun", "s", NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm,
    IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
    "vqrshl", "s", int_arm_neon_vqrshifts, 0>;
defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm,
    IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
    "vqrshl", "u", int_arm_neon_vqrshiftu, 0>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D,
    "vqrshrn", "s", NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D,
    "vqrshrn", "u", NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D,
    "vqrshrun", "s", NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>;
// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
    IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", int_arm_neon_vabs>;
def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
    IIC_VUNAD, "vabs", "f32", v2f32, v2f32, int_arm_neon_vabs>;
def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
    IIC_VUNAQ, "vabs", "f32", v4f32, v4f32, int_arm_neon_vabs>;

// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
    IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", int_arm_neon_vqabs>;

// Vector Negate.

// Integer negation is matched as a subtraction from an all-zeros vector.
def vnegd : PatFrag<(ops node:$in),
    (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq : PatFrag<(ops node:$in),
    (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

// Shared D- and Q-register templates for the integer VNEG definitions; only
// the size field, mnemonic strings and element type vary per instance.
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0,
          (outs DPR:$dst), (ins DPR:$src),
          IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
          [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0,
          (outs QPR:$dst), (ins QPR:$src),
          IIC_VSHLiQ, OpcodeStr, Dt, "$dst, $src", "",
          [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>;

// VNEG : Vector Negate (integer)
def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
    (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
    "vneg", "f32", "$dst, $src", "",
    [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
    (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
    "vneg", "f32", "$dst, $src", "",
    [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;

// Explicit selection patterns mapping each integer negate fragment to the
// matching VNEG instruction.
def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
    IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s", int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
    IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s", int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
    IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", int_arm_neon_vclz>;
// VCNT : Vector Count One Bits
def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
    IIC_VCNTiD, "vcnt", "8", v8i8, v8i8, int_arm_neon_vcnt>;
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
    IIC_VCNTiQ, "vcnt", "8", v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Swap -- for disassembly only.
// No selection pattern is attached to either form.
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
    (outs DPR:$dst), (ins DPR:$src), NoItinerary,
    "vswp", "$dst, $src", "", []>;
def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
    (outs QPR:$dst), (ins QPR:$src), NoItinerary,
    "vswp", "$dst, $src", "", []>;

// Vector Move Operations.

// VMOV : Vector Move (Register)

let neverHasSideEffects = 1 in {
def VMOVDneon : N3VX<0, 0, 0b10, 0b0001, 0, 1,
    (outs DPR:$dst), (ins DPR:$src),
    N3RegFrm, IIC_VMOV, "vmov", "$dst, $src", "", []>;
def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1,
    (outs QPR:$dst), (ins QPR:$src),
    N3RegFrm, IIC_VMOV, "vmov", "$dst, $src", "", []>;

// Pseudo vector move instructions for QQ and QQQQ registers. This should
// be expanded after register allocation is completed.
def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
    NoItinerary, "", []>;

def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
    NoItinerary, "", []>;
} // neverHasSideEffects

// VMOV : Vector Move (Immediate)

let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1,
    (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
    "vmov", "i8", "$dst, $SIMM", "",
    [(set DPR:$dst, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1,
    (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
    "vmov", "i8", "$dst, $SIMM", "",
    [(set QPR:$dst, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1,
    (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
    "vmov", "i16", "$dst, $SIMM", "",
    [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1,
    (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
    "vmov", "i16", "$dst, $SIMM", "",
    [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1,
    (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
    "vmov", "i32", "$dst, $SIMM", "",
    [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1,
    (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
    "vmov", "i32", "$dst, $SIMM", "",
    [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1,
    (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
    "vmov", "i64", "$dst, $SIMM", "",
    [(set DPR:$dst, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1,
    (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
    "vmov", "i64", "$dst, $SIMM", "",
    [(set QPR:$dst, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
} // isReMaterializable

// VMOV : Vector Get Lane (move scalar to ARM core register)

def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
    (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
    IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]",
    [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
    (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
    IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]",
    [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
    (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
    IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]",
    [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
    (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
    IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]",
    [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
    (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
    IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]",
    [(set GPR:$dst, (extractelt (v2i32 DPR:$src), imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

// Lane reads from a Q register: extract the D subregister that holds the
// lane, then apply the D-register instruction with the lane index remapped.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>;
// Floating-point lane reads are plain subregister extracts.
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
              (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)),
              (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
              (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)),
              (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

// The incoming vector $src1 is tied to the result register.
let Constraints = "$src1 = $dst" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?},
    (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
    IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2",
    [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
                                   GPR:$src2, imm:$lane))]>;
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1},
    (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
    IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2",
    [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
                                   GPR:$src2, imm:$lane))]>;
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00,
    (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
    IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2",
    [(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
                               GPR:$src2, imm:$lane))]>;
}
// Lane writes into a Q register: update the D subregister that holds the
// lane and insert it back into the original Q value.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                         (DSubReg_i8_reg imm:$lane))),
                                  GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i16_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i32_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector from a floating-point register: place the scalar in the
// first subregister of an otherwise undefined vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

// scalar_to_vector from a core register: set lane 0 of an undefined vector.
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
    : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
            IIC_VMOVIS, "vdup", Dt, "$dst, $src",
            [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
    : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
            IIC_VMOVIS, "vdup", Dt, "$dst, $src",
            [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;

def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// f32 duplicates whose source is a core register holding the bit pattern.
def VDUPfd : NVDup<0b11101000, 0b1011, 0b00,
    (outs DPR:$dst), (ins GPR:$src),
    IIC_VMOVIS, "vdup", "32", "$dst, $src",
    [(set DPR:$dst, (v2f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;
def VDUPfq : NVDup<0b11101010, 0b1011, 0b00,
    (outs QPR:$dst), (ins GPR:$src),
    IIC_VMOVIS, "vdup", "32", "$dst, $src",
    [(set QPR:$dst, (v4f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, ValueType Ty>
    : NVDupLane<op19_16, 0,
                (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane),
                IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
                [(set DPR:$dst,
                      (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;

// The Q-register form still reads its source lane from a D register.
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy>
    : NVDupLane<op19_16, 1,
                (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane),
                IIC_VMOVQ, OpcodeStr, Dt, "$dst, $src[$lane]",
                [(set QPR:$dst,
                      (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.

def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8>;
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>;
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>;
def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>;
def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8>;
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>;
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>;
def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>;

// Lane duplication from a Q-register source: extract the D subregister that
// holds the lane and use the Q-result instruction with the remapped index.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// Pseudo instructions duplicating an f32 held in an SPR register.
// NOTE(review): VDUPfqf produces a Q register but uses the IIC_VMOVD
// itinerary -- confirm whether that is intended.
def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
    [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
    [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// VMOVN : Vector Narrowing Move
defm VMOVN : N2VN_HSD<0b11, 0b11, 0b10, 0b00100, 0, 0, IIC_VMOVN,
    "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 0, 0, IIC_VQUNAiD,
    "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 1, 0, IIC_VQUNAiD,
    "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00100, 1, 0, IIC_VQUNAiD,
    "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VL_QHS<0b01, 0b10100, 0, 1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11, 0b10100, 0, 1, "vmovl", "u", zext>;

// Vector Conversions.

// VCVT : Vector Convert Between Floating-Point and Integers
def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
    "vcvt", "s32.f32", v2i32, v2f32, fp_to_sint>;
def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
    "vcvt", "u32.f32", v2i32, v2f32, fp_to_uint>;
def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
    "vcvt", "f32.s32", v2f32, v2i32, sint_to_fp>;
def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
    "vcvt", "f32.u32", v2f32, v2i32, uint_to_fp>;

def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
    "vcvt", "s32.f32", v4i32, v4f32, fp_to_sint>;
def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
    "vcvt", "u32.f32", v4i32, v4f32, fp_to_uint>;
def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
    "vcvt", "f32.s32", v4f32, v4i32, sint_to_fp>;
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
    "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
// D-register (2 x 32-bit) fixed-point conversions, selected via intrinsics.
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;

// Q-register (4 x 32-bit) fixed-point conversions.
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;

// Vector Reverse.

// VREV64 : Vector Reverse elements within 64-bit doublewords

// D-register form; op19_18 carries the element size and Ty the vector type
// matched against NEONvrev64.
class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
// Q-register form of the same operation.
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVQ,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def VREV64df  : VREV64D<0b10, "vrev64", "32", v2f32>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def VREV64qf  : VREV64Q<0b10, "vrev64", "32", v4f32>;

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVQ,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVQ,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;

def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// VEXT : Vector Extract

// D-register form: two DPR sources plus an immediate index, matched against
// NEONvext on vectors of type Ty.
class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0,1,0b11,{?,?,?,?},0,0,
        (outs DPR:$dst), (ins DPR:$lhs, DPR:$rhs, i32imm:$index),
        NVExtFrm, IIC_VEXTD,
        OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
        [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
                                      (Ty DPR:$rhs), imm:$index)))]>;

// Q-register form of the same operation.
class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0,1,0b11,{?,?,?,?},1,0,
        (outs QPR:$dst), (ins QPR:$lhs, QPR:$rhs, i32imm:$index),
        NVExtFrm, IIC_VEXTQ,
        OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
        [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
                                      (Ty QPR:$rhs), imm:$index)))]>;

def VEXTd8  : VEXTd<"vext", "8", v8i8>;
def VEXTd16 : VEXTd<"vext", "16", v4i16>;
def VEXTd32 : VEXTd<"vext", "32", v2i32>;
def VEXTdf  : VEXTd<"vext", "32", v2f32>;

def VEXTq8  : VEXTq<"vext", "8", v16i8>;
def VEXTq16 : VEXTq<"vext", "16", v8i16>;
def VEXTq32 : VEXTq<"vext", "32", v4i32>;
def VEXTqf  : VEXTq<"vext", "32", v4f32>;

// VTRN : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

// VTBL : Vector Table Lookup
// Only the one-register form carries a selection pattern; VTBL2-4 have empty
// pattern lists and take each table register as a separate DPR operand.
def VTBL1
  : N3V<1,1,0b11,0b1000,0,0,
        (outs DPR:$dst), (ins DPR:$tbl1, DPR:$src),
        NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
  : N3V<1,1,0b11,0b1001,0,0,
        (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$src),
        NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>;
def VTBL3
  : N3V<1,1,0b11,0b1010,0,0,
        (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
        NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>;
def VTBL4
  : N3V<1,1,0b11,0b1011,0,0,
        (outs DPR:$dst),
        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudo forms that take the whole table as a single QPR / QQPR operand.
def VTBL2Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
def VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
// Same layout as VTBL, plus an $orig input tied to $dst.
def VTBX1
  : N3V<1,1,0b11,0b1000,1,0,
        (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$src),
        NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
  : N3V<1,1,0b11,0b1001,1,0,
        (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src),
        NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>;
def VTBX3
  : N3V<1,1,0b11,0b1010,1,0,
        (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src",
        "$orig = $dst", []>;
def VTBX4
  : N3V<1,1,0b11,0b1011,1,0,
        (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
        NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
        "$orig = $dst", []>;
} // hasExtraSrcRegAllocReq = 1

def VTBX2Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
                IIC_VTBX2, "$orig = $dst", []>;
def VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// Unary scalar pattern: place SPR:$a in ssub_0 of an undefined OpTy vector,
// apply Inst, and read the scalar result back out of ssub_0.
class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
  : NEONFPPat<(ResTy (OpNode SPR:$a)),
              (EXTRACT_SUBREG
                (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
                                           SPR:$a, ssub_0))),
                ssub_0)>;

// Binary scalar pattern: both f32 operands are inserted into ssub_0 of
// undefined v2f32 vectors before Inst is applied.
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
                (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                            SPR:$a, ssub_0),
                             (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                            SPR:$b, ssub_0))),
                ssub_0)>;

// Scalar multiply-accumulate style pattern: matches
// (OpNode $acc, (MulNode $a, $b)) and passes $acc, $a, $b to Inst in that
// order.
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
                (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                     SPR:$acc, ssub_0),
                      (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                     SPR:$a, ssub_0),
                      (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                     SPR:$b, ssub_0)),
                ssub_0)>;

// These need separate instructions because they must use the DPR_VFP2
// register class, which has SPR sub-registers.

// Vector Add Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>;
def : N3VSPat<fadd, VADDfd_sfp>;

// Vector Sub Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>;
def : N3VSPat<fsub, VSUBfd_sfp>;

// Vector Multiply Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>;
def : N3VSPat<fmul, VMULfd_sfp>;

// Vector Multiply-Accumulate/Subtract used for single-precision FP
// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
// we want to avoid them for now. e.g., alternating vmla/vadd instructions.

// Disabled single-precision VMLA/VMLS definitions, kept for reference.

//let neverHasSideEffects = 1 in
//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
//                            v2f32, fmul, fadd>;
//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;

//let neverHasSideEffects = 1 in
//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
//                            v2f32, fmul, fsub>;
//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;

// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0,
                     (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
                     "vabs", "f32", "$dst, $src", "", []>;
def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;

// Vector Negate used for single-precision FP
let neverHasSideEffects = 1 in
def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                     (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
                     "vneg", "f32", "$dst, $src", "", []>;
def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;

// Vector Maximum used for single-precision FP
let neverHasSideEffects = 1 in
def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
                     "vmax", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmax, VMAXfd_sfp>;

// Vector Minimum used for single-precision FP
// The third encoding argument (instruction bits 21-20, i.e. op:sz in the ARM
// A1 encoding of floating-point VMAX/VMIN) must be 0b10 here: op = 1 selects
// VMIN, whereas 0b00 is VMAX.  It was previously 0b00, which gave VMINfd_sfp
// exactly the same encoding as VMAXfd_sfp above.
let neverHasSideEffects = 1 in
def VMINfd_sfp : N3V<0, 0, 0b10, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
                     "vmin", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmin, VMINfd_sfp>;

// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                        v2i32, v2f32, fp_to_sint>;
def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;

let neverHasSideEffects = 1 in
def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                        v2i32, v2f32, fp_to_uint>;
def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;

let neverHasSideEffects = 1 in
def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                        v2f32, v2i32, sint_to_fp>;
def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;

let neverHasSideEffects = 1 in
def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                        v2f32, v2i32, uint_to_fp>;
def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// Each pattern maps a bitconvert between two DPR value types to the source
// register itself, retyped; no instruction is emitted.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
// Remaining 64-bit (DPR) bitconvert patterns: the source register is reused
// unchanged under the destination type.
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

// 128-bit (QPR) bitconvert patterns, one per ordered pair of distinct types
// among v2i64, v4i32, v8i16, v16i8, v4f32 and v2f64.
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;