// ARMInstrNEON.td revision 70e48b23a3455e4689ee24cec4eb153d67223e86
//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profile shared by the vector compare nodes below: one integer result
// and two operands that must have the same type as each other.
def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;

def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
// In each of these profiles the last operand is constrained to i32.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction: i32 result, integer vector operand, i32 lane operand.
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Vector result built from a single i32 operand (the encoded immediate).
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;

def NEONvdup      : SDNode<"ARMISD::VDUP",
                           SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Single-input shuffles: the result has the same vector type as the operand.
def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles: both results and both operands share one vector type.
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Matches a VMOVIMM whose encoded immediate decodes to 32-bit elements with
// value 0.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Matches a VMOVIMM whose encoded immediate decodes to 8-bit elements with
// value 0xff.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON operand definitions
117//===----------------------------------------------------------------------===// 118 119def nModImm : Operand<i32> { 120 let PrintMethod = "printNEONModImmOperand"; 121} 122 123//===----------------------------------------------------------------------===// 124// NEON load / store instructions 125//===----------------------------------------------------------------------===// 126 127let mayLoad = 1, neverHasSideEffects = 1 in { 128// Use vldmia to load a Q register as a D register pair. 129// This is equivalent to VLDMD except that it has a Q register operand 130// instead of a pair of D registers. 131def VLDMQ 132 : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p), 133 IndexModeNone, IIC_fpLoadm, 134 "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>; 135 136// Use vld1 to load a Q register as a D register pair. 137// This alternative to VLDMQ allows an alignment to be specified. 138// This is equivalent to VLD1q64 except that it has a Q register operand. 139def VLD1q 140 : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr), 141 IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>; 142} // mayLoad = 1, neverHasSideEffects = 1 143 144let mayStore = 1, neverHasSideEffects = 1 in { 145// Use vstmia to store a Q register as a D register pair. 146// This is equivalent to VSTMD except that it has a Q register operand 147// instead of a pair of D registers. 148def VSTMQ 149 : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p), 150 IndexModeNone, IIC_fpStorem, 151 "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>; 152 153// Use vst1 to store a Q register as a D register pair. 154// This alternative to VSTMQ allows an alignment to be specified. 155// This is equivalent to VST1q64 except that it has a Q register operand. 
156def VST1q 157 : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src), 158 IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>; 159} // mayStore = 1, neverHasSideEffects = 1 160 161let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 162 163// VLD1 : Vector Load (multiple single elements) 164class VLD1D<bits<4> op7_4, string Dt> 165 : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), 166 (ins addrmode6:$addr), IIC_VLD1, 167 "vld1", Dt, "\\{$dst\\}, $addr", "", []>; 168class VLD1Q<bits<4> op7_4, string Dt> 169 : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2), 170 (ins addrmode6:$addr), IIC_VLD1, 171 "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; 172 173def VLD1d8 : VLD1D<0b0000, "8">; 174def VLD1d16 : VLD1D<0b0100, "16">; 175def VLD1d32 : VLD1D<0b1000, "32">; 176def VLD1d64 : VLD1D<0b1100, "64">; 177 178def VLD1q8 : VLD1Q<0b0000, "8">; 179def VLD1q16 : VLD1Q<0b0100, "16">; 180def VLD1q32 : VLD1Q<0b1000, "32">; 181def VLD1q64 : VLD1Q<0b1100, "64">; 182 183// ...with address register writeback: 184class VLD1DWB<bits<4> op7_4, string Dt> 185 : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb), 186 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, 187 "vld1", Dt, "\\{$dst\\}, $addr$offset", 188 "$addr.addr = $wb", []>; 189class VLD1QWB<bits<4> op7_4, string Dt> 190 : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb), 191 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, 192 "vld1", Dt, "${dst:dregpair}, $addr$offset", 193 "$addr.addr = $wb", []>; 194 195def VLD1d8_UPD : VLD1DWB<0b0000, "8">; 196def VLD1d16_UPD : VLD1DWB<0b0100, "16">; 197def VLD1d32_UPD : VLD1DWB<0b1000, "32">; 198def VLD1d64_UPD : VLD1DWB<0b1100, "64">; 199 200def VLD1q8_UPD : VLD1QWB<0b0000, "8">; 201def VLD1q16_UPD : VLD1QWB<0b0100, "16">; 202def VLD1q32_UPD : VLD1QWB<0b1000, "32">; 203def VLD1q64_UPD : VLD1QWB<0b1100, "64">; 204 205// ...with 3 registers (some of these are only for the disassembler): 206class VLD1D3<bits<4> op7_4, string Dt> 
207 : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), 208 (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, 209 "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; 210class VLD1D3WB<bits<4> op7_4, string Dt> 211 : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), 212 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, 213 "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>; 214 215def VLD1d8T : VLD1D3<0b0000, "8">; 216def VLD1d16T : VLD1D3<0b0100, "16">; 217def VLD1d32T : VLD1D3<0b1000, "32">; 218def VLD1d64T : VLD1D3<0b1100, "64">; 219 220def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">; 221def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; 222def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; 223def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">; 224 225// ...with 4 registers (some of these are only for the disassembler): 226class VLD1D4<bits<4> op7_4, string Dt> 227 : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), 228 (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, 229 "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; 230class VLD1D4WB<bits<4> op7_4, string Dt> 231 : NLdSt<0,0b10,0b0010,op7_4, 232 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 233 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, 234 "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", 235 []>; 236 237def VLD1d8Q : VLD1D4<0b0000, "8">; 238def VLD1d16Q : VLD1D4<0b0100, "16">; 239def VLD1d32Q : VLD1D4<0b1000, "32">; 240def VLD1d64Q : VLD1D4<0b1100, "64">; 241 242def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">; 243def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">; 244def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">; 245def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">; 246 247// VLD2 : Vector Load (multiple 2-element structures) 248class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> 249 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2), 250 (ins addrmode6:$addr), IIC_VLD2, 251 "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", 
"", []>; 252class VLD2Q<bits<4> op7_4, string Dt> 253 : NLdSt<0, 0b10, 0b0011, op7_4, 254 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), 255 (ins addrmode6:$addr), IIC_VLD2, 256 "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; 257 258def VLD2d8 : VLD2D<0b1000, 0b0000, "8">; 259def VLD2d16 : VLD2D<0b1000, 0b0100, "16">; 260def VLD2d32 : VLD2D<0b1000, 0b1000, "32">; 261 262def VLD2q8 : VLD2Q<0b0000, "8">; 263def VLD2q16 : VLD2Q<0b0100, "16">; 264def VLD2q32 : VLD2Q<0b1000, "32">; 265 266// ...with address register writeback: 267class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 268 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), 269 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, 270 "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset", 271 "$addr.addr = $wb", []>; 272class VLD2QWB<bits<4> op7_4, string Dt> 273 : NLdSt<0, 0b10, 0b0011, op7_4, 274 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 275 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, 276 "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", 277 "$addr.addr = $wb", []>; 278 279def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">; 280def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">; 281def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">; 282 283def VLD2q8_UPD : VLD2QWB<0b0000, "8">; 284def VLD2q16_UPD : VLD2QWB<0b0100, "16">; 285def VLD2q32_UPD : VLD2QWB<0b1000, "32">; 286 287// ...with double-spaced registers (for disassembly only): 288def VLD2b8 : VLD2D<0b1001, 0b0000, "8">; 289def VLD2b16 : VLD2D<0b1001, 0b0100, "16">; 290def VLD2b32 : VLD2D<0b1001, 0b1000, "32">; 291def VLD2b8_UPD : VLD2DWB<0b1001, 0b0000, "8">; 292def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">; 293def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">; 294 295// VLD3 : Vector Load (multiple 3-element structures) 296class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> 297 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), 298 (ins addrmode6:$addr), IIC_VLD3, 299 
"vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; 300 301def VLD3d8 : VLD3D<0b0100, 0b0000, "8">; 302def VLD3d16 : VLD3D<0b0100, 0b0100, "16">; 303def VLD3d32 : VLD3D<0b0100, 0b1000, "32">; 304 305// ...with address register writeback: 306class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 307 : NLdSt<0, 0b10, op11_8, op7_4, 308 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), 309 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3, 310 "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset", 311 "$addr.addr = $wb", []>; 312 313def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; 314def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; 315def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; 316 317// ...with double-spaced registers (non-updating versions for disassembly only): 318def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; 319def VLD3q16 : VLD3D<0b0101, 0b0100, "16">; 320def VLD3q32 : VLD3D<0b0101, 0b1000, "32">; 321def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">; 322def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">; 323def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">; 324 325// ...alternate versions to be allocated odd register numbers: 326def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">; 327def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">; 328def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">; 329 330// VLD4 : Vector Load (multiple 4-element structures) 331class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> 332 : NLdSt<0, 0b10, op11_8, op7_4, 333 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), 334 (ins addrmode6:$addr), IIC_VLD4, 335 "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; 336 337def VLD4d8 : VLD4D<0b0000, 0b0000, "8">; 338def VLD4d16 : VLD4D<0b0000, 0b0100, "16">; 339def VLD4d32 : VLD4D<0b0000, 0b1000, "32">; 340 341// ...with address register writeback: 342class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 343 : NLdSt<0, 0b10, op11_8, op7_4, 344 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 345 (ins addrmode6:$addr, 
am6offset:$offset), IIC_VLD4, 346 "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", 347 "$addr.addr = $wb", []>; 348 349def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; 350def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">; 351def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; 352 353// ...with double-spaced registers (non-updating versions for disassembly only): 354def VLD4q8 : VLD4D<0b0001, 0b0000, "8">; 355def VLD4q16 : VLD4D<0b0001, 0b0100, "16">; 356def VLD4q32 : VLD4D<0b0001, 0b1000, "32">; 357def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">; 358def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">; 359def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">; 360 361// ...alternate versions to be allocated odd register numbers: 362def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">; 363def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">; 364def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">; 365 366// VLD1LN : Vector Load (single element to one lane) 367// FIXME: Not yet implemented. 368 369// VLD2LN : Vector Load (single 2-element structure to one lane) 370class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 371 : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2), 372 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), 373 IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", 374 "$src1 = $dst1, $src2 = $dst2", []>; 375 376def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8">; 377def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">; 378def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">; 379 380// ...with double-spaced registers: 381def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">; 382def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">; 383 384// ...alternate versions to be allocated odd register numbers: 385def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">; 386def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">; 387 388// ...with address register writeback: 389class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 390 : NLdSt<1, 0b10, op11_8, op7_4, (outs 
DPR:$dst1, DPR:$dst2, GPR:$wb), 391 (ins addrmode6:$addr, am6offset:$offset, 392 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt, 393 "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset", 394 "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>; 395 396def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8">; 397def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">; 398def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">; 399 400def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">; 401def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">; 402 403// VLD3LN : Vector Load (single 3-element structure to one lane) 404class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 405 : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), 406 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, 407 nohash_imm:$lane), IIC_VLD3, "vld3", Dt, 408 "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", 409 "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; 410 411def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8">; 412def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">; 413def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">; 414 415// ...with double-spaced registers: 416def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">; 417def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">; 418 419// ...alternate versions to be allocated odd register numbers: 420def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">; 421def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">; 422 423// ...with address register writeback: 424class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 425 : NLdSt<1, 0b10, op11_8, op7_4, 426 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), 427 (ins addrmode6:$addr, am6offset:$offset, 428 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), 429 IIC_VLD3, "vld3", Dt, 430 "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset", 431 "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb", 432 []>; 433 434def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8">; 435def 
VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">; 436def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">; 437 438def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">; 439def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">; 440 441// VLD4LN : Vector Load (single 4-element structure to one lane) 442class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 443 : NLdSt<1, 0b10, op11_8, op7_4, 444 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), 445 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, 446 nohash_imm:$lane), IIC_VLD4, "vld4", Dt, 447 "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr", 448 "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; 449 450def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8">; 451def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">; 452def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">; 453 454// ...with double-spaced registers: 455def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">; 456def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">; 457 458// ...alternate versions to be allocated odd register numbers: 459def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">; 460def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">; 461 462// ...with address register writeback: 463class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 464 : NLdSt<1, 0b10, op11_8, op7_4, 465 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 466 (ins addrmode6:$addr, am6offset:$offset, 467 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 468 IIC_VLD4, "vld4", Dt, 469"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset", 470"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb", 471 []>; 472 473def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8">; 474def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">; 475def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">; 476 477def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">; 478def VLD4LNq32_UPD : VLD4LNWB<0b1011, 
{?,1,?,?}, "32">; 479 480// VLD1DUP : Vector Load (single element to all lanes) 481// VLD2DUP : Vector Load (single 2-element structure to all lanes) 482// VLD3DUP : Vector Load (single 3-element structure to all lanes) 483// VLD4DUP : Vector Load (single 4-element structure to all lanes) 484// FIXME: Not yet implemented. 485} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 486 487let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { 488 489// Classes for VST* pseudo-instructions with multi-register operands. 490// These are expanded to real instructions after register allocation. 491class VSTQQPseudo 492 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">; 493class VSTQQWBPseudo 494 : PseudoNLdSt<(outs GPR:$wb), 495 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST, 496 "$addr.addr = $wb">; 497class VSTQQQQWBPseudo 498 : PseudoNLdSt<(outs GPR:$wb), 499 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST, 500 "$addr.addr = $wb">; 501 502// VST1 : Vector Store (multiple single elements) 503class VST1D<bits<4> op7_4, string Dt> 504 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, 505 "vst1", Dt, "\\{$src\\}, $addr", "", []>; 506class VST1Q<bits<4> op7_4, string Dt> 507 : NLdSt<0,0b00,0b1010,op7_4, (outs), 508 (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, 509 "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>; 510 511def VST1d8 : VST1D<0b0000, "8">; 512def VST1d16 : VST1D<0b0100, "16">; 513def VST1d32 : VST1D<0b1000, "32">; 514def VST1d64 : VST1D<0b1100, "64">; 515 516def VST1q8 : VST1Q<0b0000, "8">; 517def VST1q16 : VST1Q<0b0100, "16">; 518def VST1q32 : VST1Q<0b1000, "32">; 519def VST1q64 : VST1Q<0b1100, "64">; 520 521// ...with address register writeback: 522class VST1DWB<bits<4> op7_4, string Dt> 523 : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), 524 (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST, 525 "vst1", Dt, "\\{$src\\}, 
$addr$offset", "$addr.addr = $wb", []>; 526class VST1QWB<bits<4> op7_4, string Dt> 527 : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb), 528 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST, 529 "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>; 530 531def VST1d8_UPD : VST1DWB<0b0000, "8">; 532def VST1d16_UPD : VST1DWB<0b0100, "16">; 533def VST1d32_UPD : VST1DWB<0b1000, "32">; 534def VST1d64_UPD : VST1DWB<0b1100, "64">; 535 536def VST1q8_UPD : VST1QWB<0b0000, "8">; 537def VST1q16_UPD : VST1QWB<0b0100, "16">; 538def VST1q32_UPD : VST1QWB<0b1000, "32">; 539def VST1q64_UPD : VST1QWB<0b1100, "64">; 540 541// ...with 3 registers (some of these are only for the disassembler): 542class VST1D3<bits<4> op7_4, string Dt> 543 : NLdSt<0, 0b00, 0b0110, op7_4, (outs), 544 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), 545 IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; 546class VST1D3WB<bits<4> op7_4, string Dt> 547 : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), 548 (ins addrmode6:$addr, am6offset:$offset, 549 DPR:$src1, DPR:$src2, DPR:$src3), 550 IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", 551 "$addr.addr = $wb", []>; 552 553def VST1d8T : VST1D3<0b0000, "8">; 554def VST1d16T : VST1D3<0b0100, "16">; 555def VST1d32T : VST1D3<0b1000, "32">; 556def VST1d64T : VST1D3<0b1100, "64">; 557 558def VST1d8T_UPD : VST1D3WB<0b0000, "8">; 559def VST1d16T_UPD : VST1D3WB<0b0100, "16">; 560def VST1d32T_UPD : VST1D3WB<0b1000, "32">; 561def VST1d64T_UPD : VST1D3WB<0b1100, "64">; 562 563// ...with 4 registers (some of these are only for the disassembler): 564class VST1D4<bits<4> op7_4, string Dt> 565 : NLdSt<0, 0b00, 0b0010, op7_4, (outs), 566 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), 567 IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", 568 []>; 569class VST1D4WB<bits<4> op7_4, string Dt> 570 : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), 571 (ins addrmode6:$addr, 
am6offset:$offset, 572 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), 573 IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", 574 "$addr.addr = $wb", []>; 575 576def VST1d8Q : VST1D4<0b0000, "8">; 577def VST1d16Q : VST1D4<0b0100, "16">; 578def VST1d32Q : VST1D4<0b1000, "32">; 579def VST1d64Q : VST1D4<0b1100, "64">; 580 581def VST1d8Q_UPD : VST1D4WB<0b0000, "8">; 582def VST1d16Q_UPD : VST1D4WB<0b0100, "16">; 583def VST1d32Q_UPD : VST1D4WB<0b1000, "32">; 584def VST1d64Q_UPD : VST1D4WB<0b1100, "64">; 585 586def VST1d64QPseudo : VSTQQPseudo; 587def VST1d64QPseudo_UPD : VSTQQWBPseudo; 588 589// VST2 : Vector Store (multiple 2-element structures) 590class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> 591 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 592 (ins addrmode6:$addr, DPR:$src1, DPR:$src2), 593 IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>; 594class VST2Q<bits<4> op7_4, string Dt> 595 : NLdSt<0, 0b00, 0b0011, op7_4, (outs), 596 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), 597 IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", 598 "", []>; 599 600def VST2d8 : VST2D<0b1000, 0b0000, "8">; 601def VST2d16 : VST2D<0b1000, 0b0100, "16">; 602def VST2d32 : VST2D<0b1000, 0b1000, "32">; 603 604def VST2q8 : VST2Q<0b0000, "8">; 605def VST2q16 : VST2Q<0b0100, "16">; 606def VST2q32 : VST2Q<0b1000, "32">; 607 608// ...with address register writeback: 609class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 610 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 611 (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2), 612 IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset", 613 "$addr.addr = $wb", []>; 614class VST2QWB<bits<4> op7_4, string Dt> 615 : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 616 (ins addrmode6:$addr, am6offset:$offset, 617 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), 618 IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", 619 "$addr.addr = $wb", []>; 620 621def 
VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">; 622def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">; 623def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">; 624 625def VST2q8_UPD : VST2QWB<0b0000, "8">; 626def VST2q16_UPD : VST2QWB<0b0100, "16">; 627def VST2q32_UPD : VST2QWB<0b1000, "32">; 628 629// ...with double-spaced registers (for disassembly only): 630def VST2b8 : VST2D<0b1001, 0b0000, "8">; 631def VST2b16 : VST2D<0b1001, 0b0100, "16">; 632def VST2b32 : VST2D<0b1001, 0b1000, "32">; 633def VST2b8_UPD : VST2DWB<0b1001, 0b0000, "8">; 634def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">; 635def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">; 636 637// VST3 : Vector Store (multiple 3-element structures) 638class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> 639 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 640 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, 641 "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; 642 643def VST3d8 : VST3D<0b0100, 0b0000, "8">; 644def VST3d16 : VST3D<0b0100, 0b0100, "16">; 645def VST3d32 : VST3D<0b0100, 0b1000, "32">; 646 647// ...with address register writeback: 648class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 649 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 650 (ins addrmode6:$addr, am6offset:$offset, 651 DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, 652 "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", 653 "$addr.addr = $wb", []>; 654 655def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; 656def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; 657def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; 658 659// ...with double-spaced registers (non-updating versions for disassembly only): 660def VST3q8 : VST3D<0b0101, 0b0000, "8">; 661def VST3q16 : VST3D<0b0101, 0b0100, "16">; 662def VST3q32 : VST3D<0b0101, 0b1000, "32">; 663def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">; 664def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">; 665def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">; 666 667// ...alternate versions to be allocated odd 
register numbers: 668def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">; 669def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">; 670def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">; 671 672// VST4 : Vector Store (multiple 4-element structures) 673class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> 674 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 675 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), 676 IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", 677 "", []>; 678 679def VST4d8 : VST4D<0b0000, 0b0000, "8">; 680def VST4d16 : VST4D<0b0000, 0b0100, "16">; 681def VST4d32 : VST4D<0b0000, 0b1000, "32">; 682 683def VST4d8Pseudo : VSTQQPseudo; 684def VST4d16Pseudo : VSTQQPseudo; 685def VST4d32Pseudo : VSTQQPseudo; 686 687// ...with address register writeback: 688class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 689 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 690 (ins addrmode6:$addr, am6offset:$offset, 691 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, 692 "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", 693 "$addr.addr = $wb", []>; 694 695def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">; 696def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">; 697def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">; 698 699def VST4d8Pseudo_UPD : VSTQQWBPseudo; 700def VST4d16Pseudo_UPD : VSTQQWBPseudo; 701def VST4d32Pseudo_UPD : VSTQQWBPseudo; 702 703// ...with double-spaced registers (non-updating versions for disassembly only): 704def VST4q8 : VST4D<0b0001, 0b0000, "8">; 705def VST4q16 : VST4D<0b0001, 0b0100, "16">; 706def VST4q32 : VST4D<0b0001, 0b1000, "32">; 707def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">; 708def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">; 709def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">; 710 711def VST4q8Pseudo_UPD : VSTQQQQWBPseudo; 712def VST4q16Pseudo_UPD : VSTQQQQWBPseudo; 713def VST4q32Pseudo_UPD : VSTQQQQWBPseudo; 714 715// ...alternate versions to be allocated odd register numbers: 716def VST4q8oddPseudo_UPD 
: VSTQQQQWBPseudo; 717def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo; 718def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo; 719 720// VST1LN : Vector Store (single element from one lane) 721// FIXME: Not yet implemented. 722 723// VST2LN : Vector Store (single 2-element structure from one lane) 724class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 725 : NLdSt<1, 0b00, op11_8, op7_4, (outs), 726 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), 727 IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", 728 "", []>; 729 730def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8">; 731def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">; 732def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">; 733 734// ...with double-spaced registers: 735def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">; 736def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">; 737 738// ...alternate versions to be allocated odd register numbers: 739def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">; 740def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">; 741 742// ...with address register writeback: 743class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 744 : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 745 (ins addrmode6:$addr, am6offset:$offset, 746 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt, 747 "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", 748 "$addr.addr = $wb", []>; 749 750def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8">; 751def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">; 752def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">; 753 754def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">; 755def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">; 756 757// VST3LN : Vector Store (single 3-element structure from one lane) 758class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 759 : NLdSt<1, 0b00, op11_8, op7_4, (outs), 760 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, 761 nohash_imm:$lane), IIC_VST, "vst3", Dt, 762 "\\{$src1[$lane], $src2[$lane], 
$src3[$lane]\\}, $addr", "", []>; 763 764def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8">; 765def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">; 766def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">; 767 768// ...with double-spaced registers: 769def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">; 770def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">; 771 772// ...alternate versions to be allocated odd register numbers: 773def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">; 774def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">; 775 776// ...with address register writeback: 777class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 778 : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 779 (ins addrmode6:$addr, am6offset:$offset, 780 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), 781 IIC_VST, "vst3", Dt, 782 "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset", 783 "$addr.addr = $wb", []>; 784 785def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8">; 786def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">; 787def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">; 788 789def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">; 790def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">; 791 792// VST4LN : Vector Store (single 4-element structure from one lane) 793class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 794 : NLdSt<1, 0b00, op11_8, op7_4, (outs), 795 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, 796 nohash_imm:$lane), IIC_VST, "vst4", Dt, 797 "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr", 798 "", []>; 799 800def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8">; 801def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">; 802def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">; 803 804// ...with double-spaced registers: 805def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">; 806def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">; 807 808// ...alternate versions to be allocated odd register numbers: 809def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, 
"16">; 810def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">; 811 812// ...with address register writeback: 813class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 814 : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 815 (ins addrmode6:$addr, am6offset:$offset, 816 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 817 IIC_VST, "vst4", Dt, 818 "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset", 819 "$addr.addr = $wb", []>; 820 821def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8">; 822def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">; 823def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">; 824 825def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">; 826def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">; 827 828} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 829 830 831//===----------------------------------------------------------------------===// 832// NEON pattern fragments 833//===----------------------------------------------------------------------===// 834 835// Extract D sub-registers of Q registers. 836def DSubReg_i8_reg : SDNodeXForm<imm, [{ 837 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 838 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32); 839}]>; 840def DSubReg_i16_reg : SDNodeXForm<imm, [{ 841 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 842 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32); 843}]>; 844def DSubReg_i32_reg : SDNodeXForm<imm, [{ 845 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 846 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32); 847}]>; 848def DSubReg_f64_reg : SDNodeXForm<imm, [{ 849 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 850 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32); 851}]>; 852 853// Extract S sub-registers of Q/D registers. 
// Maps the immediate directly onto the ssub_N enumerators (ssub_0 + imm); the
// assert checks that ssub_0..ssub_3 are consecutive.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// The mask keeps the low bits of the lane number (7, 3 or 1 for 8-, 16- and
// 32-bit elements respectively).
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: single-, double- and quad-register.
// N2VS operates on DPR_VFP2 registers and carries an empty pattern list;
// its ResTy, OpTy and OpNode parameters are not referenced in the body.
class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
        IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
// N2VD / N2VQ: D- and Q-register forms selecting (ResTy (OpNode (OpTy src))).
// They differ in the sixth N2V argument (0 vs. 1), the register class and the
// itinerary (IIC_VUNAD vs. IIC_VUNAQ).
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
        (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt,"$dst, $src", "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
        (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt,"$dst, $src", "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Same shape as N2VD/N2VQ, but the operation is an Intrinsic and the
// itinerary is supplied by the caller.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
        (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Narrow 2-register intrinsics.
// Q-register source (type TyQ), D-register result (type TyD).
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;

// Long 2-register operations (currently only used for VMOVL).
// D-register source (type TyD), Q-register result (type TyQ).
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
        (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written: each source is tied to the matching
// destination ("$src1 = $dst1, $src2 = $dst2"). No selection pattern.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$dst1, $dst2",
        "$src1 = $dst1, $src2 = $dst2", []>;
// Quad-register form; unlike N2VDShuffle the itinerary is a parameter.
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2",
        "$src1 = $dst1, $src2 = $dst2", []>;

// Basic 3-register operations: single-, double- and quad-register.
// N3VS operates on DPR_VFP2 registers and carries an empty pattern list;
// ResTy, OpTy and OpNode are not referenced in the body. Commutable is
// forwarded to isCommutable.
class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm,
        IIC_VBIND, OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
  let isCommutable = Commutable;
}

// D-register form selecting (ResTy (OpNode (OpTy src1), (OpTy src2))).
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
// Derives from N3VX and takes no Dt string; otherwise the same operands and
// pattern shape as a D-register 3-register operation.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr,
           ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
         OpcodeStr, "$dst, $src1, $src2", "",
         [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{
  let isCommutable = Commutable;
}

// "SL" forms: the second operand is one lane of a D register, matched as
// (NEONvduplane src2, lane). The lane register is DPR_VFP2 here and DPR_8 in
// the "16" variants, which also fix the itinerary instead of taking it as a
// parameter. isCommutable is always 0.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$src2),imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
  let isCommutable = 0;
}

// Q-register form selecting (ResTy (OpNode (OpTy src1), (OpTy src2))).
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
// Q-register form without a Dt string (derives from N3VX).
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
         OpcodeStr, "$dst, $src1, $src2", "",
         [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{
  let isCommutable = Commutable;
}
// Q-register lane forms: the lane source is a D register of type OpTy, and
// the NEONvduplane result has the Q-register type ResTy.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
// D-register form; the instruction Format and itinerary are parameters and
// Commutable is forwarded to isCommutable.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), f, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// Lane ("SL") forms: the second operand is (NEONvduplane src2, lane), with
// src2 in DPR_VFP2 (or DPR_8 for the "16" variant). isCommutable is 0.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
  let isCommutable = 0;
}

// Q-register form of N3VDInt.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), f, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
// Q-register lane forms: the lane source is a D register of type OpTy, and
// the NEONvduplane result has type ResTy.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: single-, double- and quad-register.
// In all of these classes $src1 is the accumulator and is tied to $dst
// ("$src1 = $dst"); only $src2 and $src3 appear in the asm string.
// N3VSMulOp operates on DPR_VFP2 registers and carries an empty pattern list;
// Ty, MulOp and OpNode are not referenced in its body.
class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;

// D-register form selecting (OpNode src1, (MulOp src2, src3)).
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set DPR:$dst, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
// Lane forms: the multiplier is (NEONvduplane src3, lane), with src3 in
// DPR_VFP2 (or DPR_8 for the "16" variant).
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
                                                     imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_8:$src3),
                                                     imm:$lane)))))))]>;

// Q-register form selecting (OpNode src1, (MulOp src2, src3)).
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
// Q-register lane forms: the lane source is a D register of type OpTy, and
// the NEONvduplane result has type ResTy.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                        imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                   (ResTy (NEONvduplane (OpTy DPR_8:$src3),
                                                        imm:$lane)))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// $src1 is tied to $dst ("$src1 = $dst") and is passed to the intrinsic as
// its first argument; only $src2 and $src3 are printed.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
                                      (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
// Q-register form of N3VDInt3.
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
                                      (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;

// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
// Q-register accumulator $src1 (type TyQ, tied to $dst) combined with two
// D-register operands of type TyD.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
// Lane forms: the third intrinsic argument is (NEONvduplane src3, lane), with
// src3 in DPR_VFP2 (or DPR_8 for the "16" variant).
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src3),
                                                imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Two Q-register sources (type TyQ), D-register result (type TyD). The
// itinerary is fixed to IIC_VBINi4D.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Two D-register sources (type TyD), Q-register result (type TyQ).
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// Lane forms: the second operand is (NEONvduplane src2, lane), with src2 in
// DPR_VFP2 (or DPR_8 for the "16" variant).
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]>;

// Wide 3-register intrinsics.
// Q-register first source (type TyQ), D-register second source (type TyD),
// Q-register result. The itinerary is fixed to IIC_VSUBiD.
class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
        (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
// NOTE(review): the quad-register form uses the same IIC_VSHLiD itinerary as
// the double-register form above -- confirm this is intended.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
        (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// $src1 (type ResTy) is the accumulator tied to $dst; $src2 (type OpTy) is
// the only source printed in the asm string.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VPALiD,
        OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
        [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
// Q-register form; uses the IIC_VPALiQ itinerary.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VPALiQ,
        OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
        [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;

// Shift by immediate,
// both double- and quad-register.
// The shift amount is an i32imm operand ($SIMM) matched as (i32 imm).
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), f, itin,
           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), f, itin,
           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
// D-register source (type OpTy), Q-register result (type ResTy). The format
// and itinerary are fixed to N2RegVShLFrm and IIC_VSHLiD.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
                                          (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
// Q-register source (type OpTy), D-register result (type ResTy); the format
// is fixed to N2RegVShRFrm.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
                                          (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// Selects (add src1, (ShOp src2, imm)) with $src1 tied to $dst.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set DPR:$dst, (Ty (add DPR:$src1,
                                (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
// NOTE(review): the quad-register form uses the same IIC_VPALiD itinerary as
// the double-register form above -- confirm this is intended.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set QPR:$dst, (Ty (add QPR:$src1,
                                (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
// Selects (ShOp src1, src2, imm) with $src1 tied to $dst; only $src2 and the
// immediate are printed. Itineraries are IIC_VSHLiD (D) and IIC_VSHLiQ (Q).
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
// The immediate is an i32imm operand ($SIMM) passed as the intrinsic's second
// argument; the format is fixed to NVCVTFrm.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations -- for disassembly only.

// First with only element sizes of 8, 16 and 32 bits:
// Every record has an empty pattern list and NoItinerary. The integer records
// append the element size to Dt; the f32 records use the literal "f32" data
// type string and additionally set Inst{10}.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "", []>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "", []>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "", []>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src), NoItinerary,
                  opc, "f32", asm, "", []> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "", []>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "", []>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "", []>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src), NoItinerary,
                  opc, "f32", asm, "", []> {
    let Inst{10} = 1; // overwrite F = 1
  }
}

// Neon 3-register vector operations.
// First with only element sizes of 8, 16 and 32 bits:
// The 8- and 16-bit variants share the "16" itinerary of their register
// width; the 32-bit variants use the "32" itinerary.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16, OpcodeStr,
                   !strconcat(Dt, "8"), v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16, OpcodeStr,
                   !strconcat(Dt, "16"), v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32, OpcodeStr,
                   !strconcat(Dt, "32"), v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16, OpcodeStr,
                   !strconcat(Dt, "8"), v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16, OpcodeStr,
                   !strconcat(Dt, "16"), v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32, OpcodeStr,
                   !strconcat(Dt, "32"), v4i32, v4i32, OpNode, Commutable>;
}

// Scalar ("SL") forms for 16- and 32-bit elements.  The quad-register
// variants take the scalar operand with the corresponding double-register
// type (v4i16 / v2i32).
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr,
                       !strconcat(Dt, "16"), v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr,
                     !strconcat(Dt, "32"), v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr,
                       !strconcat(Dt, "16"), v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr,
                     !strconcat(Dt, "32"), v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
// Only one itinerary per register width is taken here; it is forwarded to
// N3V_QHS for both its "16" and "32" itinerary slots.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4,
            itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD, OpcodeStr,
                   !strconcat(Dt, "64"), v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ, OpcodeStr,
                   !strconcat(Dt, "64"), v2i64, v2i64, OpNode, Commutable>;
}


// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// Records are named after the (narrow) result type, while the data-type
// suffix reflects the (wide) source element size.
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
// Records are named after the (wide) result type; the data-type suffix and
// the second value type give the (narrow) source element size.
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode>;
}


// Neon 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
// One itinerary is supplied per (register width, element size) pair.
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16, OpcodeStr,
                      !strconcat(Dt, "16"), v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32, OpcodeStr,
                      !strconcat(Dt, "32"), v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16, OpcodeStr,
                      !strconcat(Dt, "16"), v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32, OpcodeStr,
                      !strconcat(Dt, "32"), v4i32, v4i32, IntOp, Commutable>;
}

// Scalar ("SL") intrinsic forms for 16- and 32-bit elements.  The
// quad-register variants take the scalar operand with the matching
// double-register type.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, OpcodeStr,
                          !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, OpcodeStr,
                          !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, OpcodeStr,
                        !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The 8-bit variants reuse the 16-bit itineraries.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f,
              itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16, OpcodeStr,
                      !strconcat(Dt, "8"), v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16, OpcodeStr,
                      !strconcat(Dt, "8"), v16i8, v16i8, IntOp, Commutable>;
}

// ....then also with element size of 64 bits:
// The 64-bit variants reuse the 32-bit itineraries.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f,
               itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32, OpcodeStr,
                      !strconcat(Dt, "64"), v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32, OpcodeStr,
                      !strconcat(Dt, "64"), v2i64, v2i64, IntOp, Commutable>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// Records are named after the (narrow) result type; the data-type suffix is
// the (wide) source element size.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4, OpcodeStr,
                      !strconcat(Dt, "16"), v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4, OpcodeStr,
                      !strconcat(Dt, "32"), v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4, OpcodeStr,
                      !strconcat(Dt, "64"), v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
// Records are named after the (wide) result type; the data-type suffix is
// the (narrow) source element size.
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                      !strconcat(Dt, "16"), v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                      !strconcat(Dt, "32"), v2i64, v2i32, IntOp, Commutable>;
}

// Scalar ("SL") long forms.  Unlike N3VLInt_HS, the records here are named
// after the source type (v4i16 / v2i32), and one itinerary serves both sizes.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                          !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The 8-bit variant reuses the 16-bit itinerary.
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                      !strconcat(Dt, "8"), v8i16, v8i8, IntOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4, OpcodeStr,
                      !strconcat(Dt, "8"), v8i16, v8i8, IntOp, Commutable>;
  def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4, OpcodeStr,
                      !strconcat(Dt, "16"), v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4, OpcodeStr,
                      !strconcat(Dt, "32"), v2i64, v2i32, IntOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// The multiply node is always the integer `mul`; OpNode is the operation
// combined with it (supplied by the instantiating defm).
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16, OpcodeStr,
                        !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16, OpcodeStr,
                        !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32, OpcodeStr,
                        !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Scalar ("SL") Multiply-Op forms for 16- and 32-bit elements.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, OpcodeStr,
                            !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, OpcodeStr,
                          !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, OpcodeStr,
                            !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, OpcodeStr,
                          !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Instantiates the 3-argument intrinsic form for 8/16/32-bit elements in both
// register widths.  A single itinerary per width (itinD / itinQ) is shared by
// all element sizes.
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD, OpcodeStr,
                       !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD, OpcodeStr,
                       !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD, OpcodeStr,
                       !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ, OpcodeStr,
                       !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ, OpcodeStr,
                       !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ, OpcodeStr,
                       !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long 3-argument intrinsics.
// First with only element sizes of 16 and 32 bits:
// Records are named after the (wide) result type; the data-type suffix is
// the (narrow) source element size.
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                       !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// Scalar ("SL") long 3-argument forms.  Records are named after the source
// type, and the itineraries are fixed to IIC_VMACi16D / IIC_VMACi32D.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                           !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                         !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The 8-bit variant reuses the 16-bit itinerary.
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32,
                OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                       !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinD,
                      OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinD,
                      OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinD,
                      OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Records are named after the source type; the result type has half as many
// elements, each twice as wide.
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// For 8/16/32-bit elements the leading bits of Inst{21-16} are pinned to mark
// the element size (see the imm6 notes); the 64-bit forms instead pass 1 as
// the fourth argument and leave imm6 unconstrained.
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode, Format f> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin, OpcodeStr,
                     !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, OpcodeStr,
                        !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, OpcodeStr,
                        !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}


// Neon Shift-Insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// This multiclass takes no Dt prefix; the data-type string is the bare
// element size ("8", "16", "32", "64").
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp,
                         Format f> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "8", v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "16", v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "32", v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
                        f, OpcodeStr, "64", v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "8", v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "16", v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "32", v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
                        f, OpcodeStr, "64", v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits:
// Records are named after the (wide) result type.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits:
// Records are named after the (narrow) result type; the data-type suffix is
// the (wide) source element size.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr,
                      string Dt, SDNode OpNode> {
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "16"), v8i8, v8i16, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v4i16, v4i32, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "64"), v2i32, v2i64, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

//   VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ,
                         "vadd", "i", add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND,
                     "vadd", "f32", v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ,
                     "vadd", "f32", v4f32, v4f32, fadd, 1>;
//   VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLInt_QHS<0, 1, 0b0000, 0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", int_arm_neon_vaddls, 1>;
defm VADDLu   : N3VLInt_QHS<1, 1, 0b0000, 0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", int_arm_neon_vaddlu, 1>;
//   VADDW    : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VWInt_QHS<0, 1, 0b0001, 0,
                            "vaddw", "s", int_arm_neon_vaddws, 0>;
defm VADDWu   : N3VWInt_QHS<1, 1, 0b0001, 0,
                            "vaddw", "u", int_arm_neon_vaddwu, 0>;
//   VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D,
                           IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D,
                           IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
//   VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D,
                           IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D,
                           IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
//   VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D,
                            IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D,
                            IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", int_arm_neon_vqaddu, 1>;
//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0, 1, 0b0100, 0, "vaddhn", "i",
                            int_arm_neon_vaddhn, 1>;
//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1, 1, 0b0100, 0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

// Vector Multiply Operations.
//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1,
                        IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q,
                        "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D,
                        "vmul", "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q,
                        "vmul", "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND,
                     "vmul", "f32", v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ,
                     "vmul", "f32", v4f32, v4f32, fmul, 1>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul", "i", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND,
                       "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ,
                       "vmul", "f32", v4f32, v2f32, fmul>;

// The patterns below match a multiply by a lane duplicated from a Q register
// and select the by-scalar instruction: the D subregister holding the lane is
// extracted (DSubReg_*_reg) and the lane index is remapped to be relative to
// that subregister (SubReg_*_lane).
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                           imm:$lane)))),
          (v8i16 (VMULslv8i16
                   (v8i16 QPR:$src1),
                   (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                           imm:$lane)))),
          (v4i32 (VMULslv4i32
                   (v4i32 QPR:$src1),
                   (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2),
                                            imm:$lane)))),
          (v4f32 (VMULslfq
                   (v4f32 QPR:$src1),
                   (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm,
                          IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100,
                            IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh
                   (v8i16 QPR:$src1),
                   (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                        imm:$lane)))),
          (v8i16 (VQDMULHslv8i16
                   (v8i16 QPR:$src1),
                   (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh
                   (v4i32 QPR:$src1),
                   (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                        imm:$lane)))),
          (v4i32 (VQDMULHslv4i32
                   (v4i32 QPR:$src1),
                   (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101,
                              IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh
                   (v8i16 QPR:$src1),
                   (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                        imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16
                   (v8i16 QPR:$src1),
                   (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh
                   (v4i32 QPR:$src1),
                   (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                        imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32
                   (v4i32 QPR:$src1),
                   (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs   : N3VLInt_QHS<0, 1, 0b1100, 0, IIC_VMULi16D, IIC_VMULi32D,
                            "vmull", "s", int_arm_neon_vmulls, 1>;
defm VMULLu   : N3VLInt_QHS<1, 1, 0b1100, 0, IIC_VMULi16D, IIC_VMULi32D,
                            "vmull", "u", int_arm_neon_vmullu, 1>;
def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D,
                        "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D,
                             "vmull", "s", int_arm_neon_vmulls>;
defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D,
                             "vmull", "u", int_arm_neon_vmullu>;

//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0, 1, 0b1101, 0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0,
                             IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q,
                             "vmla", "i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD,
                          "vmla", "f32", v2f32, fmul, fadd>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ,
                          "vmla", "f32", v4f32, fmul, fadd>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000,
                              IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q,
                              "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD,
                            "vmla", "f32", v2f32, fmul, fadd>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ,
                            "vmla", "f32", v4f32, v2f32, fmul, fadd>;

def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                imm:$lane))))),
          (v8i16 (VMLAslv8i16
                   (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                   (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (NEONvduplane (v4i32 QPR:$src3),
                                                imm:$lane))))),
          (v4i32 (VMLAslv4i32
                   (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                   (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
                       (fmul (v4f32 QPR:$src2),
                             (v4f32 (NEONvduplane (v4f32 QPR:$src3),
                                                  imm:$lane))))),
          (v4f32 (VMLAslfq
                   (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                   (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLInt3_QHS<0, 1, 0b1000, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlal", "s", int_arm_neon_vmlals>;
defm VMLALu   : N3VLInt3_QHS<1, 1, 0b1000, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlal", "u", int_arm_neon_vmlalu>;

defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;

//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;

//   VMLS     : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0,
                             IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q,
                             "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD,
                          "vmls", "f32", v2f32, fmul, fsub>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ,
                          "vmls", "f32", v4f32, fmul, fsub>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100,
                              IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q,
                              "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD,
                            "vmls", "f32", v2f32, fmul, fsub>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ,
                            "vmls", "f32", v4f32, v2f32, fmul, fsub>;

def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                imm:$lane))))),
          (v8i16 (VMLSslv8i16
                   (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                   (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (NEONvduplane (v4i32 QPR:$src3),
                                                imm:$lane))))),
          (v4i32 (VMLSslv4i32
                   (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                   (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
                       (fmul (v4f32 QPR:$src2),
                             (v4f32 (NEONvduplane (v4f32 QPR:$src3),
                                                  imm:$lane))))),
          (v4f32 (VMLSslfq
                   (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                   (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLInt3_QHS<0, 1, 0b1010, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlsl", "s", int_arm_neon_vmlsls>;
defm VMLSLu   : N3VLInt3_QHS<1, 1, 0b1010, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlsl", "u", int_arm_neon_vmlslu>;

defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;

//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// op11_8 is declared bits<4>, so spell the literal with all four bits
// (0b0111), matching the other by-scalar definitions above.
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;

// Vector Subtract Operations.
// VSUB     : Vector Subtract (integer and floating-point)
// The integer forms are generated by the N3V_QHSD multiclass from the generic
// 'sub' node; the f32 forms are spelled out for D and Q registers with 'fsub'.
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                     v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                     v4f32, v4f32, fsub, 0>;

// VSUBL    : Vector Subtract Long (Q = D - D)
// NOTE: the trailing template argument is the Commutable bit of the
// N3VLInt_QHS multiclass (declared earlier in this file -- confirm there).
// A subtract must never be marked commutable: passes that are allowed to swap
// the operands of commutable instructions would otherwise compute b - a.
// It was previously 1 here, unlike every other subtract in this section.
defm VSUBLs   : N3VLInt_QHS<0, 1, 0b0010, 0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", int_arm_neon_vsubls, 0>;
defm VSUBLu   : N3VLInt_QHS<1, 1, 0b0010, 0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", int_arm_neon_vsublu, 0>;

// VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VWInt_QHS<0, 1, 0b0011, 0, "vsubw", "s",
                            int_arm_neon_vsubws, 0>;
defm VSUBWu   : N3VWInt_QHS<1, 1, 0b0011, 0, "vsubw", "u",
                            int_arm_neon_vsubwu, 0>;

// VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;

// VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;

// VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0, 1, 0b0110, 0, "vsubhn", "i",
                            int_arm_neon_vsubhn, 0>;

// VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1, 1, 0b0110, 0, "vrsubhn", "i",
                            int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.
// VCEQ     : Vector Compare Equal
// The f32 forms take float vectors and produce integer vectors of the same
// width (v2f32 -> v2i32, v4f32 -> v4i32).
defm VCEQ   : N3V_QHS<1, 0, 0b1000, 1,
                      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                      "vceq", "i", NEONvceq, 1>;
def  VCEQfd : N3VD<0, 0, 0b00, 0b1110, 0, IIC_VBIND, "vceq", "f32",
                   v2i32, v2f32, NEONvceq, 1>;
def  VCEQfq : N3VQ<0, 0, 0b00, 0b1110, 0, IIC_VBINQ, "vceq", "f32",
                   v4i32, v4f32, NEONvceq, 1>;
// Compare-with-zero form.  For disassembly only.
defm VCEQz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                          "$dst, $src, #0">;

// VCGE     : Vector Compare Greater Than or Equal
defm VCGEs  : N3V_QHS<0, 0, 0b0011, 1,
                      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                      "vcge", "s", NEONvcge, 0>;
defm VCGEu  : N3V_QHS<1, 0, 0b0011, 1,
                      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                      "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd : N3VD<1, 0, 0b00, 0b1110, 0, IIC_VBIND, "vcge", "f32",
                   v2i32, v2f32, NEONvcge, 0>;
def  VCGEfq : N3VQ<1, 0, 0b00, 0b1110, 0, IIC_VBINQ, "vcge", "f32",
                   v4i32, v4f32, NEONvcge, 0>;
// Compare-with-zero form.  For disassembly only.
defm VCGEz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                          "$dst, $src, #0">;
// VCLE (compare-with-zero form).  For disassembly only.
defm VCLEz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                          "$dst, $src, #0">;

// VCGT     : Vector Compare Greater Than
defm VCGTs  : N3V_QHS<0, 0, 0b0011, 0,
                      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                      "vcgt", "s", NEONvcgt, 0>;
defm VCGTu  : N3V_QHS<1, 0, 0b0011, 0,
                      IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                      "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd : N3VD<1, 0, 0b10, 0b1110, 0, IIC_VBIND, "vcgt", "f32",
                   v2i32, v2f32, NEONvcgt, 0>;
def  VCGTfq : N3VQ<1, 0, 0b10, 0b1110, 0, IIC_VBINQ, "vcgt", "f32",
                   v4i32, v4f32, NEONvcgt, 0>;
// Compare-with-zero form.  For disassembly only.
defm VCGTz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                          "$dst, $src, #0">;
// For disassembly only.
// VCLT (compare-with-zero form); defined without a selection DAG node.
defm VCLTz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                          "$dst, $src, #0">;

// VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND,
                      "vacge", "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                      "vacge", "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;

// VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND,
                      "vacgt", "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                      "vacgt", "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;

// VTST     : Vector Test Bits
defm VTST   : N3V_QHS<0, 0, 0b1000, 1,
                      IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                      "vtst", "", NEONvtst, 1>;

// Vector Bitwise Operations.

// Bitwise NOT expressed as XOR with an all-ones vector; one fragment for
// 64-bit (D) values and one for 128-bit (Q) values.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND     : Vector Bitwise AND
def  VANDd  : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                    v2i32, v2i32, and, 1>;
def  VANDq  : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                    v4i32, v4i32, and, 1>;

// VEOR     : Vector Bitwise Exclusive OR
def  VEORd  : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                    v2i32, v2i32, xor, 1>;
def  VEORq  : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                    v4i32, v4i32, xor, 1>;

// VORR     : Vector Bitwise OR
def  VORRd  : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                    v2i32, v2i32, or, 1>;
def  VORRq  : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                    v4i32, v4i32, or, 1>;

// VBIC     : Vector Bitwise Bit Clear (AND NOT)
// Matches src1 & ~src2.
def  VBICd  : N3VX<0, 0, 0b01, 0b0001, 0, 1,
                   (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                   N3RegFrm, IIC_VBINiD,
                   "vbic", "$dst, $src1, $src2", "",
                   [(set DPR:$dst,
                     (v2i32 (and DPR:$src1, (vnotd DPR:$src2))))]>;
def  VBICq  : N3VX<0, 0, 0b01, 0b0001, 1, 1,
                   (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                   N3RegFrm, IIC_VBINiQ,
                   "vbic", "$dst, $src1, $src2", "",
                   [(set QPR:$dst,
                     (v4i32 (and QPR:$src1, (vnotq QPR:$src2))))]>;

// VORN     : Vector Bitwise OR NOT
// Matches src1 | ~src2.
def  VORNd  : N3VX<0, 0, 0b11, 0b0001, 0, 1,
                   (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                   N3RegFrm, IIC_VBINiD,
                   "vorn", "$dst, $src1, $src2", "",
                   [(set DPR:$dst,
                     (v2i32 (or DPR:$src1, (vnotd DPR:$src2))))]>;
def  VORNq  : N3VX<0, 0, 0b11, 0b0001, 1, 1,
                   (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                   N3RegFrm, IIC_VBINiQ,
                   "vorn", "$dst, $src1, $src2", "",
                   [(set QPR:$dst,
                     (v4i32 (or QPR:$src1, (vnotq QPR:$src2))))]>;

// VMVN     : Vector Bitwise NOT (Immediate)
// Only i16 and i32 element forms are defined here, each for D and Q.

let isReMaterializable = 1 in {
def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1,
                         (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$dst, $SIMM", "",
                         [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]>;
def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1,
                         (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$dst, $SIMM", "",
                         [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]>;

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1,
                         (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$dst, $SIMM", "",
                         [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]>;
def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1,
                         (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$dst, $SIMM", "",
                         [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]>;
}

// VMVN     : Vector Bitwise NOT
def  VMVNd  : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                   (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD,
                   "vmvn", "$dst, $src", "",
                   [(set DPR:$dst, (v2i32 (vnotd DPR:$src)))]>;
def  VMVNq  : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                   (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD,
                   "vmvn", "$dst, $src", "",
                   [(set QPR:$dst, (v4i32 (vnotq QPR:$src)))]>;
// Stand-alone selection patterns for the same two NOT fragments.
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

// VBSL     : Vector Bitwise Select
// $src1 is tied to $dst and acts as the mask:
//   dst = (src2 & src1) | (src3 & ~src1)
def  VBSLd  : N3VX<1, 0, 0b01, 0b0001, 0, 1,
                   (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
                   N3RegFrm, IIC_VCNTiD,
                   "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
                   [(set DPR:$dst,
                     (v2i32 (or (and DPR:$src2, DPR:$src1),
                                (and DPR:$src3, (vnotd DPR:$src1)))))]>;
def  VBSLq  : N3VX<1, 0, 0b01, 0b0001, 1, 1,
                   (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
                   N3RegFrm, IIC_VCNTiQ,
                   "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
                   [(set QPR:$dst,
                     (v4i32 (or (and QPR:$src2, QPR:$src1),
                                (and QPR:$src3, (vnotq QPR:$src1)))))]>;

// VBIF     : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
def  VBIFd  : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                   (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
                   N3RegFrm, IIC_VBINiD,
                   "vbif", "$dst, $src2, $src3", "$src1 = $dst",
                   [/* For disassembly only; pattern left blank */]>;
def  VBIFq  : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                   (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
                   N3RegFrm, IIC_VBINiQ,
                   "vbif", "$dst, $src2, $src3", "$src1 = $dst",
                   [/* For disassembly only; pattern left blank */]>;

// VBIT     : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
def  VBITd  : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                   (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
                   N3RegFrm, IIC_VBINiD,
                   "vbit", "$dst, $src2, $src3", "$src1 = $dst",
                   [/* For disassembly only; pattern left blank */]>;
def  VBITq  : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                   (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
                   N3RegFrm, IIC_VBINiQ,
                   "vbit", "$dst, $src2, $src3", "$src1 = $dst",
                   [/* For disassembly only; pattern left blank */]>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD     : Vector Absolute Difference
// The f32 forms reuse the int_arm_neon_vabds intrinsic with float types.
defm VABDs  : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vabd", "s", int_arm_neon_vabds, 0>;
defm VABDu  : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vabd", "u", int_arm_neon_vabdu, 0>;
def  VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                      "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
def  VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                      "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;

// VABDL    : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLInt_QHS<0, 1, 0b0111, 0, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vabdl", "s", int_arm_neon_vabdls, 0>;
defm VABDLu : N3VLInt_QHS<1, 1, 0b0111, 0, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vabdl", "u", int_arm_neon_vabdlu, 0>;

// VABA     : Vector Absolute Difference and Accumulate
defm VABAs  : N3VInt3_QHS<0, 0, 0b0111, 1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "s", int_arm_neon_vabas>;
defm VABAu  : N3VInt3_QHS<1, 0, 0b0111, 1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "u", int_arm_neon_vabau>;

// VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLInt3_QHS<0, 1, 0b0101, 0, IIC_VABAD, IIC_VABAD,
                           "vabal", "s", int_arm_neon_vabals>;
defm VABALu : N3VLInt3_QHS<1, 1, 0b0101, 0, IIC_VABAD, IIC_VABAD,
                           "vabal", "u", int_arm_neon_vabalu>;

// Vector Maximum and Minimum.
// VMAX     : Vector Maximum
// Signed/unsigned integer forms come from N3VInt_QHS; the f32 forms reuse the
// int_arm_neon_vmaxs intrinsic with float vector types.
defm VMAXs  : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu  : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vmax", "u", int_arm_neon_vmaxu, 1>;
def  VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmax", "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMIN     : Vector Minimum
// Same layout as VMAX; the f32 forms reuse int_arm_neon_vmins.
defm VMINs  : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu  : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vmin", "u", int_arm_neon_vminu, 1>;
def  VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmin", "f32", v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmin", "f32", v4f32, v4f32, int_arm_neon_vmins, 1>;

// Vector Pairwise Operations.
// VPADD    : Vector Pairwise Add
// Only D-register forms are defined; one record per element type.
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vpadd", "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL   : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL   : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX    : Vector Pairwise Maximum
// Signed, unsigned and f32 D-register forms; the f32 form reuses the
// int_arm_neon_vpmaxs intrinsic.
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN    : Vector Pairwise Minimum
// Same layout as VPMAX; the f32 form reuses int_arm_neon_vpmins.
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE   : Vector Reciprocal Estimate
// u32 and f32 forms, each for D and Q registers, all mapped onto the same
// int_arm_neon_vrecpe intrinsic.
def  VRECPEd   : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, IIC_VUNAD,
                         "vrecpe", "u32", v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq   : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, IIC_VUNAQ,
                         "vrecpe", "u32", v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, IIC_VUNAD,
                         "vrecpe", "f32", v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, IIC_VUNAQ,
                         "vrecpe", "f32", v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS   : Vector Reciprocal Step
def  VRECPSfd  : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, IIC_VRECSD,
                         "vrecps", "f32", v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq  : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, IIC_VRECSQ,
                         "vrecps", "f32", v4f32, v4f32, int_arm_neon_vrecps, 1>;

// VRSQRTE  : Vector Reciprocal Square Root Estimate
// Same four-way layout as VRECPE, using int_arm_neon_vrsqrte.
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, IIC_VUNAD,
                         "vrsqrte", "u32", v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, IIC_VUNAQ,
                         "vrsqrte", "u32", v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, IIC_VUNAD,
                         "vrsqrte", "f32", v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, IIC_VUNAQ,
                         "vrsqrte", "f32", v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS  : Vector Reciprocal Square Root Step
def  VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, IIC_VRECSD,
                         "vrsqrts", "f32", v2f32, v2f32,
                         int_arm_neon_vrsqrts, 1>;
def  VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, IIC_VRECSQ,
                         "vrsqrts", "f32", v4f32, v4f32,
                         int_arm_neon_vrsqrts, 1>;

// Vector Shifts.
// VSHL     : Vector Shift
// Register-shift forms, selected from the vshifts/vshiftu intrinsics.
defm VSHLs  : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm,
                          IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                          "vshl", "s", int_arm_neon_vshifts, 0>;
defm VSHLu  : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm,
                          IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                          "vshl", "u", int_arm_neon_vshiftu, 0>;

// VSHL     : Vector Shift Left (Immediate)
defm VSHLi  : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i",
                         NEONvshl, N2RegVShLFrm>;

// VSHR     : Vector Shift Right (Immediate)
defm VSHRs  : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",
                         NEONvshrs, N2RegVShRFrm>;
defm VSHRu  : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",
                         NEONvshru, N2RegVShRFrm>;

// VSHLL    : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL    : Vector Shift Left Long (with maximum shift count)
// N2VLShMax is N2VLSh with instruction bits 21-16 forced to the op21_16
// template argument; one record is then defined per element size.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
                bit op7, bit op6, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, OpNode> {
  let Inst{21-16} = op21_16;
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, NEONvshlli>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, NEONvshlli>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, NEONvshlli>;

// VSHRN    : Vector Shift Right and Narrow
defm VSHRN  : N2VNSh_HSD<0, 1, 0b1000, 0, 0, 1, IIC_VSHLiD, "vshrn", "i",
                         NEONvshrn>;

// VRSHL    : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm,
                          IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                          "vrshl", "s", int_arm_neon_vrshifts, 0>;
// Unsigned register-shift form of the rounding shift ("vrshl").
defm VRSHLu   : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu, 0>;

// VRSHR    : Vector Rounding Shift Right
defm VRSHRs   : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s",
                           NEONvrshrs, N2RegVShRFrm>;
defm VRSHRu   : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u",
                           NEONvrshru, N2RegVShRFrm>;

// VRSHRN   : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrn>;

// VQSHL    : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts, 0>;
defm VQSHLu   : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu, 0>;

// VQSHL    : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s",
                           NEONvqshls, N2RegVShLFrm>;
defm VQSHLui  : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u",
                           NEONvqshlu, N2RegVShLFrm>;

// VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s",
                           NEONvqshlsu, N2RegVShLFrm>;

// VQSHRN   : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrns>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnu>;

// VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsu>;

// VQRSHL   : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "s", int_arm_neon_vqrshifts, 0>;
defm VQRSHLu  : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "u", int_arm_neon_vqrshiftu, 0>;

// VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

// VSRA     : Vector Shift Right and Accumulate
// Built on the same shift-right nodes as VSHR (NEONvshrs / NEONvshru).
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;

// VRSRA    : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI     : Vector Shift Left and Insert
defm VSLI     : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>;

// VSRI     : Vector Shift Right and Insert
defm VSRI     : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;

// Vector Absolute and Saturating Absolute.
// VABS     : Vector Absolute Value
// Integer forms come from N2VInt_QHS; the f32 forms reuse int_arm_neon_vabs.
defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
                           int_arm_neon_vabs>;
def  VABSfd   : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                        IIC_VUNAD, "vabs", "f32",
                        v2f32, v2f32, int_arm_neon_vabs>;
def  VABSfq   : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                        IIC_VUNAQ, "vabs", "f32",
                        v4f32, v4f32, int_arm_neon_vabs>;

// VQABS    : Vector Saturating Absolute Value
defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                           int_arm_neon_vqabs>;

// Vector Negate.

// Integer negate is matched as (0 - x); one fragment for D-sized values and
// one for Q-sized values.
def vnegd  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

// Integer VNEG on a D register; 'size' selects the element width and 'Ty' is
// the vector type of the selection pattern.
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>;
// Integer VNEG on a Q register.  This uses the Q-register itinerary
// (IIC_VSHLiQ), mirroring the D/Q itinerary pairing of the other operations
// in this section; it previously reused the D-register itinerary IIC_VSHLiD.
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
        IIC_VSHLiQ, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>;

// VNEG     : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG     : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
                    "vneg", "f32", "$dst, $src", "",
                    [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
                    "vneg", "f32", "$dst, $src", "",
                    [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;

// Stand-alone selection patterns for the integer negate fragments.
def : Pat<(v8i8  (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG    : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS     : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
// VCLZ     : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           int_arm_neon_vclz>;
// VCNT     : Vector Count One Bits
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, int_arm_neon_vcnt>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Swap -- for disassembly only.
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$dst), (ins DPR:$src), NoItinerary,
                     "vswp", "$dst, $src", "", []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$dst), (ins QPR:$src), NoItinerary,
                     "vswp", "$dst, $src", "", []>;

// Vector Move Operations.

// VMOV     : Vector Move (Register)
//
// Register-to-register copies for D and Q registers.  Neither record carries
// a selection pattern (the pattern lists are empty).

let neverHasSideEffects = 1 in {
def VMOVDneon
  : N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
         N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
def VMOVQ
  : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
         N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;

// Pseudo vector moves for QQ and QQQQ registers.  These should be expanded
// after register allocation is completed.
def VMOVQQ
  : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
               NoItinerary, "${:comment} vmov\t$dst, $src", []>;

def VMOVQQQQ
  : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
               NoItinerary, "${:comment} vmov\t$dst, $src", []>;
} // neverHasSideEffects

// VMOV     : Vector Move (Immediate)
//
// One D-register and one Q-register record per element size.  Each selects a
// NEONvmovImm node of the matching vector type; all are rematerializable.

let isReMaterializable = 1 in {
// 8-bit elements.
def VMOVv8i8
  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
             (ins nModImm:$SIMM), IIC_VMOVImm,
             "vmov", "i8", "$dst, $SIMM", "",
             [(set DPR:$dst, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8
  : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
             (ins nModImm:$SIMM), IIC_VMOVImm,
             "vmov", "i8", "$dst, $SIMM", "",
             [(set QPR:$dst, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

// 16-bit elements.
def VMOVv4i16
  : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
             (ins nModImm:$SIMM), IIC_VMOVImm,
             "vmov", "i16", "$dst, $SIMM", "",
             [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv8i16
  : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
             (ins nModImm:$SIMM), IIC_VMOVImm,
             "vmov", "i16", "$dst, $SIMM", "",
             [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>;

// 32-bit elements.
def VMOVv2i32
  : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$dst),
             (ins nModImm:$SIMM), IIC_VMOVImm,
             "vmov", "i32", "$dst, $SIMM", "",
             [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv4i32
  : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$dst),
             (ins nModImm:$SIMM), IIC_VMOVImm,
             "vmov", "i32", "$dst, $SIMM", "",
             [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>;

// 64-bit elements.
def VMOVv1i64
  : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
             (ins nModImm:$SIMM), IIC_VMOVImm,
             "vmov", "i64", "$dst, $SIMM", "",
             [(set DPR:$dst, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64
  : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
             (ins nModImm:$SIMM), IIC_VMOVImm,
             "vmov", "i64", "$dst, $SIMM", "",
             [(set QPR:$dst, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
} // isReMaterializable

// VMOV     : Vector Get Lane (move scalar to ARM core register)
//
// The instructions read a lane of a D register.  The 8- and 16-bit forms come
// in signed (NEONvgetlanes) and unsigned (NEONvgetlaneu) variants; the 32-bit
// form matches a plain extractelt.

def VGETLNs8
  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]",
              [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src),
                                             imm:$lane))]>;
def VGETLNs16
  : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]",
              [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src),
                                             imm:$lane))]>;
def VGETLNu8
  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]",
              [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src),
                                             imm:$lane))]>;
def VGETLNu16
  : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]",
              [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src),
                                             imm:$lane))]>;
def VGETLNi32
  : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]",
              [(set GPR:$dst, (extractelt (v2i32 DPR:$src),
                                          imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

// Q-register sources: take a D sub-register of the source with
// EXTRACT_SUBREG and reuse the D-register instruction.  The DSubReg_*_reg and
// SubReg_*_lane transforms are defined elsewhere; presumably they map the
// Q-register lane number to a sub-register index and a lane within it.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32
            (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))),
            (SubReg_i32_lane imm:$lane))>;

// f32 lanes need no instruction: copy the vector into the *_VFP2 register
// class and extract the S sub-register directly.
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
            (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)),
            (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
            (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)),
            (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV     : Vector Set Lane (move ARM core register to scalar)
//
// The destination is tied to $src1, so the instruction updates one lane of
// an existing D register in place.

let Constraints = "$src1 = $dst" in {
def VSETLNi8
  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$dst),
              (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
              IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2",
              [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
                                             GPR:$src2, imm:$lane))]>;
def VSETLNi16
  : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$dst),
              (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
              IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2",
              [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
                                             GPR:$src2, imm:$lane))]>;
def VSETLNi32
  : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$dst),
              (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
              IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2",
              [(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
                                         GPR:$src2, imm:$lane))]>;
}

// Q-register inserts: extract a D sub-register, set the lane in it with the
// D-register instruction, then put that half back with INSERT_SUBREG.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                   (v8i8 (VSETLNi8
                           (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                                 (DSubReg_i8_reg imm:$lane))),
                           GPR:$src2, (SubReg_i8_lane imm:$lane))),
                   (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                   (v4i16 (VSETLNi16
                            (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                                   (DSubReg_i16_reg imm:$lane))),
                            GPR:$src2, (SubReg_i16_lane imm:$lane))),
                   (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                   (v2i32 (VSETLNi32
                            (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                                   (DSubReg_i32_reg imm:$lane))),
                            GPR:$src2, (SubReg_i32_lane imm:$lane))),
                   (DSubReg_i32_reg imm:$lane)))>;

// f32 inserts are done as S sub-register inserts on the *_VFP2 classes.
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector of a floating-point value: insert into sub-register 0 of
// an IMPLICIT_DEF vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

// scalar_to_vector of a core register into a D register: set lane 0 of an
// IMPLICIT_DEF vector.
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

// ...and into a Q register: build the D-register value as above and insert it
// as dsub_0 of an IMPLICIT_DEF Q register.
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v16i8 (IMPLICIT_DEF)),
            (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v8i16 (IMPLICIT_DEF)),
            (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v4i32 (IMPLICIT_DEF)),
            (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            dsub_0)>;

// VDUP     : Vector Duplicate (from ARM core register to all elements)
//
// VDUPD / VDUPQ produce a D or Q register of type Ty from an i32 in a core
// register.  opcod1 and opcod3 are passed through to NVDup; Dt is the
// data-type suffix printed after the mnemonic.

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", Dt, "$dst, $src",
          [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", Dt, "$dst, $src",
          [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;

def VDUP8d  : VDUPD<0b11101100, 0b00, "8",  v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
def VDUP8q  : VDUPQ<0b11101110, 0b00, "8",  v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// Floating-point flavours of VDUP from a core register: the GPR value is
// bitconverted to f32 and duplicated.  The encoding arguments match the
// 32-bit integer forms (VDUP32d / VDUP32q).
def VDUPfd
  : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", "32", "$dst, $src",
          [(set DPR:$dst,
                (v2f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;
def VDUPfq
  : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", "32", "$dst, $src",
          [(set QPR:$dst,
                (v4f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;

// VDUP     : Vector Duplicate Lane (from scalar to all elements)
//
// Both classes read one lane of a D register.  VDUPLND writes a D register
// of the same type; VDUPLNQ writes a Q register, so it takes separate result
// (ResTy) and operand (OpTy) types.

class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty>
  : NVDupLane<op19_16, 0,
              (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
              [(set DPR:$dst,
                    (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;

class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy>
  : NVDupLane<op19_16, 1,
              (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
              [(set QPR:$dst,
                    (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.

// VDUPLND / VDUPLNQ instantiations.  The '?' bits of the op19_16 argument
// are left unspecified here.
def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8",  v8i8>;
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>;
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>;
def VDUPLNfd  : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>;
def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8",  v16i8, v8i8>;
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>;
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>;
def VDUPLNfq  : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>;

// Lane duplication from a Q-register source: the instructions only read a D
// register, so extract a D sub-register first and pass the lane through the
// SubReg_*_lane transform.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q
                   (v8i8 (EXTRACT_SUBREG QPR:$src,
                                         (DSubReg_i8_reg imm:$lane))),
                   (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q
                   (v4i16 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q
                   (v2i32 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLNfq
                   (v2f32 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

// VDUP of an f32 held in an S register.  The source is printed with the
// "lane" operand modifier (${src:lane}).
def VDUPfdf
  : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0,
        (outs DPR:$dst), (ins SPR:$src),
        IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
        [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;

def VDUPfqf
  : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
        (outs QPR:$dst), (ins SPR:$src),
        IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
        [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

//   VMOVN    : Vector Narrowing Move
defm VMOVN
  : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
                "vmovn", "i", int_arm_neon_vmovn>;
//   VQMOVN   : Vector Saturating Narrowing Move
//   (signed, unsigned, and the "vqmovun" form)
defm VQMOVNs
  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu
  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu
  : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                "vqmovun", "s", int_arm_neon_vqmovnsu>;
//   VMOVL    : Vector Lengthening Move (sign- and zero-extending)
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;

// Vector Conversions.

//   VCVT     : Vector Convert Between Floating-Point and Integers
//   D-register forms.
def VCVTf2sd
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
         v2i32, v2f32, fp_to_sint>;
def VCVTf2ud
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
         v2i32, v2f32, fp_to_uint>;
def VCVTs2fd
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
         v2f32, v2i32, sint_to_fp>;
def VCVTu2fd
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
         v2f32, v2i32, uint_to_fp>;

//   Q-register forms.
def VCVTf2sq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
         v4i32, v4f32, fp_to_sint>;
def VCVTf2uq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
         v4i32, v4f32, fp_to_uint>;
def VCVTs2fq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
         v4f32, v4i32, sint_to_fp>;
def VCVTu2fq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
         v4f32, v4i32, uint_to_fp>;

//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
// Fixed-point conversions, selected from the int_arm_neon_vcvt* intrinsics.
// D-register forms.
def VCVTf2xsd
  : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
            v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud
  : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
            v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd
  : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
            v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd
  : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
            v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;

// Q-register forms.
def VCVTf2xsq
  : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
            v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq
  : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
            v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq
  : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
            v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq
  : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
            v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;

// Vector Reverse.
//
// Each VREVnn class takes the element-size field (op19_18), the mnemonic,
// the data-type suffix and the vector type, and selects the corresponding
// NEONvrevNN node.  The D and Q variants differ in the register class and in
// one encoding argument (0 vs. 1).

//   VREV64   : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8",  v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def VREV64df  : VREV64D<0b10, "vrev64", "32", v2f32>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8",  v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def VREV64qf  : VREV64Q<0b10, "vrev64", "32", v4f32>;

//   VREV32   : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8",  v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8",  v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

//   VREV16   : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;

def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

//   VEXT     : Vector Extract
//
// VEXTd / VEXTq take two vectors of type Ty plus an immediate index and
// select a NEONvext node.  The op11_8 encoding argument is left as '?' bits.

class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0,1,0b11,{?,?,?,?},0,0,
        (outs DPR:$dst), (ins DPR:$lhs, DPR:$rhs, i32imm:$index),
        NVExtFrm, IIC_VEXTD,
        OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
        [(set DPR:$dst,
              (Ty (NEONvext (Ty DPR:$lhs), (Ty DPR:$rhs), imm:$index)))]>;

class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0,1,0b11,{?,?,?,?},1,0,
        (outs QPR:$dst), (ins QPR:$lhs, QPR:$rhs, i32imm:$index),
        NVExtFrm, IIC_VEXTQ,
        OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
        [(set QPR:$dst,
              (Ty (NEONvext (Ty QPR:$lhs), (Ty QPR:$rhs), imm:$index)))]>;

def VEXTd8  : VEXTd<"vext", "8",  v8i8>;
def VEXTd16 : VEXTd<"vext", "16", v4i16>;
def VEXTd32 : VEXTd<"vext", "32", v2i32>;
def VEXTdf  : VEXTd<"vext", "32", v2f32>;

def VEXTq8  : VEXTq<"vext", "8",  v16i8>;
def VEXTq16 : VEXTq<"vext", "16", v8i16>;
def VEXTq32 : VEXTq<"vext", "32", v4i32>;
def VEXTqf  : VEXTq<"vext", "32", v4f32>;

//   VTRN     : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

//   VUZP     : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

//   VZIP     : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

//   VTBL     : Vector Table Lookup
//
// Only the one-register form has a selection pattern.  The 2-, 3- and
// 4-register forms have empty pattern lists and are marked
// hasExtraSrcRegAllocReq.
def VTBL1
  : N3V<1,1,0b11,0b1000,0,0,
        (outs DPR:$dst), (ins DPR:$tbl1, DPR:$src),
        NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
  : N3V<1,1,0b11,0b1001,0,0,
        (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$src),
        NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>;
def VTBL3
  : N3V<1,1,0b11,0b1010,0,0,
        (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
        NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>;
def VTBL4
  : N3V<1,1,0b11,0b1011,0,0,
        (outs DPR:$dst),
        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>;
} // hasExtraSrcRegAllocReq = 1

//   VTBX     : Vector Table Extension
//
// Same layout as VTBL, plus an $orig input that is tied to the destination.
def VTBX1
  : N3V<1,1,0b11,0b1000,1,0,
        (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$src),
        NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbx1 DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
  : N3V<1,1,0b11,0b1001,1,0,
        (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src),
        NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>;
def VTBX3
  : N3V<1,1,0b11,0b1010,1,0,
        (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src",
        "$orig = $dst", []>;
def VTBX4
  : N3V<1,1,0b11,0b1011,1,0,
        (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
        NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
        "$orig = $dst", []>;
} // hasExtraSrcRegAllocReq = 1

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// N2VSPat: select a unary scalar operation (OpNode on an SPR, result ResTy)
// using the vector instruction Inst.  The scalar is inserted as ssub_0 of an
// IMPLICIT_DEF vector of type OpTy, and ssub_0 of the result is extracted.
class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
  : NEONFPPat<(ResTy (OpNode SPR:$a)),
              (EXTRACT_SUBREG
                (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
                                           SPR:$a, ssub_0))),
                ssub_0)>;

// N3VSPat: same scheme for a binary f32 operation; both operands are placed
// in ssub_0 of separate IMPLICIT_DEF v2f32 vectors.
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
                (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                            SPR:$a, ssub_0),
                             (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                            SPR:$b, ssub_0))),
                ssub_0)>;

// N3VSMulOpPat: same scheme for OpNode(acc, MulNode(a, b)), mapped onto a
// three-operand instruction.
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
                (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                     SPR:$acc, ssub_0),
                      (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                     SPR:$a, ssub_0),
                      (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                     SPR:$b, ssub_0)),
                ssub_0)>;

// These need separate instructions because they must use the DPR_VFP2
// register class, which has SPR sub-registers.

// Each *_sfp record below is a vector instruction paired with an N2VSPat or
// N3VSPat pattern that lets it implement the corresponding scalar f32
// operation.

// Vector Add Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VADDfd_sfp
  : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>;
def : N3VSPat<fadd, VADDfd_sfp>;

// Vector Sub Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VSUBfd_sfp
  : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>;
def : N3VSPat<fsub, VSUBfd_sfp>;

// Vector Multiply Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VMULfd_sfp
  : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>;
def : N3VSPat<fmul, VMULfd_sfp>;

// Vector Multiply-Accumulate/Subtract used for single-precision FP
// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
// we want to avoid them for now. e.g., alternating vmla/vadd instructions.

//let neverHasSideEffects = 1 in
//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
//                            v2f32, fmul, fadd>;
//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;

//let neverHasSideEffects = 1 in
//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
//                            v2f32, fmul, fsub>;
//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;

// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
def VABSfd_sfp
  : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
        "vabs", "f32", "$dst, $src", "", []>;
def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;

// Vector Negate used for single-precision FP
let neverHasSideEffects = 1 in
def VNEGfd_sfp
  : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
        "vneg", "f32", "$dst, $src", "", []>;
def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;

// Vector Maximum used for single-precision FP
let neverHasSideEffects = 1 in
def VMAXfd_sfp
  : N3V<0, 0, 0b00, 0b1111, 0, 0,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2),
        N3RegFrm, IIC_VBIND,
        "vmax", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmax, VMAXfd_sfp>;

// Vector Minimum used for single-precision FP
// The third encoding argument (op21_20) must be 0b10 here: VMIN.F32 differs
// from VMAX.F32 only in the "op" bit (bit 21).  With 0b00 this record would
// have exactly the same encoding as VMAXfd_sfp above, i.e. "vmin.f32" would
// be emitted as "vmax.f32".
let neverHasSideEffects = 1 in
def VMINfd_sfp
  : N3V<0, 0, 0b10, 0b1111, 0, 0,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2),
        N3RegFrm, IIC_VBIND,
        "vmin", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmin, VMINfd_sfp>;

// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
def VCVTf2sd_sfp
  : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
         v2i32, v2f32, fp_to_sint>;
def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;

let neverHasSideEffects = 1 in
def VCVTf2ud_sfp
  : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
         v2i32, v2f32, fp_to_uint>;
def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;

let neverHasSideEffects = 1 in
def VCVTs2fd_sfp
  : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
         v2f32, v2i32, sint_to_fp>;
def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;

let neverHasSideEffects = 1 in
def VCVTu2fd_sfp
  : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
         v2f32, v2i32, uint_to_fp>;
def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
//
// A bitconvert between two types held in the same register class selects to
// the source register itself; no instruction is emitted.

// 64-bit types (D registers).
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

// 128-bit types (Q registers).
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;