ARMInstrNEON.td revision 22c687b6421d9cc03351ddb0c7fd3d45382bc01a
//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profile shared by the vector compare nodes below: one integer result
// and two operands that must have the same type as each other.  The result
// type is not tied to the operand type.
def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;

def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
// SDTARMVSH:    integer result, operand 1 has the result type, operand 2 is i32.
// SDTARMVSHX:   integer result and integer operand 1 (types not tied together),
//               operand 2 is i32.
// SDTARMVSHINS: integer result, operands 1 and 2 have the result type,
//               operand 3 is i32.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction: i32 result, an integer-typed operand 1 and an i32
// operand 2 (presumably the vector and the lane index -- confirm at the
// lowering site).
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// VDUP only requires a vector result; its single operand is unconstrained.
def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

// VEXT: vector result, two operands of the result type, and an i32 operand.
def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// One-input shuffles: vector result with the same type as the operand.
def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles: both results and both operands share one vector type.
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Scalar f32 max/min: f32 result and two f32 operands.
def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//

// Immediate operands of each integer width; each one only overrides the
// print method used for the operand.
def h8imm  : Operand<i8> {
  let PrintMethod = "printHex8ImmOperand";
}
def h16imm : Operand<i16> {
  let PrintMethod = "printHex16ImmOperand";
}
def h32imm : Operand<i32> {
  let PrintMethod = "printHex32ImmOperand";
}
def h64imm : Operand<i64> {
  let PrintMethod = "printHex64ImmOperand";
}

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

let mayLoad = 1 in {
// Use vldmia to load a Q register as a D register pair.
// This is equivalent to VLDMD except that it has a Q register operand
// instead of a pair of D registers.
def VLDMQ
  : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
          IndexModeNone, IIC_fpLoadm,
          "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;

// Use vld1 to load a Q register as a D register pair.
// This alternative to VLDMQ allows an alignment to be specified.
// This is equivalent to VLD1q64 except that it has a Q register operand.
// (No selection pattern is attached: the pattern list is empty.)
def VLD1q
  : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
          IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
} // mayLoad = 1

let mayStore = 1 in {
// Use vstmia to store a Q register as a D register pair.
// This is equivalent to VSTMD except that it has a Q register operand
// instead of a pair of D registers.
def VSTMQ
  : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
          IndexModeNone, IIC_fpStorem,
          "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;

// Use vst1 to store a Q register as a D register pair.
// This alternative to VSTMQ allows an alignment to be specified.
// This is equivalent to VST1q64 except that it has a Q register operand.
def VST1q
  : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
          IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
} // mayStore = 1

// Conventions used by the load/store classes below:
//  * op11_8 / op7_4 are forwarded unchanged to NLdSt (defined elsewhere);
//    bits written as '?' in a def are left unset by that def.
//  * Dt is the data-size string ("8", "16", "32", "64") handed to NLdSt
//    next to the mnemonic.
//  * "..._UPD" / "...WB" variants add a GPR:$wb output that the constraint
//    string "$addr.addr = $wb" ties to the address operand, plus an
//    am6offset:$offset input.
//  * None of these classes attaches a selection pattern (the list is []).

let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {

// VLD1     : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
          (ins addrmode6:$addr), IIC_VLD1,
          "vld1", Dt, "\\{$dst\\}, $addr", "", []>;
class VLD1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr), IIC_VLD1,
          "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;

def  VLD1d8   : VLD1D<0b0000, "8">;
def  VLD1d16  : VLD1D<0b0100, "16">;
def  VLD1d32  : VLD1D<0b1000, "32">;
def  VLD1d64  : VLD1D<0b1100, "64">;

def  VLD1q8   : VLD1Q<0b0000, "8">;
def  VLD1q16  : VLD1Q<0b0100, "16">;
def  VLD1q32  : VLD1Q<0b1000, "32">;
def  VLD1q64  : VLD1Q<0b1100, "64">;

// ...with address register writeback:
class VLD1DWB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
          "vld1", Dt, "\\{$dst\\}, $addr$offset",
          "$addr.addr = $wb", []>;
// Note: unlike VLD1Q, the writeback form takes a single QPR and prints it
// with the dregpair modifier.
class VLD1QWB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
          "vld1", Dt, "${dst:dregpair}, $addr$offset",
          "$addr.addr = $wb", []>;

def VLD1d8_UPD  : VLD1DWB<0b0000, "8">;
def VLD1d16_UPD : VLD1DWB<0b0100, "16">;
def VLD1d32_UPD : VLD1DWB<0b1000, "32">;
def VLD1d64_UPD : VLD1DWB<0b1100, "64">;

def VLD1q8_UPD  : VLD1QWB<0b0000, "8">;
def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;

// ...with 3 registers (some of these are only for the disassembler):
class VLD1D3<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
          "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
class VLD1D3WB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
          "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>;

def VLD1d8T      : VLD1D3<0b0000, "8">;
def VLD1d16T     : VLD1D3<0b0100, "16">;
def VLD1d32T     : VLD1D3<0b1000, "32">;
def VLD1d64T     : VLD1D3<0b1100, "64">;

def VLD1d8T_UPD  : VLD1D3WB<0b0000, "8">;
def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;

// ...with 4 registers (some of these are only for the disassembler):
class VLD1D4<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
          "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
class VLD1D4WB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0010,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
          "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb",
          []>;

def VLD1d8Q      : VLD1D4<0b0000, "8">;
def VLD1d16Q     : VLD1D4<0b0100, "16">;
def VLD1d32Q     : VLD1D4<0b1000, "32">;
def VLD1d64Q     : VLD1D4<0b1100, "64">;

def VLD1d8Q_UPD  : VLD1D4WB<0b0000, "8">;
def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;

// VLD2     : Vector Load (multiple 2-element structures)
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr), IIC_VLD2,
          "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
class VLD2Q<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0011, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD2,
          "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;

def  VLD2d8   : VLD2D<0b1000, 0b0000, "8">;
def  VLD2d16  : VLD2D<0b1000, 0b0100, "16">;
def  VLD2d32  : VLD2D<0b1000, 0b1000, "32">;

def  VLD2q8   : VLD2Q<0b0000, "8">;
def  VLD2q16  : VLD2Q<0b0100, "16">;
def  VLD2q32  : VLD2Q<0b1000, "32">;

// ...with address register writeback:
class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
          "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
          "$addr.addr = $wb", []>;
class VLD2QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0011, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
          "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VLD2d8_UPD  : VLD2DWB<0b1000, 0b0000, "8">;
def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;

def VLD2q8_UPD  : VLD2QWB<0b0000, "8">;
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
def VLD2q32_UPD : VLD2QWB<0b1000, "32">;

// ...with double-spaced registers (for disassembly only):
def VLD2b8      : VLD2D<0b1001, 0b0000, "8">;
def VLD2b16     : VLD2D<0b1001, 0b0100, "16">;
def VLD2b32     : VLD2D<0b1001, 0b1000, "32">;
def VLD2b8_UPD  : VLD2DWB<0b1001, 0b0000, "8">;
def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">;
def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">;

// VLD3     : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr), IIC_VLD3,
          "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;

def  VLD3d8   : VLD3D<0b0100, 0b0000, "8">;
def  VLD3d16  : VLD3D<0b0100, 0b0100, "16">;
def  VLD3d32  : VLD3D<0b0100, 0b1000, "32">;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3,
          "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VLD3d8_UPD  : VLD3DWB<0b0100, 0b0000, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD3q8      : VLD3D<0b0101, 0b0000, "8">;
def VLD3q16     : VLD3D<0b0101, 0b0100, "16">;
def VLD3q32     : VLD3D<0b0101, 0b1000, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, 0b0000, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;

// ...alternate versions to be allocated odd register numbers:
// (These take exactly the same class arguments as the VLD3q*_UPD defs above.)
def VLD3q8odd_UPD  : VLD3DWB<0b0101, 0b0000, "8">;
def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">;
def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">;

// VLD4     : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD4,
          "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;

def  VLD4d8   : VLD4D<0b0000, 0b0000, "8">;
def  VLD4d16  : VLD4D<0b0000, 0b0100, "16">;
def  VLD4d32  : VLD4D<0b0000, 0b1000, "32">;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4,
          "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VLD4d8_UPD  : VLD4DWB<0b0000, 0b0000, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD4q8      : VLD4D<0b0001, 0b0000, "8">;
def VLD4q16     : VLD4D<0b0001, 0b0100, "16">;
def VLD4q32     : VLD4D<0b0001, 0b1000, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, 0b0000, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;

// ...alternate versions to be allocated odd register numbers:
// (These take exactly the same class arguments as the VLD4q*_UPD defs above.)
def VLD4q8odd_UPD  : VLD4DWB<0b0001, 0b0000, "8">;
def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">;
def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;

// VLD1LN   : Vector Load (single element to one lane)
//   FIXME: Not yet implemented.

// VLD2LN   : Vector Load (single 2-element structure to one lane)
// The lane forms take the current register contents as $srcN inputs and tie
// each one to the matching $dstN output through the constraint string.
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
          "$src1 = $dst1, $src2 = $dst2", []>;

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8">;
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;

// ...alternate versions to be allocated odd register numbers:
def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt,
          "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
          "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8">;
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;

// VLD3LN   : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VLD3, "vld3", Dt,
          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8">;
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;

// ...alternate versions to be allocated odd register numbers:
def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VLD3, "vld3", Dt,
          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset",
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
          []>;

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8">;
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;

// VLD4LN   : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
           nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8">;
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;

// ...alternate versions to be allocated odd register numbers:
def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VLD4, "vld4", Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
          []>;

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;

// VLD1DUP  : Vector Load (single element to all lanes)
// VLD2DUP  : Vector Load (single 2-element structure to all lanes)
// VLD3DUP  : Vector Load (single 3-element structure to all lanes)
// VLD4DUP  : Vector Load (single 4-element structure to all lanes)
//   FIXME: Not yet implemented.
} // mayLoad = 1, hasExtraDefRegAllocReq = 1

let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {

// VST1     : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
          "vst1", Dt, "\\{$src\\}, $addr", "", []>;
class VST1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b1010,op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
          "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;

def  VST1d8   : VST1D<0b0000, "8">;
def  VST1d16  : VST1D<0b0100, "16">;
def  VST1d32  : VST1D<0b1000, "32">;
def  VST1d64  : VST1D<0b1100, "64">;

def  VST1q8   : VST1Q<0b0000, "8">;
def  VST1q16  : VST1Q<0b0100, "16">;
def  VST1q32  : VST1Q<0b1000, "32">;
def  VST1q64  : VST1Q<0b1100, "64">;

// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST,
          "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>;
// Note: unlike VST1Q, the writeback form takes a single QPR and prints it
// with the dregpair modifier.
class VST1QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
          "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>;

def VST1d8_UPD  : VST1DWB<0b0000, "8">;
def VST1d16_UPD : VST1DWB<0b0100, "16">;
def VST1d32_UPD : VST1DWB<0b1000, "32">;
def VST1d64_UPD : VST1DWB<0b1100, "64">;

def VST1q8_UPD  : VST1QWB<0b0000, "8">;
def VST1q16_UPD : VST1QWB<0b0100, "16">;
def VST1q32_UPD : VST1QWB<0b1000, "32">;
def VST1q64_UPD : VST1QWB<0b1100, "64">;

// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
class VST1D3WB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3),
          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST1d8T      : VST1D3<0b0000, "8">;
def VST1d16T     : VST1D3<0b0100, "16">;
def VST1d32T     : VST1D3<0b1000, "32">;
def VST1d64T     : VST1D3<0b1100, "64">;

def VST1d8T_UPD  : VST1D3WB<0b0000, "8">;
def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
def VST1d64T_UPD : VST1D3WB<0b1100, "64">;

// ...with 4 registers (some of these are only for the disassembler):
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
          []>;
class VST1D4WB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST1d8Q      : VST1D4<0b0000, "8">;
def VST1d16Q     : VST1D4<0b0100, "16">;
def VST1d32Q     : VST1D4<0b1000, "32">;
def VST1d64Q     : VST1D4<0b1100, "64">;

def VST1d8Q_UPD  : VST1D4WB<0b0000, "8">;
def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;

// VST2     : Vector Store (multiple 2-element structures)
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2),
          IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
class VST2Q<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
          "", []>;

def  VST2d8   : VST2D<0b1000, 0b0000, "8">;
def  VST2d16  : VST2D<0b1000, 0b0100, "16">;
def  VST2d32  : VST2D<0b1000, 0b1000, "32">;

def  VST2q8   : VST2Q<0b0000, "8">;
def  VST2q16  : VST2Q<0b0100, "16">;
def  VST2q32  : VST2Q<0b1000, "32">;

// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2),
          IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset",
          "$addr.addr = $wb", []>;
class VST2QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST2d8_UPD  : VST2DWB<0b1000, 0b0000, "8">;
def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;

def VST2q8_UPD  : VST2QWB<0b0000, "8">;
def VST2q16_UPD : VST2QWB<0b0100, "16">;
def VST2q32_UPD : VST2QWB<0b1000, "32">;

// ...with double-spaced registers (for disassembly only):
def VST2b8      : VST2D<0b1001, 0b0000, "8">;
def VST2b16     : VST2D<0b1001, 0b0100, "16">;
def VST2b32     : VST2D<0b1001, 0b1000, "32">;
def VST2b8_UPD  : VST2DWB<0b1001, 0b0000, "8">;
def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">;
def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">;

// VST3     : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
          "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;

def  VST3d8   : VST3D<0b0100, 0b0000, "8">;
def  VST3d16  : VST3D<0b0100, 0b0100, "16">;
def  VST3d32  : VST3D<0b0100, 0b1000, "32">;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
          "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST3d8_UPD  : VST3DWB<0b0100, 0b0000, "8">;
def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VST3q8      : VST3D<0b0101, 0b0000, "8">;
def VST3q16     : VST3D<0b0101, 0b0100, "16">;
def VST3q32     : VST3D<0b0101, 0b1000, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, 0b0000, "8">;
def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;

// ...alternate versions to be allocated odd register numbers:
// (These take exactly the same class arguments as the VST3q*_UPD defs above.)
def VST3q8odd_UPD  : VST3DWB<0b0101, 0b0000, "8">;
def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">;
def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">;

// VST4     : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
          "", []>;

def  VST4d8   : VST4D<0b0000, 0b0000, "8">;
def  VST4d16  : VST4D<0b0000, 0b0100, "16">;
def  VST4d32  : VST4D<0b0000, 0b1000, "32">;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
          "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST4d8_UPD  : VST4DWB<0b0000, 0b0000, "8">;
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VST4q8      : VST4D<0b0001, 0b0000, "8">;
def VST4q16     : VST4D<0b0001, 0b0100, "16">;
def VST4q32     : VST4D<0b0001, 0b1000, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, 0b0000, "8">;
def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;

// ...alternate versions to be allocated odd register numbers:
// (These take exactly the same class arguments as the VST4q*_UPD defs above.)
def VST4q8odd_UPD  : VST4DWB<0b0001, 0b0000, "8">;
def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">;
def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;

// VST1LN   : Vector Store (single element from one lane)
//   FIXME: Not yet implemented.

// VST2LN   : Vector Store (single 2-element structure from one lane)
// In the lane-store classes below, op11_8 / op7_4 are forwarded unchanged to
// NLdSt; bits written as '?' in a def are left unset by that def.  The
// writeback ("WB" / "_UPD") forms add a GPR:$wb output tied to the address
// operand by the constraint string "$addr.addr = $wb".
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
          "", []>;

def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8">;
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;

// ...alternate versions to be allocated odd register numbers:
def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt,
          "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8">;
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;

// VST3LN   : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VST, "vst3", Dt,
          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;

def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8">;
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;

// ...alternate versions to be allocated odd register numbers:
def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VST, "vst3", Dt,
          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8">;
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;

// VST4LN   : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
           nohash_imm:$lane), IIC_VST, "vst4", Dt,
          "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
          "", []>;

def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8">;
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;

// ...alternate versions to be allocated odd register numbers:
def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VST, "vst4", Dt,
  "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8">;
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;

} // mayStore = 1, hasExtraSrcRegAllocReq = 1


//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
// Each transform maps an immediate to a sub-register index constant:
// 5 plus the immediate divided by 8, 4 or 2 for the i8, i16 and i32 forms.
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32);
}]>;
// Yields the opposite index from DSubReg_f64_reg: 6 for input 0, 5 for 1.
def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.)
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(1 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// Each transform masks the lane number down to the lane count of one D
// register (8, 4 or 2 lanes), yielding the lane index within the D half.
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: single-, double- and quad-register.
// N2VS operates on the DPR_VFP2 register class and carries no ISel pattern;
// its ResTy/OpTy/OpNode parameters are accepted but unused in this class.
class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
        IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
// Double-register form: matches (ResTy (OpNode (OpTy src))).
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
        (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt,"$dst, $src", "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
// Quad-register form: same as N2VD but on QPR, with the sixth N2V argument
// set to 1 instead of 0 and the IIC_VUNAQ itinerary.
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
        (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt,"$dst, $src", "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Double-register form: matches (ResTy (IntOp (OpTy src))) with a
// caller-supplied itinerary.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
        (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
// Quad-register form of N2VDInt.
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Narrow 2-register intrinsics.
// Quad-register source (TyQ), double-register result (TyD).
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;

// Long 2-register intrinsics (currently only used for VMOVL).
// Double-register source (TyD), quad-register result (TyQ).
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
        (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written: each source is tied to the matching
// destination through the constraint string.  No ISel pattern is attached.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$dst1, $dst2",
        "$src1 = $dst1, $src2 = $dst2", []>;
// Quad-register form; the itinerary is supplied by the caller.
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2",
        "$src1 = $dst1, $src2 = $dst2", []>;

// Basic 3-register operations: single-, double- and quad-register.
// N3VS operates on the DPR_VFP2 register class and carries no ISel pattern;
// its ResTy/OpTy/OpNode parameters are accepted but unused in this class.
class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm,
        IIC_VBIND, OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
  let isCommutable = Commutable;
}

// Double-register form: matches (ResTy (OpNode (OpTy src1), (OpTy src2))).
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
         OpcodeStr, "$dst, $src1, $src2", "",
         [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{
  let isCommutable = Commutable;
}

// By-scalar ("SL") double-register form: the second operand is one lane of
// $src2, matched as a NEONvduplane of that register.  $src2 is restricted to
// the DPR_VFP2 register class.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$src2),imm:$lane)))))]> {
  let isCommutable = 0;
}
// 16-bit by-scalar variant: $src2 is restricted to the DPR_8 register class
// and the itinerary is fixed to IIC_VMULi16D.
// NOTE(review): presumably DPR_8 reflects the narrower register field of the
// 16-bit scalar encoding -- confirm against the architecture manual.
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
  let isCommutable = 0;
}

// Quad-register form: matches (ResTy (OpNode (OpTy src1), (OpTy src2))).
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
// Quad-register form without a data-type suffix (derives from N3VX).
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
         OpcodeStr, "$dst, $src1, $src2", "",
         [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{
  let isCommutable = Commutable;
}
// By-scalar quad-register form: the scalar comes from a D register (OpTy),
// duplicated to the quad-register type (ResTy) by NEONvduplane.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}
// 16-bit by-scalar quad variant: DPR_8 scalar register, IIC_VMULi16Q.
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
// Double-register form: matches (ResTy (IntOp (OpTy src1), (OpTy src2)));
// the instruction Format and itinerary are supplied by the caller.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), f, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// By-scalar double-register intrinsic: second operand is one lane of $src2
// (DPR_VFP2), matched as a NEONvduplane.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}
// 16-bit by-scalar variant: $src2 is restricted to the DPR_8 register class.
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
  let isCommutable = 0;
}

// Quad-register form of N3VDInt.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), f, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
// By-scalar quad-register intrinsic: the scalar comes from a D register
// (OpTy), duplicated to the quad-register type (ResTy) by NEONvduplane.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
// 16-bit by-scalar quad variant: $src2 is restricted to DPR_8.
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: single-, double- and quad-register.
// In every class below $src1 is the accumulator and is tied to $dst by the
// "$src1 = $dst" constraint; only $src2/$src3 appear in the asm string.
// N3VSMulOp operates on DPR_VFP2 and carries no ISel pattern; its
// Ty/MulOp/OpNode parameters are accepted but unused in this class.
class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;

// Double-register form: matches (OpNode src1, (MulOp src2, src3)).
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set DPR:$dst, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
// By-scalar double-register form: the multiplier is one lane of $src3
// (DPR_VFP2), matched as a NEONvduplane.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
                                                     imm:$lane)))))))]>;
// 16-bit by-scalar variant: $src3 is restricted to the DPR_8 register class.
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_8:$src3),
                                                     imm:$lane)))))))]>;

// Quad-register form of N3VDMulOp.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
// By-scalar quad-register form: the scalar comes from a D register (OpTy),
// duplicated to the quad-register type (ResTy) by NEONvduplane.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                        imm:$lane)))))))]>;
// 16-bit by-scalar quad variant: $src3 is restricted to DPR_8.
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                   (ResTy (NEONvduplane (OpTy DPR_8:$src3),
                                                        imm:$lane)))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// Double-register form: matches (IntOp src1, src2, src3) with $src1 tied to
// $dst; only $src2/$src3 appear in the asm string.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
                                      (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
// Quad-register form of N3VDInt3.
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
                                      (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// Quad-register accumulator ($src1, tied to $dst) with two double-register
// sources ($src2, $src3).
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
// By-scalar form: the third operand is one lane of $src3 (DPR_VFP2), matched
// as a NEONvduplane.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                imm:$lane)))))]>;
// 16-bit by-scalar variant: $src3 is restricted to the DPR_8 register class.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src3),
                                                imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Two quad-register sources (TyQ), double-register result (TyD); the
// itinerary is fixed to IIC_VBINi4D.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Two double-register sources (TyD), quad-register result (TyQ).
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// By-scalar long form: the second operand is one lane of $src2 (DPR_VFP2),
// matched as a NEONvduplane.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]>;
// 16-bit by-scalar long variant: $src2 is restricted to DPR_8.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]>;

// Wide 3-register intrinsics.
// Quad-register first source and result (TyQ), double-register second
// source (TyD); the itinerary is fixed to IIC_VSUBiD.
class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
        (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
// NOTE(review): the quad-register form below also uses the IIC_VSHLiD
// itinerary; confirm whether a quad-register itinerary was intended.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
        (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// $src1 is the accumulator (ResTy), tied to $dst; $src2 is the OpTy source.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VPALiD,
        OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
        [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
// Quad-register form; uses the IIC_VPALiQ itinerary.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VPALiQ,
        OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
        [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;

// Shift by immediate,
// both double- and quad-register.
// The shift amount is an i32 immediate operand ($SIMM); Format and itinerary
// are supplied by the caller.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), f, itin,
           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), f, itin,
           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
// Double-register source (OpTy), quad-register result (ResTy); uses the
// N2RegVShLFrm format and the IIC_VSHLiD itinerary.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
                                          (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
// Quad-register source (OpTy), double-register result (ResTy); uses the
// N2RegVShRFrm format.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
                                          (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// Matches (add src1, (ShOp src2, imm)); $src1 is tied to $dst.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set DPR:$dst, (Ty (add DPR:$src1,
                                (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
// NOTE(review): the quad-register form below also uses IIC_VPALiD; confirm
// whether a quad-register itinerary was intended.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set QPR:$dst, (Ty (add QPR:$src1,
                                (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
// Matches (ShOp src1, src2, imm); $src1 is tied to $dst and only $src2 and
// the immediate appear in the asm string.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations -- for disassembly only.

// First with only element sizes of 8, 16 and 32 bits:
// Every def has an empty pattern list and NoItinerary.  The integer defs
// append the element size to Dt; the f32 defs use the literal "f32" suffix,
// share the 0b10 op19_18 value with the 32-bit integer defs, and set
// Inst{10}.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "", []>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "", []>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "", []>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src), NoItinerary,
                  opc, "f32", asm, "", []> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "", []>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "", []>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "", []>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src), NoItinerary,
                  opc, "f32", asm, "", []> {
    let Inst{10} = 1; // overwrite F = 1
  }
}

// Neon 3-register vector operations.
// First with only element sizes of 8, 16 and 32 bits:
// The 8- and 16-bit defs share the itinD16/itinQ16 itineraries; the 32-bit
// defs use itinD32/itinQ32.  The element size is appended to Dt.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// By-scalar variants for 16- and 32-bit elements.  The 16-bit defs use the
// *SL16 classes; the 32-bit defs pass IIC_VMULi32D / IIC_VMULi32Q explicitly.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
                       v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
                     v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
                       v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
// Inherits the 8/16/32-bit defs from N3V_QHS, passing the single itinD/itinQ
// itinerary for both the 16- and 32-bit slots, and adds the 64-bit defs.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// Defs are named after the (narrower) result type; the Dt suffix is the
// source element size.
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
// Defs are named after the (wider) result type; the Dt suffix is the source
// element size.  The op21_20/op19_18/op17_16 arguments are fixed per def.
multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                      OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
  def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                      OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                      OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}


// Neon 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
// One itinerary argument per (register width, element size) combination;
// f and Commutable are forwarded unchanged to every def.
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}

// Scalar ("SL") forms of the 16/32-bit intrinsics.  The 128-bit variants take
// a second, 64-bit vector type (v4i16 / v2i32) in addition to the result type.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// Inherits the 16/32-bit defs from N3VInt_HS; the added 8-bit variants reuse
// the "16" itinerary arguments.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f,
              itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}

// ....then also with element size of 64 bits:
// Inherits the 8/16/32-bit defs from N3VInt_QHS; the added 64-bit variants
// reuse the "32" itinerary arguments.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f,
               itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}

// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// Defs are named after the (narrow) result type; the Dt suffix is the source
// element size.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
// Defs are named after the (wide) result type; the Dt suffix is the source
// element size.
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// Scalar ("SL") forms of the long intrinsics.  Unlike N3VLInt_HS, the defs
// here are named after the source type, and a single itinerary serves both.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// Inherits the 16/32-bit defs from N3VLInt_HS; the added 8-bit variant reuses
// the itin16 argument.
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
  def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// Every def passes `mul` together with the caller-supplied OpNode; the 8- and
// 16-bit variants share the "16" itinerary arguments.
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Scalar ("SL") forms of the multiply-op operations, 16- and 32-bit elements.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
// One itinerary per register width (itinD for the 64-bit defs, itinQ for the
// 128-bit defs); result and operand vector types are identical in every def.
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long 3-argument intrinsics.
// First with only element sizes of 16 and 32 bits:
// Defs are named after the (wide) result type; the Dt suffix is the source
// element size.
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// Scalar ("SL") forms.  Defs are named after the source type, and the
// itineraries are fixed to IIC_VMACi16D / IIC_VMACi32D.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// Inherits the 16/32-bit defs from N3VLInt3_HS; the added 8-bit variant
// reuses the itin16 argument.
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32,
                OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Defs are named after the source type; the result type has half as many
// elements of twice the width (e.g. v8i8 -> v4i16).
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Same naming and type pairing as N2VPLInt_QHS, built on the *PLInt2 classes.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// Each 8/16/32-bit def pins the leading bits of Inst{21-16} (imm6) as noted
// beside its `let`; the 64-bit defs pass 1 instead of 0 as the fourth
// argument and leave imm6 unconstrained.
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode, Format f> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
// Same imm6 constraints per element size as N2VSh_QHSD.
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}


// Neon Shift-Insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// There is no Dt parameter here: the data-type string is the bare element
// size ("8", "16", "32", "64").
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp,
                         Format f> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "8", v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "16", v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "32", v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
                        f, OpcodeStr, "64", v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "8", v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "16", v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        f, OpcodeStr, "32", v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
                        f, OpcodeStr, "64", v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits:
// Defs are named after the (wide) result type; the Dt suffix is the source
// element size.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits:
// Defs are named after the (narrow) result type; the Dt suffix is the source
// element size.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.
// The final argument of each definition below is the Commutable flag.

//   VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ,
                         "vadd", "i", add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
//   VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", int_arm_neon_vaddls, 1>;
defm VADDLu   : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", int_arm_neon_vaddlu, 1>;
//   VADDW    : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
defm VADDWu   : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
//   VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
//   VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
//   VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", int_arm_neon_vqaddu, 1>;
//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
                            int_arm_neon_vaddhn, 1>;
//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

// Vector Multiply Operations.
//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul", "i", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;

// The Q-register scalar forms take their scalar operand from a D register.
// When the duplicated lane comes from a Q register, these patterns extract
// the D subregister selected by DSubReg_*_reg and pass the lane rewritten by
// SubReg_*_lane.  The same scheme is used by all "sl" patterns below.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs   : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                            "vmull", "s", int_arm_neon_vmulls, 1>;
defm VMULLu   : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                            "vmull", "u", int_arm_neon_vmullu, 1>;
def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                        v8i16, v8i8, int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
                             int_arm_neon_vmulls>;
defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u",
                             int_arm_neon_vmullu>;

//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul, fadd>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul, fadd>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul, fadd>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul, fadd>;

def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
                  (fmul (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlal", "s", int_arm_neon_vmlals>;
defm VMLALu   : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlal", "u", int_arm_neon_vmlalu>;

defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;

//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;

//   VMLS     : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                          v2f32, fmul, fsub>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                          v4f32, fmul, fsub>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul, fsub>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul, fsub>;

def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
                  (fmul (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlsl", "s", int_arm_neon_vmlsls>;
defm VMLSLu   : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlsl", "u", int_arm_neon_vmlslu>;

defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;

//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// op11_8 is declared bits<4>, so spell out all four bits (same value as the
// previous 0b111) to match the sibling "sl" definitions above.
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;

// Vector Subtract Operations.

// VSUB     : Vector Subtract (integer and floating-point)
// The trailing template argument on the definitions in this section is the
// Commutable flag.  Subtraction is not commutative, so it must be 0 for every
// subtract-style operation: a commutable instruction may have its two source
// operands swapped by later passes, which would change the result.
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                     v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                     v4f32, v4f32, fsub, 0>;
// VSUBL    : Vector Subtract Long (Q = D - D)
// Not commutable (D0 - D1 != D1 - D0); the flag was previously 1, which was
// inconsistent with every other subtract in this section.
defm VSUBLs   : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", int_arm_neon_vsubls, 0>;
defm VSUBLu   : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", int_arm_neon_vsublu, 0>;
// VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
defm VSUBWu   : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
// VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
                            int_arm_neon_vsubhn, 0>;
// VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                            int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.

// Element-wise comparisons.  The NEONvc* nodes used here are declared with
// the SDTARMVCMP profile (integer result, both operands of the same type),
// which is why the f32 forms below list an integer vector type (v2i32/v4i32)
// next to the f32 operand type.
// NOTE(review): the trailing 0/1 argument appears to be the Commutable flag
// (1 only on VCEQ) -- confirm against the class definitions earlier in this
// file.

// VCEQ     : Vector Compare Equal
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                     NEONvceq, 1>;
def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                     NEONvceq, 1>;
// For disassembly only.
// Compare-against-zero form: the asm operand string hard-codes "#0" and no
// selection node is passed.
defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                            "$dst, $src, #0">;

// VCGE     : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                     NEONvcge, 0>;
def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                     NEONvcge, 0>;
// For disassembly only.
defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                            "$dst, $src, #0">;
// For disassembly only.
defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                            "$dst, $src, #0">;

// VCGT     : Vector Compare Greater Than
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                     NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                     NEONvcgt, 0>;
// For disassembly only.
defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                            "$dst, $src, #0">;
// For disassembly only.
// VCLTz : compare-less-than-zero form; the asm operand string hard-codes "#0".
defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                            "$dst, $src, #0">;

// VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
// The D and Q forms use distinct intrinsics (..vacged / ..vacgeq).
def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                        "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                        "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                        "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                        "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST     : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// Vector Bitwise Operations.

// Pattern fragments matching a bitwise NOT expressed as XOR with an all-ones
// vector.  vnot8 uses a v8i8 all-ones constant and is used below with DPR
// operands; vnot16 uses a v16i8 constant and is used with QPR operands.
def vnot8     : PatFrag<(ops node:$in),
                        (xor node:$in, (bitconvert (v8i8 immAllOnesV)))>;
def vnot16    : PatFrag<(ops node:$in),
                        (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;


// VAND     : Vector Bitwise AND
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                      v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                      v4i32, v4i32, and, 1>;

// VEOR     : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                      v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                      v4i32, v4i32, xor, 1>;

// VORR     : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                      v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                      v4i32, v4i32, or, 1>;

// VBIC     : Vector Bitwise Bit Clear (AND NOT)
// Selected for  $src1 & ~$src2.
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
                     (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
                     "vbic", "$dst, $src1, $src2", "",
                     [(set DPR:$dst, (v2i32 (and DPR:$src1,
                                                 (vnot8 DPR:$src2))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
                     (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
                     "vbic", "$dst, $src1, $src2", "",
                     [(set QPR:$dst, (v4i32 (and QPR:$src1,
                                                 (vnot16 QPR:$src2))))]>;

// VORN     : Vector Bitwise OR NOT
// Selected for  $src1 | ~$src2.
def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
                     (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
                     "vorn", "$dst, $src1, $src2", "",
                     [(set DPR:$dst, (v2i32 (or DPR:$src1,
                                                (vnot8 DPR:$src2))))]>;
def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
                     (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
                     "vorn", "$dst, $src1, $src2", "",
                     [(set QPR:$dst, (v4i32 (or QPR:$src1,
                                                (vnot16 QPR:$src2))))]>;

// VMVN     : Vector Bitwise NOT
// NOTE(review): VMVNq (QPR operands) uses the same IIC_VSUBiD itinerary as
// the D-register form -- confirm whether IIC_VSUBiQ was intended.
def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD,
                     "vmvn", "$dst, $src", "",
                     [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>;
def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD,
                     "vmvn", "$dst, $src", "",
                     [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>;
def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnot16 QPR:$src)), (VMVNq QPR:$src)>;

// VBSL     : Vector Bitwise Select
// $src1 is tied to $dst and acts as the mask:
//   $dst = ($src2 & $src1) | ($src3 & ~$src1)
def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
                     (ins DPR:$src1, DPR:$src2, DPR:$src3),
                     N3RegFrm, IIC_VCNTiD,
                     "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
                     [(set DPR:$dst,
                       (v2i32 (or (and DPR:$src2, DPR:$src1),
                                  (and DPR:$src3, (vnot8 DPR:$src1)))))]>;
def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
                     (ins QPR:$src1, QPR:$src2, QPR:$src3),
                     N3RegFrm, IIC_VCNTiQ,
                     "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
                     [(set QPR:$dst,
                       (v4i32 (or (and QPR:$src2, QPR:$src1),
                                  (and QPR:$src3, (vnot16 QPR:$src1)))))]>;

// VBIF     : Vector Bitwise Insert if False
//            like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$dst, $src2, $src3", "$src1 = $dst",
                     [/* For disassembly only; pattern left blank */]>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$dst, $src2, $src3", "$src1 = $dst",
                     [/* For disassembly only; pattern left blank */]>;

// VBIT     : Vector Bitwise Insert if True
//            like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$dst, $src2, $src3", "$src1 = $dst",
                     [/* For disassembly only; pattern left blank */]>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$dst, $src2, $src3", "$src1 = $dst",
                     [/* For disassembly only; pattern left blank */]>;

// VBIT/VBIF are not yet implemented for instruction selection (the
// definitions above have empty patterns and exist for disassembly only).
// The TwoAddress pass will not go looking for equivalent operations with
// different register constraints; it just inserts copies.

// Vector Absolute Differences.

// Absolute-difference operations, all selected from int_arm_neon_* intrinsics.

// VABD     : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "s", int_arm_neon_vabds, 0>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "u", int_arm_neon_vabdu, 0>;
// The f32 forms reuse the int_arm_neon_vabds intrinsic with f32 vector types.
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;

// VABDL    : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vabdl", "s", int_arm_neon_vabdls, 0>;
defm VABDLu   : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vabdl", "u", int_arm_neon_vabdlu, 0>;

// VABA     : Vector Absolute Difference and Accumulate
defm VABAs    : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                            "vaba", "s", int_arm_neon_vabas>;
defm VABAu    : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                            "vaba", "u", int_arm_neon_vabau>;

// VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD,
                             "vabal", "s", int_arm_neon_vabals>;
defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD,
                             "vabal", "u", int_arm_neon_vabalu>;

// Vector Maximum and Minimum.

// Element-wise maximum / minimum.  The f32 forms reuse the signed intrinsics
// (int_arm_neon_vmaxs / int_arm_neon_vmins) with f32 vector types.
// NOTE(review): the trailing 1 appears to mark these as commutable -- confirm
// against the class definitions earlier in this file.

// VMAX     : Vector Maximum
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "u", int_arm_neon_vmaxu, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f32",
                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f32",
                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMIN     : Vector Minimum
defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "u", int_arm_neon_vminu, 1>;
def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f32",
                        v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f32",
                        v4f32, v4f32, int_arm_neon_vmins, 1>;

// Vector Pairwise Operations.

// Pairwise operations.  VPADD / VPMAX / VPMIN are defined for D registers
// only (all are N3VDInt records); the long and accumulate-long forms come
// from the N2VPLInt*_QHS multiclasses.

// VPADD    : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL   : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL   : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX    : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
// The f32 form reuses the signed intrinsic with an f32 vector type.
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN    : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
// The f32 form reuses the signed intrinsic with an f32 vector type.
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// The estimate instructions exist in a "u32" form on integer vectors and an
// "f32" form on float vectors, both selected from the same intrinsic; the
// step instructions are f32-only.

// VRECPE   : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS   : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;

// VRSQRTE  : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS  : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;

// Vector Shifts.

// Register-controlled shifts are selected from int_arm_neon_* intrinsics;
// immediate shifts are selected from the NEONvsh* SDNodes declared at the top
// of this file.

// VSHL     : Vector Shift
defm VSHLs    : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "s", int_arm_neon_vshifts, 0>;
defm VSHLu    : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "u", int_arm_neon_vshiftu, 0>;
// VSHL     : Vector Shift Left (Immediate)
defm VSHLi    : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
                           N2RegVShLFrm>;
// VSHR     : Vector Shift Right (Immediate)
defm VSHRs    : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs,
                           N2RegVShRFrm>;
defm VSHRu    : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru,
                           N2RegVShRFrm>;

// VSHLL    : Vector Shift Left Long
defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL    : Vector Shift Left Long (with maximum shift count)
// N2VLShMax forwards everything to N2VLSh and then overrides instruction
// bits 21-16 with the fixed op21_16 value supplied by each def below.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, SDNode OpNode>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, OpNode> {
  let Inst{21-16} = op21_16;
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, NEONvshlli>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, NEONvshlli>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, NEONvshlli>;

// VSHRN    : Vector Shift Right and Narrow
defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                           NEONvshrn>;

// VRSHL    : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts, 0>;
// Unsigned counterpart of VRSHLs (Vector Rounding Shift).
defm VRSHLu   : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu, 0>;
// VRSHR    : Vector Rounding Shift Right
defm VRSHRs   : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
                           N2RegVShRFrm>;
defm VRSHRu   : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru,
                           N2RegVShRFrm>;

// VRSHRN   : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrn>;

// VQSHL    : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts, 0>;
defm VQSHLu   : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu, 0>;
// VQSHL    : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls,
                           N2RegVShLFrm>;
defm VQSHLui  : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu,
                           N2RegVShLFrm>;
// VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu,
                           N2RegVShLFrm>;

// VQSHRN   : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrns>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnu>;

// VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsu>;

// VQRSHL   : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "s", int_arm_neon_vqrshifts, 0>;
defm VQRSHLu  : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "u", int_arm_neon_vqrshiftu, 0>;

// VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

// The accumulate forms below reuse the plain shift-right nodes
// (NEONvshrs/u, NEONvrshrs/u) rather than dedicated accumulate nodes.
// VSRA     : Vector Shift Right and Accumulate
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA    : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI     : Vector Shift Left and Insert
defm VSLI     : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>;
// VSRI     : Vector Shift Right and Insert
defm VSRI     : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;

// Vector Absolute and Saturating Absolute.

// VABS     : Vector Absolute Value
defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
                           int_arm_neon_vabs>;
def  VABSfd   : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                        IIC_VUNAD, "vabs", "f32",
                        v2f32, v2f32, int_arm_neon_vabs>;
def  VABSfq   : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                        IIC_VUNAQ, "vabs", "f32",
                        v4f32, v4f32, int_arm_neon_vabs>;

// VQABS    : Vector Saturating Absolute Value
defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                           int_arm_neon_vqabs>;

// Vector Negate.

// Pattern fragments matching integer negation written as (0 - x).  vneg8 and
// vneg16 match the zero vector through a bitconvert from v8i8 / v16i8 and are
// used below with DPR and QPR operands respectively.
def vneg      : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
def vneg8     : PatFrag<(ops node:$in),
                        (sub (bitconvert (v8i8 immAllZerosV)), node:$in)>;
def vneg16    : PatFrag<(ops node:$in),
                        (sub (bitconvert (v16i8 immAllZerosV)), node:$in)>;

// Integer VNEG on a D register; `size` selects the element width and `Ty`
// the vector type the vneg8 pattern is matched at.
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (vneg8 DPR:$src)))]>;
// Integer VNEG on a Q register.  Uses the Q-register itinerary IIC_VSHLiQ
// (previously the D-register IIC_VSHLiD, copied from VNEGD) so that the
// scheduling class matches the operand width, as for the Q-register shifts
// elsewhere in this file.
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
        IIC_VSHLiQ, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (vneg16 QPR:$src)))]>;

// VNEG     : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG     : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
                    "vneg", "f32", "$dst, $src", "",
                    [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
                    "vneg", "f32", "$dst, $src", "",
                    [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;

// Explicit selection patterns mapping each integer negate to the VNEG record
// of the matching element width.
def : Pat<(v8i8  (vneg8 DPR:$src)),  (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vneg8 DPR:$src)),  (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vneg8 DPR:$src)),  (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vneg16 QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vneg16 QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vneg16 QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG    : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS     : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
// VCLZ     : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           int_arm_neon_vclz>;
// VCNT     : Vector Count One Bits
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, int_arm_neon_vcnt>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Swap -- for disassembly only.
// Both records have an empty pattern list and no itinerary.
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$dst), (ins DPR:$src), NoItinerary,
                     "vswp", "$dst, $src", "", []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$dst), (ins QPR:$src), NoItinerary,
                     "vswp", "$dst, $src", "", []>;

// Vector Move Operations.

// VMOV     : Vector Move (Register)

// Register-to-register moves carry no selection pattern (empty pattern list)
// and are marked as having no side effects.
// NOTE(review): VMOVQ (QPR operands) uses the same IIC_VMOVD itinerary as the
// D-register form -- confirm this is intended.
let neverHasSideEffects = 1 in {
def  VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
                     N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
def  VMOVQ    : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
                     N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;

// Pseudo vector move instructions for QQ and QQQQ registers. These should
// be expanded after register allocation is completed.
def  VMOVQQ   : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
                NoItinerary, "@ vmov\t$dst, $src", []>;

def  VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
                NoItinerary, "@ vmov\t$dst, $src", []>;
} // neverHasSideEffects

// VMOV     : Vector Move (Immediate)

// Each vmovImm<N> PatLeaf matches a build_vector for which
// ARM::getVMOVImm(...) returns a non-null node, and uses the paired
// VMOV_get_imm<N> transform to produce the immediate operand.  The second
// argument to getVMOVImm is 1, 2 or 4 for the i8/i16/i32 variants.
// NOTE(review): presumably this is the element size in bytes -- confirm
// against ARM::getVMOVImm.

// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 1, *CurDAG);
}]>;
def vmovImm8 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0;
}], VMOV_get_imm8>;

// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 2, *CurDAG);
}]>;
def vmovImm16 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0;
}], VMOV_get_imm16>;

// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 4, *CurDAG);
}]>;
def vmovImm32 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0;
}], VMOV_get_imm32>;

// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
// vmovImm64 accepts a build_vector only when ARM::getVMOVImm(N, 8, ...)
// returns a non-null node; VMOV_get_imm64 supplies that node as the operand.
def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 8, *CurDAG);
}]>;
def vmovImm64 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0;
}], VMOV_get_imm64>;

// Note: Some of the cmode bits in the following VMOV instructions need to
// be encoded based on the immed values.

// 8-bit element immediates.
def VMOVv8i8 : N1ModImm<
    1, 0b000, 0b1110, 0, 0, 0, 1,
    (outs DPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm,
    "vmov", "i8", "$dst, $SIMM", "",
    [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<
    1, 0b000, 0b1110, 0, 1, 0, 1,
    (outs QPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm,
    "vmov", "i8", "$dst, $SIMM", "",
    [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;

// 16-bit element immediates.
def VMOVv4i16 : N1ModImm<
    1, 0b000, {1,0,?,?}, 0, 0, {?}, 1,
    (outs DPR:$dst), (ins h16imm:$SIMM), IIC_VMOVImm,
    "vmov", "i16", "$dst, $SIMM", "",
    [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
def VMOVv8i16 : N1ModImm<
    1, 0b000, {1,0,?,?}, 0, 1, {?}, 1,
    (outs QPR:$dst), (ins h16imm:$SIMM), IIC_VMOVImm,
    "vmov", "i16", "$dst, $SIMM", "",
    [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;

// 32-bit element immediates.
def VMOVv2i32 : N1ModImm<
    1, 0b000, {?,?,?,?}, 0, 0, {?}, 1,
    (outs DPR:$dst), (ins h32imm:$SIMM), IIC_VMOVImm,
    "vmov", "i32", "$dst, $SIMM", "",
    [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
def VMOVv4i32 : N1ModImm<
    1, 0b000, {?,?,?,?}, 0, 1, {?}, 1,
    (outs QPR:$dst), (ins h32imm:$SIMM), IIC_VMOVImm,
    "vmov", "i32", "$dst, $SIMM", "",
    [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;

// 64-bit element immediates.
def VMOVv1i64 : N1ModImm<
    1, 0b000, 0b1110, 0, 0, 1, 1,
    (outs DPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm,
    "vmov", "i64", "$dst, $SIMM", "",
    [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<
    1, 0b000, 0b1110, 0, 1, 1, 1,
    (outs QPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm,
    "vmov", "i64", "$dst, $SIMM", "",
    [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;

//   VMOV     : Vector Get Lane (move scalar to ARM core register)

def VGETLNs8 : NVGetLane<
    {1,1,1,0,0,1,?,1}, 0b1011, {?,?},
    (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
    IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]",
    [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNs16 : NVGetLane<
    {1,1,1,0,0,0,?,1}, 0b1011, {?,1},
    (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
    IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]",
    [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNu8 : NVGetLane<
    {1,1,1,0,1,1,?,1}, 0b1011, {?,?},
    (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
    IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]",
    [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNu16 : NVGetLane<
    {1,1,1,0,1,0,?,1}, 0b1011, {?,1},
    (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
    IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]",
    [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNi32 : NVGetLane<
    {1,1,1,0,0,0,?,1}, 0b1011, 0b00,
    (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
    IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]",
    [(set GPR:$dst, (extractelt (v2i32 DPR:$src), imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

// Lane reads from a QPR source: extract the D sub-register selected by the
// DSubReg_*_reg transform, then use the D-register instruction with the lane
// index rewritten by the matching SubReg_*_lane transform.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32
            (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))),
            (SubReg_i32_lane imm:$lane))>;

// Floating-point element extraction is a sub-register extract and needs no
// instruction of its own.
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
            (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)),
            (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
            (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)),
            (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


//   VMOV     : Vector Set Lane (move ARM core register to scalar)

// The destination is tied to $src1 for all three set-lane instructions.
let Constraints = "$src1 = $dst" in {
def VSETLNi8 : NVSetLane<
    {1,1,1,0,0,1,?,0}, 0b1011, {?,?},
    (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
    IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2",
    [(set DPR:$dst,
          (vector_insert (v8i8 DPR:$src1), GPR:$src2, imm:$lane))]>;
def VSETLNi16 : NVSetLane<
    {1,1,1,0,0,0,?,0}, 0b1011, {?,1},
    (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
    IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2",
    [(set DPR:$dst,
          (vector_insert (v4i16 DPR:$src1), GPR:$src2, imm:$lane))]>;
def VSETLNi32 : NVSetLane<
    {1,1,1,0,0,0,?,0}, 0b1011, 0b00,
    (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
    IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2",
    [(set DPR:$dst,
          (insertelt (v2i32 DPR:$src1), GPR:$src2, imm:$lane))]>;
}

// Lane writes into a QPR value: update the selected D sub-register with the
// D-register instruction and insert the result back into the Q register.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG
            QPR:$src1,
            (v8i8 (VSETLNi8
              (v8i8 (EXTRACT_SUBREG QPR:$src1, (DSubReg_i8_reg imm:$lane))),
              GPR:$src2,
              (SubReg_i8_lane imm:$lane))),
            (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG
            QPR:$src1,
            (v4i16 (VSETLNi16
              (v4i16 (EXTRACT_SUBREG QPR:$src1, (DSubReg_i16_reg imm:$lane))),
              GPR:$src2,
              (SubReg_i16_lane imm:$lane))),
            (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG
            QPR:$src1,
            (v2i32 (VSETLNi32
              (v2i32 (EXTRACT_SUBREG QPR:$src1, (DSubReg_i32_reg imm:$lane))),
              GPR:$src2,
              (SubReg_i32_lane imm:$lane))),
            (DSubReg_i32_reg imm:$lane)))>;

// Floating-point element insertion is a sub-register insert.
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG
            (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
            SPR:$src2,
            (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG
            (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
            SPR:$src2,
            (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector from FP registers: place the scalar in sub-register 0 of
// an otherwise undefined vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;

// scalar_to_vector from a core register into a D register: set lane 0 of an
// undefined vector.
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

// Same for Q registers: build the D value as above and insert it as
// arm_dsubreg_0 of an undefined Q register.
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v16i8 (IMPLICIT_DEF)),
            (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            arm_dsubreg_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v8i16 (IMPLICIT_DEF)),
            (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            arm_dsubreg_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v4i32 (IMPLICIT_DEF)),
            (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            arm_dsubreg_0)>;

//   VDUP     : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3,
          (outs DPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", Dt, "$dst, $src",
          [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3,
          (outs QPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", Dt, "$dst, $src",
          [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;

def VDUP8d  : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
def VDUP8q  : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// f32 duplicates whose source value lives in a core register.
def VDUPfd : NVDup<
    0b11101000, 0b1011, 0b00,
    (outs DPR:$dst), (ins GPR:$src),
    IIC_VMOVIS, "vdup", "32", "$dst, $src",
    [(set DPR:$dst, (v2f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;
def VDUPfq : NVDup<
    0b11101010, 0b1011, 0b00,
    (outs QPR:$dst), (ins GPR:$src),
    IIC_VMOVIS, "vdup", "32", "$dst, $src",
    [(set QPR:$dst, (v4f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;

//   VDUP     : Vector Duplicate Lane (from scalar to all elements)

class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, ValueType Ty>
  : NVDupLane<op19_16, 0,
              (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
              [(set DPR:$dst,
                    (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;

// The Q form still reads its lane from a D register, so the result and
// operand types are given separately.
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy>
  : NVDupLane<op19_16, 1,
              (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
              [(set QPR:$dst,
                    (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.

def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8>;
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>;
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>;
def VDUPLNfd  : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>;
def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8>;
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>;
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>;
def VDUPLNfq  : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>;

// Lane duplicates whose source is a Q register: pick the D sub-register that
// holds the lane and renumber the lane within it.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q
            (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))),
            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLNfq
            (v2f32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))),
            (SubReg_i32_lane imm:$lane)))>;

// f32 duplicates whose source value lives in an SPR.
def VDUPfdf : N2V<
    0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0,
    (outs DPR:$dst), (ins SPR:$src),
    IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
    [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;

def VDUPfqf : N2V<
    0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
    (outs QPR:$dst), (ins SPR:$src),
    IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
    [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// 64-bit lane duplicates: copy the selected D sub-register over the other D
// sub-register of the same Q register.
def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
          (INSERT_SUBREG
            QPR:$src,
            (i64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
            (DSubReg_f64_other_reg imm:$lane))>;
def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
          (INSERT_SUBREG
            QPR:$src,
            (f64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
            (DSubReg_f64_other_reg imm:$lane))>;

//   VMOVN    : Vector Narrowing Move
defm VMOVN : N2VNInt_HSD<
    0b11, 0b11, 0b10, 0b00100, 0, 0, IIC_VMOVD,
    "vmovn", "i", int_arm_neon_vmovn>;
//   VQMOVN   : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<
    0b11, 0b11, 0b10, 0b00101, 0, 0, IIC_VQUNAiD,
    "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu : N2VNInt_HSD<
    0b11, 0b11, 0b10, 0b00101, 1, 0, IIC_VQUNAiD,
    "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<
    0b11, 0b11, 0b10, 0b00100, 1, 0, IIC_VQUNAiD,
    "vqmovun", "s", int_arm_neon_vqmovnsu>;
//   VMOVL    : Vector Lengthening Move
defm VMOVLs : N2VLInt_QHS<
    0b01, 0b10100, 0, 1, "vmovl", "s", int_arm_neon_vmovls>;
defm VMOVLu : N2VLInt_QHS<
    0b11, 0b10100, 0, 1, "vmovl", "u", int_arm_neon_vmovlu>;

// Vector Conversions.

//   VCVT     : Vector Convert Between Floating-Point and Integers

// D-register forms.
def VCVTf2sd : N2VD<
    0b11, 0b11, 0b10, 0b11, 0b01110, 0,
    "vcvt", "s32.f32", v2i32, v2f32, fp_to_sint>;
def VCVTf2ud : N2VD<
    0b11, 0b11, 0b10, 0b11, 0b01111, 0,
    "vcvt", "u32.f32", v2i32, v2f32, fp_to_uint>;
def VCVTs2fd : N2VD<
    0b11, 0b11, 0b10, 0b11, 0b01100, 0,
    "vcvt", "f32.s32", v2f32, v2i32, sint_to_fp>;
def VCVTu2fd : N2VD<
    0b11, 0b11, 0b10, 0b11, 0b01101, 0,
    "vcvt", "f32.u32", v2f32, v2i32, uint_to_fp>;

// Q-register forms.
def VCVTf2sq : N2VQ<
    0b11, 0b11, 0b10, 0b11, 0b01110, 0,
    "vcvt", "s32.f32", v4i32, v4f32, fp_to_sint>;
def VCVTf2uq : N2VQ<
    0b11, 0b11, 0b10, 0b11, 0b01111, 0,
    "vcvt", "u32.f32", v4i32, v4f32, fp_to_uint>;
def VCVTs2fq : N2VQ<
    0b11, 0b11, 0b10, 0b11, 0b01100, 0,
    "vcvt", "f32.s32", v4f32, v4i32, sint_to_fp>;
def VCVTu2fq : N2VQ<
    0b11, 0b11, 0b10, 0b11, 0b01101, 0,
    "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>;

//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.

// D-register forms, selected through the NEON fixed-point intrinsics.
def VCVTf2xsd : N2VCvtD<
    0, 1, 0b1111, 0, 1,
    "vcvt", "s32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<
    1, 1, 0b1111, 0, 1,
    "vcvt", "u32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<
    0, 1, 0b1110, 0, 1,
    "vcvt", "f32.s32", v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<
    1, 1, 0b1110, 0, 1,
    "vcvt", "f32.u32", v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;

// Q-register forms.
def VCVTf2xsq : N2VCvtQ<
    0, 1, 0b1111, 0, 1,
    "vcvt", "s32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<
    1, 1, 0b1111, 0, 1,
    "vcvt", "u32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<
    0, 1, 0b1110, 0, 1,
    "vcvt", "f32.s32", v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<
    1, 1, 0b1110, 0, 1,
    "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;

// Vector Reverse.

//   VREV64   : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def VREV64df  : VREV64D<0b10, "vrev64", "32", v2f32>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def VREV64qf  : VREV64Q<0b10, "vrev64", "32", v4f32>;

//   VREV32   : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

//   VREV16   : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;

def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

//   VEXT     : Vector Extract

class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0, 1, 0b11, {?,?,?,?}, 0, 0,
        (outs DPR:$dst), (ins DPR:$lhs, DPR:$rhs, i32imm:$index),
        NVExtFrm, IIC_VEXTD,
        OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
        [(set DPR:$dst,
              (Ty (NEONvext (Ty DPR:$lhs), (Ty DPR:$rhs), imm:$index)))]>;

class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0, 1, 0b11, {?,?,?,?}, 1, 0,
        (outs QPR:$dst), (ins QPR:$lhs, QPR:$rhs, i32imm:$index),
        NVExtFrm, IIC_VEXTQ,
        OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
        [(set QPR:$dst,
              (Ty (NEONvext (Ty QPR:$lhs), (Ty QPR:$rhs), imm:$index)))]>;

def VEXTd8  : VEXTd<"vext", "8", v8i8>;
def VEXTd16 : VEXTd<"vext", "16", v4i16>;
def VEXTd32 : VEXTd<"vext", "32", v2i32>;
def VEXTdf  : VEXTd<"vext", "32", v2f32>;

def VEXTq8  : VEXTq<"vext", "8", v16i8>;
def VEXTq16 : VEXTq<"vext", "16", v8i16>;
def VEXTq32 : VEXTq<"vext", "32", v4i32>;
def VEXTqf  : VEXTq<"vext", "32", v4f32>;

//   VTRN     : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

// Q-register forms of VTRN (Vector Transpose).
def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

//   VUZP     : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

//   VZIP     : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

//   VTBL     : Vector Table Lookup
def VTBL1 : N3V<
    1, 1, 0b11, 0b1000, 0, 0,
    (outs DPR:$dst), (ins DPR:$tbl1, DPR:$src),
    NVTBLFrm, IIC_VTB1,
    "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
    [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;

// The multi-register table forms are flagged hasExtraSrcRegAllocReq.
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2 : N3V<
    1, 1, 0b11, 0b1001, 0, 0,
    (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$src),
    NVTBLFrm, IIC_VTB2,
    "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "",
    [(set DPR:$dst,
          (v8i8 (int_arm_neon_vtbl2 DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBL3 : N3V<
    1, 1, 0b11, 0b1010, 0, 0,
    (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
    NVTBLFrm, IIC_VTB3,
    "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "",
    [(set DPR:$dst,
          (v8i8 (int_arm_neon_vtbl3 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3,
                                    DPR:$src)))]>;
def VTBL4 : N3V<
    1, 1, 0b11, 0b1011, 0, 0,
    (outs DPR:$dst),
    (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
    NVTBLFrm, IIC_VTB4,
    "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "",
    [(set DPR:$dst,
          (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3,
                                    DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1

//   VTBX     : Vector Table Extension
// Every VTBX form ties $orig to the destination register.
def VTBX1 : N3V<
    1, 1, 0b11, 0b1000, 1, 0,
    (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$src),
    NVTBLFrm, IIC_VTBX1,
    "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
    [(set DPR:$dst,
          (v8i8 (int_arm_neon_vtbx1 DPR:$orig, DPR:$tbl1, DPR:$src)))]>;

let hasExtraSrcRegAllocReq = 1 in {
def VTBX2 : N3V<
    1, 1, 0b11, 0b1001, 1, 0,
    (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src),
    NVTBLFrm, IIC_VTBX2,
    "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst",
    [(set DPR:$dst,
          (v8i8 (int_arm_neon_vtbx2 DPR:$orig, DPR:$tbl1, DPR:$tbl2,
                                    DPR:$src)))]>;
def VTBX3 : N3V<
    1, 1, 0b11, 0b1010, 1, 0,
    (outs DPR:$dst),
    (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
    NVTBLFrm, IIC_VTBX3,
    "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "$orig = $dst",
    [(set DPR:$dst,
          (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, DPR:$tbl2,
                                    DPR:$tbl3, DPR:$src)))]>;
def VTBX4 : N3V<
    1, 1, 0b11, 0b1011, 1, 0,
    (outs DPR:$dst),
    (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
    NVTBLFrm, IIC_VTBX4,
    "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
    "$orig = $dst",
    [(set DPR:$dst,
          (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, DPR:$tbl2,
                                    DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// Unary scalar pattern: insert the SPR operand as arm_ssubreg_0 of an undefined
// OpTy vector, apply Inst, and extract arm_ssubreg_0 of the result.
class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
  : NEONFPPat<(ResTy (OpNode SPR:$a)),
              (EXTRACT_SUBREG
                (OpTy (Inst
                  (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
                                 SPR:$a, arm_ssubreg_0))),
                arm_ssubreg_0)>;

// Binary f32 pattern: same scheme with two v2f32 operands.
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
                (v2f32 (Inst
                  (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                 SPR:$a, arm_ssubreg_0),
                  (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                 SPR:$b, arm_ssubreg_0))),
                arm_ssubreg_0)>;

// Multiply-accumulate style f32 pattern: OpNode(acc, MulNode(a, b)) mapped
// onto a three-operand instruction.
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
                (Inst
                  (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                 SPR:$acc, arm_ssubreg_0),
                  (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                 SPR:$a, arm_ssubreg_0),
                  (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                 SPR:$b, arm_ssubreg_0)),
                arm_ssubreg_0)>;

// These need separate instructions because they must use the DPR_VFP2
// register class, which has SPR sub-registers.

// Vector Add Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>;
def : N3VSPat<fadd, VADDfd_sfp>;

// Vector Sub Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>;
def : N3VSPat<fsub, VSUBfd_sfp>;

// Vector Multiply Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>;
def : N3VSPat<fmul, VMULfd_sfp>;

// Vector Multiply-Accumulate/Subtract used for single-precision FP
// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
// we want to avoid them for now. e.g., alternating vmla/vadd instructions.

//let neverHasSideEffects = 1 in
//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
//                            v2f32, fmul, fadd>;
//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;

//let neverHasSideEffects = 1 in
//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
//                            v2f32, fmul, fsub>;
//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;

// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0,
                     (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
                     "vabs", "f32", "$dst, $src", "", []>;
def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;

// Vector Negate used for single-precision FP
let neverHasSideEffects = 1 in
def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                     (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
                     "vneg", "f32", "$dst, $src", "", []>;
def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;

// Vector Maximum used for single-precision FP
let neverHasSideEffects = 1 in
def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
                     "vmax", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmax, VMAXfd_sfp>;

// Vector Minimum used for single-precision FP
// The third encoding operand must be 0b10 here. With 0b00 this record would
// carry exactly the same encoding operands as VMAXfd_sfp above, so "vmin.f32"
// would be emitted with the VMAX opcode. Floating-point VMIN differs from
// VMAX by having the op bit set, the same 0b00 / 0b10 split that separates
// VADDfd_sfp from VSUBfd_sfp.
let neverHasSideEffects = 1 in
def VMINfd_sfp : N3V<0, 0, 0b10, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
                     "vmin", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmin, VMINfd_sfp>;

// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                        v2i32, v2f32, fp_to_sint>;
def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;

let neverHasSideEffects = 1 in
def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                        v2i32, v2f32, fp_to_uint>;
def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;

let neverHasSideEffects = 1 in
def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                        v2f32, v2i32, sint_to_fp>;
def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;

let neverHasSideEffects = 1 in
def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                        v2f32, v2i32, uint_to_fp>;
def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// Each pattern below rewrites a bitconvert to the same register given the new
// type, so no instruction is emitted. First the types held in a DPR.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

// Then the types held in a QPR.
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;