ARMInstrNEON.td revision c289a0252bba42248d7b11699dda27feca8860b6
//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profile shared by all of the vector compare / test nodes below:
// one result and two operands, where the result (index 0) is an integer type
// and the two operands (indices 1 and 2) have the same type as each other.
// The operands are not constrained to match the result type.
def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;

// Compare / test nodes.  Each one binds a TableGen name to the ARMISD opcode
// named in the string.
def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
// SDTARMVSH:    integer result, source of the same type, i32 shift amount.
// SDTARMVSHX:   integer result and integer source (types may differ), i32
//               shift amount.
// SDTARMVSHINS: integer result, two sources of the same type as the result,
//               i32 shift amount.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

// Plain shifts by immediate (same-type and long/narrow forms).
def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

// Nodes for the ARMISD::VRSHR* opcodes.
def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

// Nodes for the ARMISD::VQSH* opcodes.
def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

// Nodes for the ARMISD::VQRSHRN* opcodes.
def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert nodes (use the three-operand "SHINS" profile).
def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction: i32 result, integer-typed source, i32 lane operand.
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// VDUP: one operand, vector result; the operand type is left unconstrained.
def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
// Both the result and the source must be vectors; the last operand is an i32.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

// VEXT: vector result, two sources of the same type as the result, plus an
// i32 operand.
def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// One-input shuffles: vector result with a source of the same type.
def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-input, two-result shuffles: both results and both sources share one
// vector type.
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// FMAX / FMIN: f32 result with two f32 operands.
def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//

// Immediate operands of 8/16/32/64 bits.  Each one only selects the printer
// method named in PrintMethod; the printers themselves are defined outside
// this file.
def h8imm  : Operand<i8> {
  let PrintMethod = "printHex8ImmOperand";
}
def h16imm : Operand<i16> {
  let PrintMethod = "printHex16ImmOperand";
}
def h32imm : Operand<i32> {
  let PrintMethod = "printHex32ImmOperand";
}
def h64imm : Operand<i64> {
  let PrintMethod = "printHex64ImmOperand";
}

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use vldmia to load a Q register as a D register pair.
// This is equivalent to VLDMD except that it has a Q register operand.
// Selected for a v2f64 load through an addrmode4 address.  The Q register is
// printed with the "dregpair" operand modifier.
def VLDMQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm,
                "vldmia", "$addr, ${dst:dregpair}",
                [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
  let Inst{27-25} = 0b110;
  let Inst{24}    = 0; // P bit
  let Inst{23}    = 1; // U bit
  let Inst{20}    = 1;
  let Inst{11-8}  = 0b1011;
}

let mayLoad = 1 in {
// Use vld1 to load a Q register as a D register pair.
// This alternative to VLDMQ allows an alignment to be specified.
// This is equivalent to VLD1q64 except that it has a Q register operand.
// (No selection pattern is attached here; the pattern list is empty.)
def VLD1q
  : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
          IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
// Writeback form: $wb is an extra GPR result tied to the address register by
// the "$addr.addr = $wb" constraint.
def VLD1q_UPD
  : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64",
          "${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
} // mayLoad = 1

// Use vstmia to store a Q register as a D register pair.
// This is equivalent to VSTMD except that it has a Q register operand.
// Identical encoding fields to VLDMQ above except that Inst{20} is 0.
def VSTMQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
                "vstmia", "$addr, ${src:dregpair}",
                [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
  let Inst{27-25} = 0b110;
  let Inst{24}    = 0; // P bit
  let Inst{23}    = 1; // U bit
  let Inst{20}    = 0;
  let Inst{11-8}  = 0b1011;
}

let mayStore = 1 in {
// Use vst1 to store a Q register as a D register pair.
// This alternative to VSTMQ allows an alignment to be specified.
// This is equivalent to VST1q64 except that it has a Q register operand.
// (No selection pattern is attached here; the pattern list is empty.)
def VST1q
  : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
          IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
// Writeback form: $wb is a GPR result tied to the address register by the
// "$addr.addr = $wb" constraint.
// The Q register must be written as "${src:dregpair}" (dollar sign before the
// brace) so that the "dregpair" modifier is applied when printing, exactly as
// in VST1q above and in the VST1QWB class.  The previous spelling
// "{$src:dregpair}" put the brace first, which is asm-variant syntax rather
// than an operand modifier.
def VST1q_UPD
  : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
          IIC_VST, "vst1", "64", "${src:dregpair}, $addr$offset",
          "$addr.addr = $wb", []>;
} // mayStore = 1

let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {

// VLD1     : Vector Load (multiple single elements)
// VLD1D loads one D register; VLD1Q loads two D registers that are listed as
// separate DPR results.  Dt is the data-type suffix string and op7_4 is the
// matching 4-bit field passed through to NLdSt.
class VLD1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
          (ins addrmode6:$addr), IIC_VLD1,
          "vld1", Dt, "\\{$dst\\}, $addr", "", []>;
class VLD1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr), IIC_VLD1,
          "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;

def  VLD1d8   : VLD1D<0b0000, "8">;
def  VLD1d16  : VLD1D<0b0100, "16">;
def  VLD1d32  : VLD1D<0b1000, "32">;
def  VLD1d64  : VLD1D<0b1100, "64">;

def  VLD1q8   : VLD1Q<0b0000, "8">;
def  VLD1q16  : VLD1Q<0b0100, "16">;
def  VLD1q32  : VLD1Q<0b1000, "32">;
def  VLD1q64  : VLD1Q<0b1100, "64">;

// ...with address register writeback:
// Note that the quad writeback class takes a single QPR result (printed with
// the "dregpair" modifier) rather than two DPR results as VLD1Q does.
class VLD1DWB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
          "vld1", Dt, "\\{$dst\\}, $addr$offset",
          "$addr.addr = $wb", []>;
class VLD1QWB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
          "vld1", Dt, "${dst:dregpair}, $addr$offset",
          "$addr.addr = $wb", []>;

def VLD1d8_UPD  : VLD1DWB<0b0000, "8">;
def VLD1d16_UPD : VLD1DWB<0b0100, "16">;
def VLD1d32_UPD : VLD1DWB<0b1000, "32">;
def VLD1d64_UPD : VLD1DWB<0b1100, "64">;

def VLD1q8_UPD  : VLD1QWB<0b0000, "8">;
def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;

// ...with 3 registers (some of these are only for the disassembler):
// The *WB classes add a GPR:$wb result tied to the address register and an
// am6offset operand that is printed directly after the address.
class VLD1D3<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
          "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
class VLD1D3WB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
          "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>;

def VLD1d8T      : VLD1D3<0b0000, "8">;
def VLD1d16T     : VLD1D3<0b0100, "16">;
def VLD1d32T     : VLD1D3<0b1000, "32">;
def VLD1d64T     : VLD1D3<0b1100, "64">;

def VLD1d8T_UPD  : VLD1D3WB<0b0000, "8">;
def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;

// ...with 4 registers (some of these are only for the disassembler):
class VLD1D4<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
          "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
class VLD1D4WB<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0010,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
          "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb",
          []>;

def VLD1d8Q      : VLD1D4<0b0000, "8">;
def VLD1d16Q     : VLD1D4<0b0100, "16">;
def VLD1d32Q     : VLD1D4<0b1000, "32">;
def VLD1d64Q     : VLD1D4<0b1100, "64">;

def VLD1d8Q_UPD  : VLD1D4WB<0b0000, "8">;
def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;

// VLD2     : Vector Load (multiple 2-element structures)
// VLD2D takes op11_8 as a parameter so that the same class can be reused for
// the double-spaced "b" variants further down; VLD2Q fixes op11_8 to 0b0011
// and lists four D registers.
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr), IIC_VLD2,
          "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
class VLD2Q<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0011, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD2,
          "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;

def  VLD2d8   : VLD2D<0b1000, 0b0000, "8">;
def  VLD2d16  : VLD2D<0b1000, 0b0100, "16">;
def  VLD2d32  : VLD2D<0b1000, 0b1000, "32">;

def  VLD2q8   : VLD2Q<0b0000, "8">;
def  VLD2q16  : VLD2Q<0b0100, "16">;
def  VLD2q32  : VLD2Q<0b1000, "32">;

// ...with address register writeback:
class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
          "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
          "$addr.addr = $wb", []>;
class VLD2QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0011, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
          "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VLD2d8_UPD  : VLD2DWB<0b1000, 0b0000, "8">;
def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;

def VLD2q8_UPD  : VLD2QWB<0b0000, "8">;
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
def VLD2q32_UPD : VLD2QWB<0b1000, "32">;

// ...with double-spaced registers (for disassembly only):
// These differ from the VLD2d* definitions only in op11_8 (0b1001 vs 0b1000).
def VLD2b8      : VLD2D<0b1001, 0b0000, "8">;
def VLD2b16     : VLD2D<0b1001, 0b0100, "16">;
def VLD2b32     : VLD2D<0b1001, 0b1000, "32">;
def VLD2b8_UPD  : VLD2DWB<0b1001, 0b0000, "8">;
def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">;
def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">;

// VLD3     : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr), IIC_VLD3,
          "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;

def  VLD3d8   : VLD3D<0b0100, 0b0000, "8">;
def  VLD3d16  : VLD3D<0b0100, 0b0100, "16">;
def  VLD3d32  : VLD3D<0b0100, 0b1000, "32">;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3,
          "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VLD3d8_UPD  : VLD3DWB<0b0100, 0b0000, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD3q8      : VLD3D<0b0101, 0b0000, "8">;
def VLD3q16     : VLD3D<0b0101, 0b0100, "16">;
def VLD3q32     : VLD3D<0b0101, 0b1000, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, 0b0000, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;

// ...alternate versions to be allocated odd register numbers:
// (Same class and template arguments as the VLD3q*_UPD definitions above;
// only the record name differs.)
def VLD3q8odd_UPD  : VLD3DWB<0b0101, 0b0000, "8">;
def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">;
def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">;

// VLD4     : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD4,
          "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;

def  VLD4d8   : VLD4D<0b0000, 0b0000, "8">;
def  VLD4d16  : VLD4D<0b0000, 0b0100, "16">;
def  VLD4d32  : VLD4D<0b0000, 0b1000, "32">;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4,
          "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VLD4d8_UPD  : VLD4DWB<0b0000, 0b0000, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD4q8      : VLD4D<0b0001, 0b0000, "8">;
def VLD4q16     : VLD4D<0b0001, 0b0100, "16">;
def VLD4q32     : VLD4D<0b0001, 0b1000, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, 0b0000, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;

// ...alternate versions to be allocated odd register numbers:
// (Same class and template arguments as the VLD4q*_UPD definitions above;
// only the record name differs.)
def VLD4q8odd_UPD  : VLD4DWB<0b0001, 0b0000, "8">;
def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">;
def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;

// VLD1LN   : Vector Load (single element to one lane)
//   FIXME: Not yet implemented.

// VLD2LN   : Vector Load (single 2-element structure to one lane)
// Each destination register is also passed in as a source and tied to it by
// the constraint string ("$srcN = $dstN"); the lane number is an immediate
// operand printed inside the brackets.
// NOTE(review): the '?' entries in the op7_4 arguments below leave those bits
// unset in the definition -- presumably they are filled in from the lane
// number when encoding; confirm against the encoder.
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
          "$src1 = $dst1, $src2 = $dst2", []>;

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8">;
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;

// ...with double-spaced registers:
// These set to 1 the op7_4 bit that the corresponding "d" variants set to 0.
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;

// ...alternate versions to be allocated odd register numbers:
def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt,
          "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
          "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8">;
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;

// VLD3LN   : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
          nohash_imm:$lane), IIC_VLD3, "vld3", Dt,
          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8">;
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;

// ...alternate versions to be allocated odd register numbers:
def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VLD3, "vld3", Dt,
          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset",
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
          []>;

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8">;
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;

// VLD4LN   : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
          nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8">;
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;

// ...alternate versions to be allocated odd register numbers:
def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, op11_8, op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VLD4, "vld4", Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
          []>;

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;

// VLD1DUP  : Vector Load (single element to all lanes)
// VLD2DUP  : Vector Load (single 2-element structure to all lanes)
// VLD3DUP  : Vector Load (single 3-element structure to all lanes)
// VLD4DUP  : Vector Load (single 4-element structure to all lanes)
//   FIXME: Not yet implemented.
} // mayLoad = 1, hasExtraDefRegAllocReq = 1

let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {

// VST1     : Vector Store (multiple single elements)
// Store counterparts of the VLD1 classes: no results, and the stored
// registers appear as inputs after the address operand.
class VST1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
          "vst1", Dt, "\\{$src\\}, $addr", "", []>;
class VST1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b1010,op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
          "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;

def  VST1d8   : VST1D<0b0000, "8">;
def  VST1d16  : VST1D<0b0100, "16">;
def  VST1d32  : VST1D<0b1000, "32">;
def  VST1d64  : VST1D<0b1100, "64">;

def  VST1q8   : VST1Q<0b0000, "8">;
def  VST1q16  : VST1Q<0b0100, "16">;
def  VST1q32  : VST1Q<0b1000, "32">;
def  VST1q64  : VST1Q<0b1100, "64">;

// ...with address register writeback:
// The only result is GPR:$wb, tied to the address register.  The quad form
// takes a single QPR source printed with the "dregpair" modifier rather than
// two DPR sources as VST1Q does.
class VST1DWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST,
          "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>;
class VST1QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
          "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>;

def VST1d8_UPD  : VST1DWB<0b0000, "8">;
def VST1d16_UPD : VST1DWB<0b0100, "16">;
def VST1d32_UPD : VST1DWB<0b1000, "32">;
def VST1d64_UPD : VST1DWB<0b1100, "64">;

def VST1q8_UPD  : VST1QWB<0b0000, "8">;
def VST1q16_UPD : VST1QWB<0b0100, "16">;
def VST1q32_UPD : VST1QWB<0b1000, "32">;
def VST1q64_UPD : VST1QWB<0b1100, "64">;

// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
// Writeback form of the 3-register vst1: adds the GPR:$wb result tied to the
// address register and an am6offset operand printed after the address.
class VST1D3WB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3),
          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST1d8T      : VST1D3<0b0000, "8">;
def VST1d16T     : VST1D3<0b0100, "16">;
def VST1d32T     : VST1D3<0b1000, "32">;
def VST1d64T     : VST1D3<0b1100, "64">;

def VST1d8T_UPD  : VST1D3WB<0b0000, "8">;
def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
def VST1d64T_UPD : VST1D3WB<0b1100, "64">;

// ...with 4 registers (some of these are only for the disassembler):
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
          []>;
class VST1D4WB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST1d8Q      : VST1D4<0b0000, "8">;
def VST1d16Q     : VST1D4<0b0100, "16">;
def VST1d32Q     : VST1D4<0b1000, "32">;
def VST1d64Q     : VST1D4<0b1100, "64">;

def VST1d8Q_UPD  : VST1D4WB<0b0000, "8">;
def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;

// VST2     : Vector Store (multiple 2-element structures)
// VST2D takes op11_8 as a parameter so that it can be reused for the
// double-spaced "b" variants below; VST2Q fixes op11_8 to 0b0011.
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2),
          IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
class VST2Q<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
          "", []>;

def  VST2d8   : VST2D<0b1000, 0b0000, "8">;
def  VST2d16  : VST2D<0b1000, 0b0100, "16">;
def  VST2d32  : VST2D<0b1000, 0b1000, "32">;

def  VST2q8   : VST2Q<0b0000, "8">;
def  VST2q16  : VST2Q<0b0100, "16">;
def  VST2q32  : VST2Q<0b1000, "32">;

// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2),
          IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset",
          "$addr.addr = $wb", []>;
class VST2QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST2d8_UPD  : VST2DWB<0b1000, 0b0000, "8">;
def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;

def VST2q8_UPD  : VST2QWB<0b0000, "8">;
def VST2q16_UPD : VST2QWB<0b0100, "16">;
def VST2q32_UPD : VST2QWB<0b1000, "32">;

// ...with double-spaced registers (for disassembly only):
// These differ from the VST2d* definitions only in op11_8 (0b1001 vs 0b1000).
def VST2b8      : VST2D<0b1001, 0b0000, "8">;
def VST2b16     : VST2D<0b1001, 0b0100, "16">;
def VST2b32     : VST2D<0b1001, 0b1000, "32">;
def VST2b8_UPD  : VST2DWB<0b1001, 0b0000, "8">;
def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">;
def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">;

// VST3     : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
          "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;

def  VST3d8   : VST3D<0b0100, 0b0000, "8">;
def  VST3d16  : VST3D<0b0100, 0b0100, "16">;
def  VST3d32  : VST3D<0b0100, 0b1000, "32">;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
          "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST3d8_UPD  : VST3DWB<0b0100, 0b0000, "8">;
def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VST3q8      : VST3D<0b0101, 0b0000, "8">;
def VST3q16     : VST3D<0b0101, 0b0100, "16">;
def VST3q32     : VST3D<0b0101, 0b1000, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, 0b0000, "8">;
def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;

// ...alternate versions to be allocated odd register numbers:
// (Same class and template arguments as the VST3q*_UPD definitions above;
// only the record name differs.)
def VST3q8odd_UPD  : VST3DWB<0b0101, 0b0000, "8">;
def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">;
def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">;

// VST4     : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
          "", []>;

def  VST4d8   : VST4D<0b0000, 0b0000, "8">;
def  VST4d16  : VST4D<0b0000, 0b0100, "16">;
def  VST4d32  : VST4D<0b0000, 0b1000, "32">;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
           "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST4d8_UPD  : VST4DWB<0b0000, 0b0000, "8">;
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;

// ...with double-spaced registers (non-updating versions for disassembly only):
def VST4q8      : VST4D<0b0001, 0b0000, "8">;
def VST4q16     : VST4D<0b0001, 0b0100, "16">;
def VST4q32     : VST4D<0b0001, 0b1000, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, 0b0000, "8">;
def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;

// ...alternate versions to be allocated odd register numbers:
// (Same class and template arguments as the VST4q*_UPD definitions above;
// only the record name differs.)
def VST4q8odd_UPD  : VST4DWB<0b0001, 0b0000, "8">;
def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">;
def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;

// VST1LN   : Vector Store (single element from one lane)
//   FIXME: Not yet implemented.

// VST2LN   : Vector Store (single 2-element structure from one lane)
// The lane number is an immediate operand printed inside the brackets after
// each register.  Unlike the lane loads, no tied-operand constraint is used
// for the data registers.
// NOTE(review): the '?' entries in the op7_4 arguments below leave those bits
// unset in the definition -- presumably they are filled in from the lane
// number when encoding; confirm against the encoder.
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
          "", []>;

def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8">;
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;

// ...alternate versions to be allocated odd register numbers:
def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt,
          "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8">;
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;

// VST3LN   : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VST, "vst3", Dt,
          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;

def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8">;
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;

// ...alternate versions to be allocated odd register numbers:
def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VST, "vst3", Dt,
          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8">;
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;

// VST4LN   : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
           nohash_imm:$lane), IIC_VST, "vst4", Dt,
          "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
          "", []>;

def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8">;
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;

// ...alternate versions to be allocated odd register numbers:
def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VST, "vst4", Dt,
  "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
          "$addr.addr = $wb", []>;

def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8">;
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;

} // mayStore = 1, hasExtraSrcRegAllocReq = 1


//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
// Each transform below rewrites an immediate into a target constant holding
// a D sub-register index: the base value 5 plus the immediate divided by the
// per-element-size divisor written in the body.
def DSubReg_i8_reg : SDNodeXForm<imm, [{
  uint64_t Lane = N->getZExtValue();
  return CurDAG->getTargetConstant(5 + Lane / 8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  uint64_t Lane = N->getZExtValue();
  return CurDAG->getTargetConstant(5 + Lane / 4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  uint64_t Lane = N->getZExtValue();
  return CurDAG->getTargetConstant(5 + Lane / 2, MVT::i32);
}]>;
// The f64 form uses the immediate directly as the offset from 5.
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  uint64_t Lane = N->getZExtValue();
  return CurDAG->getTargetConstant(5 + Lane, MVT::i32);
}]>;
// Yields the opposite index: an immediate of 0 gives 6, an immediate of 1
// gives 5.
def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
  uint64_t Lane = N->getZExtValue();
  return CurDAG->getTargetConstant(5 + (1 - Lane), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.)
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  uint64_t Lane = N->getZExtValue();
  return CurDAG->getTargetConstant(1 + Lane, MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// Each transform keeps only the low bits of the lane number (mask 7, 3 or 1).
def SubReg_i8_lane : SDNodeXForm<imm, [{
  uint64_t Lane = N->getZExtValue();
  return CurDAG->getTargetConstant(Lane & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  uint64_t Lane = N->getZExtValue();
  return CurDAG->getTargetConstant(Lane & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  uint64_t Lane = N->getZExtValue();
  return CurDAG->getTargetConstant(Lane & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: single-, double- and quad-register.
// N2VS: operands are in DPR_VFP2.  The pattern list is empty, so ResTy, OpTy
// and OpNode are accepted but not referenced by this class itself.
class N2VS<bits<2> op24_23, bits<2> op21_20,
           bits<2> op19_18, bits<2> op17_16,
           bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src),
        IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
        []>;

// N2VD: DPR -> DPR, selected from (ResTy (OpNode (OpTy src))).
class N2VD<bits<2> op24_23, bits<2> op21_20,
           bits<2> op19_18, bits<2> op17_16,
           bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;

// N2VQ: QPR -> QPR counterpart of N2VD (sixth N2V argument is 1, itinerary
// is IIC_VUNAQ).
class N2VQ<bits<2> op24_23, bits<2> op21_20,
           bits<2> op19_18, bits<2> op17_16,
           bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// N2VDInt: DPR -> DPR, selected from an intrinsic call; the itinerary is
// supplied by the instantiation.
class N2VDInt<bits<2> op24_23, bits<2> op21_20,
              bits<2> op19_18, bits<2> op17_16,
              bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;

// N2VQInt: QPR -> QPR counterpart of N2VDInt.
class N2VQInt<bits<2> op24_23, bits<2> op21_20,
              bits<2> op19_18, bits<2> op17_16,
              bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Narrow 2-register intrinsics.
// The source is a QPR of type TyQ and the result a DPR of type TyD; op6 is
// passed through from the instantiation instead of being fixed.
class N2VNInt<bits<2> op24_23, bits<2> op21_20,
              bits<2> op19_18, bits<2> op17_16,
              bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$dst),
        (ins QPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;

// Long 2-register intrinsics (currently only used for VMOVL).
// The source is a DPR of type TyD and the result a QPR of type TyQ.
class N2VLInt<bits<2> op24_23, bits<2> op21_20,
              bits<2> op19_18, bits<2> op17_16,
              bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$dst),
        (ins DPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both shuffle classes produce two results, each tied to the matching source
// register, and carry no selection pattern.  The D form hard-codes
// IIC_VPERMD; the Q form takes its itinerary as a parameter.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7,
                  string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$dst1, DPR:$dst2),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD, OpcodeStr, Dt, "$dst1, $dst2",
        "$src1 = $dst1, $src2 = $dst2",
        []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$dst1, QPR:$dst2),
        (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$dst1, $dst2",
        "$src1 = $dst1, $src2 = $dst2",
        []>;

// Basic 3-register operations: single-, double- and quad-register.

// N3VS: operands are in DPR_VFP2 and the pattern list is empty, so ResTy,
// OpTy and OpNode are not referenced by this class itself.
class N3VS<bit op24, bit op23,
           bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2),
        IIC_VBIND, OpcodeStr, Dt, "$dst, $src1, $src2", "",
        []> {
  let isCommutable = Commutable;
}

// N3VD: DPR x DPR -> DPR, selected from (OpNode src1, src2).
class N3VD<bit op24, bit op23,
           bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst,
              (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
// N3VDX derives from N3VX and therefore takes no Dt string.
class N3VDX<bit op24, bit op23,
            bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$dst),
         (ins DPR:$src1, DPR:$src2),
         itin, OpcodeStr, "$dst, $src1, $src2", "",
         [(set DPR:$dst,
               (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// N3VDSL: the second operand of ShOp is one lane of $src2, duplicated via
// NEONvduplane.  $src2 is constrained to DPR_VFP2.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}

// N3VDSL16: as N3VDSL, but $src2 is constrained to DPR_8 and the itinerary
// is fixed to IIC_VMULi16D.
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt,
               ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        IIC_VMULi16D, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_8:$src2),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}

// N3VQ: QPR x QPR -> QPR, selected from (OpNode src1, src2).
class N3VQ<bit op24, bit op23,
           bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst,
              (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// N3VQX: quad-register form derived from N3VX (no Dt string).
class N3VQX<bit op24, bit op23,
            bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$dst),
         (ins QPR:$src1, QPR:$src2),
         itin, OpcodeStr, "$dst, $src1, $src2", "",
         [(set QPR:$dst,
               (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// N3VQSL: quad-register result; the duplicated lane comes from a D register
// of type OpTy held in DPR_VFP2.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

// N3VQSL16: as N3VQSL, but $src2 is constrained to DPR_8 and the itinerary
// is fixed to IIC_VMULi16Q.
class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        IIC_VMULi16Q, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
// N3VDInt: DPR x DPR -> DPR, selected from an intrinsic call.
class N3VDInt<bit op24, bit op23,
              bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst,
              (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// N3VDIntSL: the second intrinsic argument is one lane of $src2 (DPR_VFP2),
// duplicated via NEONvduplane.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

// N3VDIntSL16: as N3VDIntSL, with $src2 constrained to DPR_8.
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_8:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

// N3VQInt: QPR x QPR -> QPR, selected from an intrinsic call.
class N3VQInt<bit op24, bit op23,
              bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst,
              (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// N3VQIntSL: quad-register result; the duplicated lane comes from a D
// register of type OpTy held in DPR_VFP2.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// N3VQIntSL16: as N3VQIntSL, with $src2 constrained to DPR_8.
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: single-, double- and quad-register.
// In every class of this group $src1 is tied to $dst and the assembly string
// prints only $dst, $src2 and $src3.

// N3VSMulOp: DPR_VFP2 operands, empty pattern list (Ty, MulOp and OpNode are
// not referenced by this class itself).
class N3VSMulOp<bit op24, bit op23,
                bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3),
        itin, OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        []>;

// N3VDMulOp: matches (OpNode src1, (MulOp src2, src3)) on D registers.
class N3VDMulOp<bit op24, bit op23,
                bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR:$src3),
        itin, OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set DPR:$dst,
              (Ty (OpNode DPR:$src1,
                          (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;

// N3VDMulOpSL: the multiplier is one lane of $src3 (DPR_VFP2), duplicated
// via NEONvduplane.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
                                                     imm:$lane)))))))]>;

// N3VDMulOpSL16: as N3VDMulOpSL, with $src3 constrained to DPR_8.
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_8:$src3),
                                                     imm:$lane)))))))]>;

// N3VQMulOp: matches (OpNode src1, (MulOp src2, src3)) on Q registers.
class N3VQMulOp<bit op24, bit op23,
                bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, QPR:$src3),
        itin, OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst,
              (Ty (OpNode QPR:$src1,
                          (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;

// N3VQMulOpSL: quad-register accumulate; the multiplier lane comes from a D
// register of type OpTy held in DPR_VFP2.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                         (ResTy (NEONvduplane
                                                   (OpTy DPR_VFP2:$src3),
                                                   imm:$lane)))))))]>;

// N3VQMulOpSL16: as N3VQMulOpSL, with $src3 constrained to DPR_8.
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                         (ResTy (NEONvduplane
                                                   (OpTy DPR_8:$src3),
                                                   imm:$lane)))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23,
               bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR:$src3),
        itin, OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set DPR:$dst,
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy DPR:$src3))))]>;
class N3VQInt3<bit op24, bit op23,
               bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, QPR:$src3),
        itin, OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst,
              (ResTy (IntOp (OpTy QPR:$src1),
                            (OpTy QPR:$src2),
                            (OpTy QPR:$src3))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// N3VLInt3: accumulator $src1 (QPR, tied to $dst) plus two DPR operands.
class N3VLInt3<bit op24, bit op23,
               bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR:$src3),
        itin, OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst,
              (TyQ (IntOp (TyQ QPR:$src1),
                          (TyD DPR:$src2),
                          (TyD DPR:$src3))))]>;

// N3VLInt3SL: the third intrinsic argument is one lane of $src3 (DPR_VFP2),
// duplicated via NEONvduplane.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                imm:$lane)))))]>;

// N3VLInt3SL16: as N3VLInt3SL, with $src3 constrained to DPR_8.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src3),
                                                imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// N3VNInt: two QPR sources of type TyQ, DPR result of type TyD.  The
// itinerary is fixed to IIC_VBINi4D rather than taken as a parameter.
class N3VNInt<bit op24, bit op23,
              bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        IIC_VBINi4D, OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst,
              (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Two DPR sources of type TyD, QPR result of type TyQ.
class N3VLInt<bit op24, bit op23,
              bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst,
              (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// N3VLIntSL: the second intrinsic argument is one lane of $src2 (DPR_VFP2),
// duplicated via NEONvduplane.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]>;

// N3VLIntSL16: as N3VLIntSL, with $src2 constrained to DPR_8.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]>;

// Wide 3-register intrinsics.
// N3VWInt: first source and result are QPR (TyQ); the second source is a DPR
// (TyD).  The itinerary is fixed to IIC_VSUBiD.
class N3VWInt<bit op24, bit op23,
              bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2),
        IIC_VSUBiD, OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst,
              (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20,
                bits<2> op19_18, bits<2> op17_16,
                bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;

// NOTE(review): this quad-register class uses IIC_VSHLiD, the same itinerary
// as the double-register N2VDPLInt, whereas N2VQShIns in this file uses
// IIC_VSHLiQ.  Confirm that the D itinerary is intended here.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20,
                bits<2> op19_18, bits<2> op17_16,
                bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// $src1 is the accumulator (type ResTy, tied to $dst); $src2 has type OpTy.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20,
                 bits<2> op19_18, bits<2> op17_16,
                 bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPALiD, OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
        [(set DPR:$dst,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20,
                 bits<2> op19_18, bits<2> op17_16,
                 bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        IIC_VPALiQ, OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
        [(set QPR:$dst,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;

// Shift by immediate,
// both double- and quad-register.
// The shift amount is an i32 immediate operand named $SIMM.
class N2VDSh<bit op24, bit op23,
             bits<4> op11_8, bit op7, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           itin, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst,
                 (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23,
             bits<4> op11_8, bit op7, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           itin, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst,
                 (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
// N2VLSh: DPR source of type OpTy, QPR result of type ResTy; op6 is passed
// through and the itinerary is fixed to IIC_VSHLiD.
class N2VLSh<bit op24, bit op23,
             bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst,
                 (ResTy (OpNode (OpTy DPR:$src), (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
// QPR source of type OpTy, DPR result of type ResTy.
class N2VNSh<bit op24, bit op23,
             bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           itin, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst,
                 (ResTy (OpNode (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// The pattern is (add src1, (ShOp src2, imm)) with $src1 tied to $dst.
class N2VDShAdd<bit op24, bit op23,
                bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           IIC_VPALiD, OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set DPR:$dst,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;

// NOTE(review): this quad-register class uses IIC_VPALiD, the same itinerary
// as the double-register N2VDShAdd, whereas N2VQPLInt2 in this file uses
// IIC_VPALiQ.  Confirm that the D itinerary is intended here.
class N2VQShAdd<bit op24, bit op23,
                bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           IIC_VPALiD, OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set QPR:$dst,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
// ShOp takes three operands here: $src1 (tied to $dst), $src2 and the i32
// immediate $SIMM.
class N2VDShIns<bit op24, bit op23,
                bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set DPR:$dst,
                 (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23,
                bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           IIC_VSHLiQ, OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set QPR:$dst,
                 (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23,
              bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           IIC_VUNAD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst,
                 (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23,
              bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst,
                 (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations -- for disassembly only.

// First with only element sizes of 8, 16 and 32 bits:
// Every def has an empty pattern list and NoItinerary.  The integer defs
// append the element size to Dt; the f32 defs use the literal "f32" data type
// and additionally set instruction bit 10.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4,
                       string opc, string Dt, string asm> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst),
                  (ins DPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "8"), asm, "",
                  []>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst),
                  (ins DPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "16"), asm, "",
                  []>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst),
                  (ins DPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "32"), asm, "",
                  []>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst),
                  (ins DPR:$src),
                  NoItinerary, opc, "f32", asm, "",
                  []> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst),
                  (ins QPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "8"), asm, "",
                  []>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst),
                  (ins QPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "16"), asm, "",
                  []>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst),
                  (ins QPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "32"), asm, "",
                  []>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst),
                  (ins QPR:$src),
                  NoItinerary, opc, "f32", asm, "",
                  []> {
    let Inst{10} = 1; // overwrite F = 1
  }
}

// Neon 3-register vector operations.
// First with only element sizes of 8, 16 and 32 bits:
// Emits one N3VD (64-bit) and one N3VQ (128-bit) def per element size.
// The literal 0b00 / 0b01 / 0b10 argument tracks the 8 / 16 / 32-bit element
// size.  The 8- and 16-bit defs share the "16" itinerary (itinD16 / itinQ16);
// the 32-bit defs use itinD32 / itinQ32.  Commutable is forwarded unchanged.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// Scalar-lane ("SL") variants for 16- and 32-bit elements only.
// The 16-bit defs use the *SL16 classes, which take no itinerary argument
// here; the 32-bit defs pass IIC_VMULi32D / IIC_VMULi32Q explicitly.  The
// quad-register defs take two value types (v8i16/v4i16 and v4i32/v2i32).
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
                       v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
                     v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
                       v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
// Inherits every def of N3V_QHS (passing itinD for both D itineraries and
// itinQ for both Q itineraries) and adds the 64-bit element defs with the
// 0b11 size argument.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// Each def is named after its (narrow) result type, while the data type
// suffix appended to Dt is the (wide) source element size.
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
// Each def is named after its (wide) result type; the data type suffix
// appended to Dt is the (narrow) source element size.  The three 2-bit
// literals passed after op24_23 are fixed per def rather than taken from
// multiclass parameters, and all defs use the IIC_VQUNAiD itinerary.
multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                      OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
  def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                      OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                      OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}


// Neon 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
// Emits D- and Q-register intrinsic defs for 16-bit (size argument 0b01) and
// 32-bit (0b10) elements.  Each def gets the itinerary matching its register
// width and element size; Commutable is forwarded unchanged.
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}

// Scalar-lane ("SL") intrinsic variants for 16- and 32-bit elements.
// The 16-bit defs use the *SL16 classes.  The quad-register defs take two
// value types (v8i16/v4i16 and v4i32/v2i32).
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// Inherits the 16/32-bit defs from N3VInt_HS and adds the 8-bit defs (size
// argument 0b00), which reuse the 16-bit itineraries.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}

// ....then also with element size of 64 bits:
// Inherits the 8/16/32-bit defs from N3VInt_QHS and adds the 64-bit defs
// (size argument 0b11), which reuse the 32-bit itineraries.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}


// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// Each def is named after its (narrow) result type, while the data type
// suffix appended to Dt is the (wide) source element size.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
// Each def is named after its (wide) result type; the data type suffix
// appended to Dt is the (narrow) source element size.  A single itinerary is
// shared by both defs.
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// Scalar-lane ("SL") long intrinsics.  Unlike N3VLInt_HS, the defs here are
// named after the source type (v4i16 / v2i32) rather than the result type.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits:
// Defs are named after the wide type; no itinerary is passed at this level.
multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
  def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// The multiply node is fixed to the integer 'mul'; OpNode is the second
// operation handed to N3VDMulOp / N3VQMulOp (e.g. add or sub at the use
// sites in this file).
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Scalar-lane ("SL") multiply-op variants for 16- and 32-bit elements.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Itineraries are hard-coded: the 8- and 16-bit defs use IIC_VMACi16D/Q and
// the 32-bit defs use IIC_VMACi32D/Q.
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
// The 16-bit-source def uses IIC_VMACi16D and the 32-bit-source def uses
// IIC_VMACi32D, matching the itinerary choice made by N3VLInt3SL_HS and
// N3VInt3_QHS for the same element sizes.
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// Scalar-lane ("SL") long 3-argument intrinsics; defs are named after the
// source type (v4i16 / v2i32).
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        string OpcodeStr, string Dt, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}


// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
// All D-register defs share itinD and all Q-register defs share itinQ.
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Defs are named after the source type; the result type has half as many
// elements, each twice as wide (e.g. v8i8 -> v4i16).
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Same type layout as N2VPLInt_QHS, built on the *PLInt2 classes.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   element sizes of 8, 16, 32 and 64 bits:
// The element size is encoded by pinning the leading bits of the imm6 field
// (Inst{21-16}) per def; the 64-bit defs instead pass 1 as the argument after
// op11_8 and leave imm6 unconstrained.
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, itin,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}


// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
// Same imm6 pinning scheme as N2VSh_QHSD; no itinerary is passed here.
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}


// Neon Shift-Insert vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
// This multiclass has no Dt parameter: the data type string is the bare
// element size ("8", "16", "32", "64").
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4,
                        OpcodeStr, "8", v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
                        OpcodeStr, "16", v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
                        OpcodeStr, "32", v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
                        OpcodeStr, "64", v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        OpcodeStr, "8", v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        OpcodeStr, "16", v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        OpcodeStr, "32", v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
                        OpcodeStr, "64", v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits:
// Defs are named after the (wide) result type; the Dt suffix and the imm6
// pinning follow the (narrow) source element size.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits:
// Defs are named after the (narrow) result type; the Dt suffix is the (wide)
// source element size, while the imm6 pinning follows the result element
// size.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                    OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

//   VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                         add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
//   VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s",
                            int_arm_neon_vaddls, 1>;
defm VADDLu   : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u",
                            int_arm_neon_vaddlu, 1>;
//   VADDW    : Vector Add Wide (Q = Q + D)
//   The operands have different types, so these are marked non-commutable.
defm VADDWs   : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
defm VADDWu   : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
//   VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vhadd", "u", int_arm_neon_vhaddu, 1>;
//   VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
//   VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                            IIC_VBINi4Q, "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                            IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>;
//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
                            int_arm_neon_vaddhn, 1>;
//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

// Vector Multiply Operations.

//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8",
                        v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8",
                        v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul", "i", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;

// Multiply a Q register by a lane duplicated from another Q register: select
// the by-scalar instruction on the D subregister that holds the lane.  The
// DSubReg_* / SubReg_* transforms map the Q-register lane number to the
// subregister and to the lane index within it.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs   : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s",
                            int_arm_neon_vmulls, 1>;
defm VMULLu   : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u",
                            int_arm_neon_vmullu, 1>;
def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                        v8i16, v8i8, int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
                             int_arm_neon_vmulls>;
defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u",
                             int_arm_neon_vmullu>;

//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s",
                           int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s",
                             int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0,
                             IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q,
                             "vmla", "i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD,
                          "vmla", "f32", v2f32, fmul, fadd>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ,
                          "vmla", "f32", v4f32, fmul, fadd>;
// "sl" records: same mnemonic and operators, instantiated from the *SL
// classes.
defm VMLAsl   : N3VMulOpSL_HS<0b0000,
                              IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q,
                              "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD,
                            "vmla", "f32", v2f32, fmul, fadd>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ,
                            "vmla", "f32", v4f32, v2f32, fmul, fadd>;

// Q-register multiply-accumulate whose multiplier is a NEONvduplane of a
// Q register: select the "sl" instruction, handing it the D subregister
// chosen by DSubReg_*_reg and the lane index produced by SubReg_*_lane.
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                imm:$lane))))),
          (v8i16 (VMLAslv8i16
                   (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                   (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (NEONvduplane (v4i32 QPR:$src3),
                                                imm:$lane))))),
          (v4i32 (VMLAslv4i32
                   (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                   (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
                       (fmul (v4f32 QPR:$src2),
                             (v4f32 (NEONvduplane (v4f32 QPR:$src3),
                                                  imm:$lane))))),
          (v4f32 (VMLAslfq
                   (v4f32 QPR:$src1),
                   (v4f32 QPR:$src2),
                   (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLInt3_QHS<0, 1, 0b1000, 0,
                             "vmlal", "s", int_arm_neon_vmlals>;
defm VMLALu   : N3VLInt3_QHS<1, 1, 0b1000, 0,
                             "vmlal", "u", int_arm_neon_vmlalu>;

defm VMLALsls : N3VLInt3SL_HS<0, 0b0010,
                              "vmlal", "s", int_arm_neon_vmlals>;
defm VMLALslu : N3VLInt3SL_HS<1, 0b0010,
                              "vmlal", "u", int_arm_neon_vmlalu>;

// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0,
                            "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011,
                              "vqdmlal", "s", int_arm_neon_vqdmlal>;

// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0,
                             IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q,
                             "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD,
                          "vmls", "f32", v2f32, fmul, fsub>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ,
                          "vmls", "f32", v4f32, fmul, fsub>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100,
                              IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q,
                              "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD,
                            "vmls", "f32", v2f32, fmul, fsub>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ,
                            "vmls", "f32", v4f32, v2f32, fmul, fsub>;

// Same NEONvduplane folding as for VMLA above, with sub/fsub as the outer
// operator and the VMLS "sl" instructions as the selection result.
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                imm:$lane))))),
          (v8i16 (VMLSslv8i16
                   (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                   (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (NEONvduplane (v4i32 QPR:$src3),
                                                imm:$lane))))),
          (v4i32 (VMLSslv4i32
                   (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                   (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
                       (fmul (v4f32 QPR:$src2),
                             (v4f32 (NEONvduplane (v4f32 QPR:$src3),
                                                  imm:$lane))))),
          (v4f32 (VMLSslfq
                   (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                   (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLInt3_QHS<0, 1, 0b1010, 0,
                             "vmlsl", "s", int_arm_neon_vmlsls>;
defm VMLSLu   : N3VLInt3_QHS<1, 1, 0b1010, 0,
                             "vmlsl", "u", int_arm_neon_vmlslu>;

defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110,
                              "vmlsl", "s", int_arm_neon_vmlsls>;
defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110,
                              "vmlsl", "u", int_arm_neon_vmlslu>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0,
                            "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// The opcode literal is written with four digits (0b0111) like every
// sibling "sl" record (0b0010, 0b0011, 0b0110); the value is unchanged.
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111,
                              "vqdmlsl", "s", int_arm_neon_vqdmlsl>;

// Vector Subtract Operations.

// VSUB : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND,
                     "vsub", "f32", v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ,
                     "vsub", "f32", v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
// The trailing flag is 0 here, as for every other subtract record in this
// section: subtraction is not commutative, so the operands must never be
// swapped.  NOTE(review): the flag is presumed to be the multiclass's
// "commutable" bit (1 is passed for VMULL/VCEQ/VAND/VMAX, 0 for
// VSUBW/VHSUB/VQSUB) -- confirm against the N3VLInt_QHS definition.
defm VSUBLs   : N3VLInt_QHS<0, 1, 0b0010, 0, IIC_VSHLiD,
                            "vsubl", "s", int_arm_neon_vsubls, 0>;
defm VSUBLu   : N3VLInt_QHS<1, 1, 0b0010, 0, IIC_VSHLiD,
                            "vsubl", "u", int_arm_neon_vsublu, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VWInt_QHS<0, 1, 0b0011, 0,
                            "vsubw", "s", int_arm_neon_vsubws, 0>;
defm VSUBWu   : N3VWInt_QHS<1, 1, 0b0011, 0,
                            "vsubw", "u", int_arm_neon_vsubwu, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0, 1, 0b0110, 0, "vsubhn", "i",
                            int_arm_neon_vsubhn, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1, 1, 0b0110, 0, "vrsubhn", "i",
                            int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.

// VCEQ : Vector Compare Equal
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vceq", "i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0, 0, 0b00, 0b1110, 0, IIC_VBIND,
                     "vceq", "f32", v2i32, v2f32, NEONvceq, 1>;
def  VCEQfq   : N3VQ<0, 0, 0b00, 0b1110, 0, IIC_VBINQ,
                     "vceq", "f32", v4i32, v4f32, NEONvceq, 1>;
// For disassembly only.
defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                            "$dst, $src, #0">;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vcge", "s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1, 0, 0b00, 0b1110, 0, IIC_VBIND,
                     "vcge", "f32", v2i32, v2f32, NEONvcge, 0>;
def  VCGEfq   : N3VQ<1, 0, 0b00, 0b1110, 0, IIC_VBINQ,
                     "vcge", "f32", v4i32, v4f32, NEONvcge, 0>;
// For disassembly only.
defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                            "$dst, $src, #0">;
// For disassembly only.
defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                            "$dst, $src, #0">;

// VCGT : Vector Compare Greater Than
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vcgt", "s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1, 0, 0b10, 0b1110, 0, IIC_VBIND,
                     "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1, 0, 0b10, 0b1110, 0, IIC_VBINQ,
                     "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>;
// For disassembly only.
defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                            "$dst, $src, #0">;
// For disassembly only.
defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                            "$dst, $src, #0">;

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32",
                        v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge", "f32",
                        v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt", "f32",
                        v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt", "f32",
                        v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vtst", "", NEONvtst, 1>;

// Vector Bitwise Operations.

// VAND : Vector Bitwise AND
def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                  "vand", v2i32, v2i32, and, 1>;
def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                  "vand", v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                  "veor", v2i32, v2i32, xor, 1>;
def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                  "veor", v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD,
                  "vorr", v2i32, v2i32, or, 1>;
def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ,
                  "vorr", v4i32, v4i32, or, 1>;

// VBIC : Vector Bitwise Bit Clear (AND NOT)
// Matches src1 & ~src2, with the complement expressed through vnot_conv.
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1,
                 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                 IIC_VBINiD, "vbic", "$dst, $src1, $src2", "",
                 [(set DPR:$dst,
                       (v2i32 (and DPR:$src1, (vnot_conv DPR:$src2))))]>;
def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1,
                 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                 IIC_VBINiQ, "vbic", "$dst, $src1, $src2", "",
                 [(set QPR:$dst,
                       (v4i32 (and QPR:$src1, (vnot_conv QPR:$src2))))]>;

// VORN : Vector Bitwise OR NOT
// Matches src1 | ~src2.
def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1,
                 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                 IIC_VBINiD, "vorn", "$dst, $src1, $src2", "",
                 [(set DPR:$dst,
                       (v2i32 (or DPR:$src1, (vnot_conv DPR:$src2))))]>;
def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1,
                 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                 IIC_VBINiQ, "vorn", "$dst, $src1, $src2", "",
                 [(set QPR:$dst,
                       (v4i32 (or QPR:$src1, (vnot_conv QPR:$src2))))]>;

// VMVN : Vector Bitwise NOT
// NOTE(review): VMVNq is a Q-register record but uses the D itinerary
// IIC_VSHLiD, same as VMVNd -- confirm this is intended.
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                 (outs DPR:$dst), (ins DPR:$src),
                 IIC_VSHLiD, "vmvn", "$dst, $src", "",
                 [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                 (outs QPR:$dst), (ins QPR:$src),
                 IIC_VSHLiD, "vmvn", "$dst, $src", "",
                 [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
// The vnot_conv form of the complement selects the same instructions.
def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>;

// VBSL : Vector Bitwise Select
// Matches (src2 & src1) | (src3 & ~src1); $src1 is tied to $dst.
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1,
                 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
                 IIC_VCNTiD, "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
                 [(set DPR:$dst,
                       (v2i32 (or (and DPR:$src2, DPR:$src1),
                                  (and DPR:$src3,
                                       (vnot_conv DPR:$src1)))))]>;
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1,
                 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
                 IIC_VCNTiQ, "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
                 [(set QPR:$dst,
                       (v4i32 (or (and QPR:$src2, QPR:$src1),
                                  (and QPR:$src3,
                                       (vnot_conv QPR:$src1)))))]>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
                 IIC_VBINiD, "vbif", "$dst, $src2, $src3", "$src1 = $dst",
                 [/* For disassembly only; pattern left blank */]>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
                 IIC_VBINiQ, "vbif", "$dst, $src2, $src3", "$src1 = $dst",
                 [/* For disassembly only; pattern left blank */]>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
                 IIC_VBINiD, "vbit", "$dst, $src2, $src3", "$src1 = $dst",
                 [/* For disassembly only; pattern left blank */]>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
                 IIC_VBINiQ, "vbit", "$dst, $src2, $src3", "$src1 = $dst",
                 [/* For disassembly only; pattern left blank */]>;

// VBIT/VBIF are not yet implemented for instruction selection: the records
// above carry no pattern and exist for disassembly only.  The TwoAddress
// pass will not go looking for equivalent operations with different
// register constraints; it just inserts copies.

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs  : N3VInt_QHS<0, 0, 0b0111, 0,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vabd", "s", int_arm_neon_vabds, 0>;
defm VABDu  : N3VInt_QHS<1, 0, 0b0111, 0,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vabd", "u", int_arm_neon_vabdu, 0>;
def  VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND,
                      "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
def  VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ,
                      "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLInt_QHS<0, 1, 0b0111, 0, IIC_VBINi4Q,
                          "vabdl", "s", int_arm_neon_vabdls, 0>;
defm VABDLu : N3VLInt_QHS<1, 1, 0b0111, 0, IIC_VBINi4Q,
                          "vabdl", "u", int_arm_neon_vabdlu, 0>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs  : N3VInt3_QHS<0, 0, 0b0111, 1,
                          "vaba", "s", int_arm_neon_vabas>;
defm VABAu  : N3VInt3_QHS<1, 0, 0b0111, 1,
                          "vaba", "u", int_arm_neon_vabau>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLInt3_QHS<0, 1, 0b0101, 0,
                           "vabal", "s", int_arm_neon_vabals>;
defm VABALu : N3VLInt3_QHS<1, 1, 0b0101, 0,
                           "vabal", "u", int_arm_neon_vabalu>;

// Vector Maximum and Minimum.

// VMAX : Vector Maximum
// Signed/unsigned integer forms come from one multiclass each; the f32
// D and Q forms reuse the int_arm_neon_vmaxs intrinsic.
defm VMAXs  : N3VInt_QHS<0, 0, 0b0110, 0,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu  : N3VInt_QHS<1, 0, 0b0110, 0,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vmax", "u", int_arm_neon_vmaxu, 1>;
def  VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND,
                      "vmax", "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ,
                      "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMIN : Vector Minimum
// Mirrors VMAX; the f32 forms reuse int_arm_neon_vmins.
defm VMINs  : N3VInt_QHS<0, 0, 0b0110, 1,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu  : N3VInt_QHS<1, 0, 0b0110, 1,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vmin", "u", int_arm_neon_vminu, 1>;
def  VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND,
                      "vmin", "f32", v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ,
                      "vmin", "f32", v4f32, v4f32, int_arm_neon_vmins, 1>;

// Vector Pairwise Operations.

// VPADD : Vector Pairwise Add
def VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd", "i8",
                       v8i8, v8i8, int_arm_neon_vpadd, 0>;
def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd", "i16",
                       v4i16, v4i16, int_arm_neon_vpadd, 0>;
def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd", "i32",
                       v2i32, v2i32, int_arm_neon_vpadd, 0>;
def VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd", "f32",
                       v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                            int_arm_neon_vpaddls>;
defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                            int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                             int_arm_neon_vpadals>;
defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                             int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "s8",
                       v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "s16",
                       v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "s32",
                       v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "u8",
                       v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "u16",
                       v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "u32",
                       v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax", "f32",
                       v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN : Vector Pairwise Minimum
def VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "s8",
                       v8i8, v8i8, int_arm_neon_vpmins, 0>;
def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "s16",
                       v4i16, v4i16, int_arm_neon_vpmins, 0>;
def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "s32",
                       v2i32, v2i32, int_arm_neon_vpmins, 0>;
def VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "u8",
                       v8i8, v8i8, int_arm_neon_vpminu, 0>;
def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "u16",
                       v4i16, v4i16, int_arm_neon_vpminu, 0>;
def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "u32",
                       v2i32, v2i32, int_arm_neon_vpminu, 0>;
def VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin", "f32",
                       v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE : Vector Reciprocal Estimate
def VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                       IIC_VUNAD, "vrecpe", "u32",
                       v2i32, v2i32, int_arm_neon_vrecpe>;
def VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                       IIC_VUNAQ, "vrecpe", "u32",
                       v4i32, v4i32, int_arm_neon_vrecpe>;
def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                       IIC_VUNAD, "vrecpe", "f32",
                       v2f32, v2f32, int_arm_neon_vrecpe>;
def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                       IIC_VUNAQ, "vrecpe", "f32",
                       v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS : Vector Reciprocal Step
def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1,
                       IIC_VRECSD, "vrecps", "f32",
                       v2f32, v2f32, int_arm_neon_vrecps, 1>;
def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1,
                       IIC_VRECSQ, "vrecps", "f32",
                       v4f32, v4f32, int_arm_neon_vrecps, 1>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                        IIC_VUNAD, "vrsqrte", "u32",
                        v2i32, v2i32, int_arm_neon_vrsqrte>;
def VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                        IIC_VUNAQ, "vrsqrte", "u32",
                        v4i32, v4i32, int_arm_neon_vrsqrte>;
def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                        IIC_VUNAD, "vrsqrte", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrte>;
def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                        IIC_VUNAQ, "vrsqrte", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1,
                        IIC_VRECSD, "vrsqrts", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1,
                        IIC_VRECSQ, "vrsqrts", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;

// Vector Shifts.

// VSHL : Vector Shift
defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0,
                         IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                         "vshl", "s", int_arm_neon_vshifts, 0>;
defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0,
                         IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                         "vshl", "u", int_arm_neon_vshiftu, 0>;
// VSHL : Vector Shift Left (Immediate)
defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD,
                        "vshl", "i", NEONvshl>;
// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD,
                        "vshr", "s", NEONvshrs>;
defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD,
                        "vshr", "u", NEONvshru>;

// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// N2VLShMax forwards everything except op21_16 to N2VLSh and then pins
// instruction bits 21-16 to op21_16.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
                bit op7, bit op6, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, OpNode> {
  let Inst{21-16} = op21_16;
}
def VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                         v8i16, v8i8, NEONvshlli>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                         v4i32, v4i16, NEONvshlli>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                         v2i64, v2i32, NEONvshlli>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 0, 1, IIC_VSHLiD,
                        "vshrn", "i", NEONvshrn>;

// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSD<0, 0, 0b0101, 0,
                          IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                          "vrshl", "s", int_arm_neon_vrshifts, 0>;
defm VRSHLu : N3VInt_QHSD<1, 0, 0b0101, 0,
                          IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                          "vrshl", "u", int_arm_neon_vrshiftu, 0>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D,
                         "vrshr", "s", NEONvrshrs>;
defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D,
                         "vrshr", "u", NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                         "vrshrn", "i", NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSD<0, 0, 0b0100, 1,
                          IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                          "vqshl", "s", int_arm_neon_vqshifts, 0>;
defm VQSHLu : N3VInt_QHSD<1, 0, 0b0100, 1,
                          IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                          "vqshl", "u", int_arm_neon_vqshiftu, 0>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D,
                          "vqshl", "s", NEONvqshls>;
defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D,
                          "vqshl", "u", NEONvqshlu>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D,
                          "vqshlu", "s", NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                          "vqshrn", "s", NEONvqshrns>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                          "vqshrn", "u", NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D,
                          "vqshrun", "s", NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1,
                           IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                           "vqrshl", "s", int_arm_neon_vqrshifts, 0>;
defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1,
                           IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                           "vqrshl", "u", int_arm_neon_vqrshiftu, 0>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                           "vqrshrn", "s", NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                           "vqrshrn", "u", NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                           "vqrshrun", "s", NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs  : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu  : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli>;
// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri>;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
defm VABS   : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                         IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
                         int_arm_neon_vabs>;
def  VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                      IIC_VUNAD, "vabs", "f32",
                      v2f32, v2f32, int_arm_neon_vabs>;
def  VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                      IIC_VUNAQ, "vabs", "f32",
                      v4f32, v4f32, int_arm_neon_vabs>;

// VQABS : Vector Saturating Absolute Value
defm VQABS  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                         IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                         int_arm_neon_vqabs>;

// Vector Negate.

// Negation is matched as (0 - x); vneg_conv is the same fragment built on
// immAllZerosV_bc instead of immAllZerosV.
def vneg      : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>;

// Integer VNEG on a D register; `size` and `Ty` select the element width.
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0,
        (outs DPR:$dst), (ins DPR:$src),
        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
// Integer VNEG on a Q register.  Uses the Q-register itinerary
// (IIC_VSHLiQ), matching the D/Q itinerary split used by the other
// D/Q record pairs in this section (e.g. IIC_VUNAD / IIC_VUNAQ for the
// floating-point VNEG below).
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0,
        (outs QPR:$dst), (ins QPR:$src),
        IIC_VSHLiQ, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (vneg QPR:$src)))]>;

// VNEG : Vector Negate
def VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                   (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
                   "vneg", "f32", "$dst, $src", "",
                   [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                   (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
                   "vneg", "f32", "$dst, $src", "",
                   [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;

// Route the vneg_conv form of integer negation to the same instructions.
def : Pat<(v8i8  (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vneg_conv DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vneg_conv QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                        int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                       int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                       int_arm_neon_vclz>;
// VCNT : Vector Count One Bits
def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                    IIC_VCNTiD, "vcnt", "8",
                    v8i8, v8i8, int_arm_neon_vcnt>;
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                    IIC_VCNTiQ, "vcnt", "8",
                    v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Swap -- for disassembly only.
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                 (outs DPR:$dst), (ins DPR:$src), NoItinerary,
                 "vswp", "$dst, $src", "", []>;
def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                 (outs QPR:$dst), (ins QPR:$src), NoItinerary,
                 "vswp", "$dst, $src", "", []>;

// Vector Move Operations.

// VMOV : Vector Move (Register)
// Both records have an empty pattern list.
// NOTE(review): VMOVQ is the Q-register form but uses IIC_VMOVD -- confirm
// whether a Q-specific itinerary should be used instead.

def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1,
                    (outs DPR:$dst), (ins DPR:$src),
                    IIC_VMOVD, "vmov", "$dst, $src", "", []>;
def VMOVQ    : N3VX<0, 0, 0b10, 0b0001, 1, 1,
                    (outs QPR:$dst), (ins QPR:$src),
                    IIC_VMOVD, "vmov", "$dst, $src", "", []>;

// VMOV : Vector Move (Immediate)
//
// For each element size there is a pair of records:
//   * VMOV_get_imm<N> -- an SDNodeXForm returning ARM::getVMOVImm for the
//     build_vector, called with a size argument of 1, 2, 4 or 8;
//   * vmovImm<N>      -- a PatLeaf that matches a build_vector only when the
//     same call yields a non-null node, and applies that xform.

// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 1, *CurDAG);
}]>;
def vmovImm8 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0;
}], VMOV_get_imm8>;

// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 2, *CurDAG);
}]>;
def vmovImm16 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0;
}], VMOV_get_imm16>;

// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 4, *CurDAG);
}]>;
def vmovImm32 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0;
}], VMOV_get_imm32>;

// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 8, *CurDAG);
}]>;
def vmovImm64 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0;
}], VMOV_get_imm64>;

// Note: Some of the cmode bits in the following VMOV instructions need to
// be encoded based on the immed values.
// VMOV (immediate) records, one D-register and one Q-register form per
// element size.  Encoding bits written as '?' are left unspecified here.

def VMOVv8i8
  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1,
             (outs DPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm,
             "vmov", "i8", "$dst, $SIMM", "",
             [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8
  : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1,
             (outs QPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm,
             "vmov", "i8", "$dst, $SIMM", "",
             [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;

def VMOVv4i16
  : N1ModImm<1, 0b000, {1,0,?,?}, 0, 0, {?}, 1,
             (outs DPR:$dst), (ins h16imm:$SIMM), IIC_VMOVImm,
             "vmov", "i16", "$dst, $SIMM", "",
             [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
def VMOVv8i16
  : N1ModImm<1, 0b000, {1,0,?,?}, 0, 1, {?}, 1,
             (outs QPR:$dst), (ins h16imm:$SIMM), IIC_VMOVImm,
             "vmov", "i16", "$dst, $SIMM", "",
             [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;

def VMOVv2i32
  : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, {?}, 1,
             (outs DPR:$dst), (ins h32imm:$SIMM), IIC_VMOVImm,
             "vmov", "i32", "$dst, $SIMM", "",
             [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
def VMOVv4i32
  : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, {?}, 1,
             (outs QPR:$dst), (ins h32imm:$SIMM), IIC_VMOVImm,
             "vmov", "i32", "$dst, $SIMM", "",
             [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;

def VMOVv1i64
  : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1,
             (outs DPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm,
             "vmov", "i64", "$dst, $SIMM", "",
             [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64
  : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1,
             (outs QPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm,
             "vmov", "i64", "$dst, $SIMM", "",
             [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;

// VMOV : Vector Get Lane (move scalar to ARM core register)
//
// The 8- and 16-bit forms match the NEONvgetlanes / NEONvgetlaneu nodes; the
// 32-bit form matches a plain extractelt.

def VGETLNs8
  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]",
              [(set GPR:$dst,
                    (NEONvgetlanes (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNs16
  : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]",
              [(set GPR:$dst,
                    (NEONvgetlanes (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNu8
  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]",
              [(set GPR:$dst,
                    (NEONvgetlaneu (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNu16
  : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]",
              [(set GPR:$dst,
                    (NEONvgetlaneu (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNi32
  : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]",
              [(set GPR:$dst,
                    (extractelt (v2i32 DPR:$src), imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

// Q-register sources: extract a D sub-register and apply the D-register
// instruction to it with a transformed lane operand.
// NOTE(review): DSubReg_*_reg / SubReg_*_lane are defined elsewhere;
// presumably they pick the D half holding the lane and the lane index within
// that half -- confirm against their definitions.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32
            (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))),
            (SubReg_i32_lane imm:$lane))>;

// f32 element extraction: copy the vector into the *_VFP2 register class and
// take an S sub-register; no instruction is emitted by the pattern itself.
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
            (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)),
            (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG
            (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)),
            (SSubReg_f32_reg imm:$src2))>;

// 64-bit element extraction is a D sub-register read.  The v2i64 variant is
// left disabled, as in the original.
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)
//
// The destination register is tied to $src1, so the remaining lanes of the
// input vector are carried through to the result.

let Constraints = "$src1 = $dst" in {
def VSETLNi8
  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?},
              (outs DPR:$dst),
              (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
              IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2",
              [(set DPR:$dst,
                    (vector_insert (v8i8 DPR:$src1), GPR:$src2, imm:$lane))]>;
def VSETLNi16
  : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1},
              (outs DPR:$dst),
              (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
              IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2",
              [(set DPR:$dst,
                    (vector_insert (v4i16 DPR:$src1), GPR:$src2, imm:$lane))]>;
def VSETLNi32
  : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00,
              (outs DPR:$dst),
              (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
              IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2",
              [(set DPR:$dst,
                    (insertelt (v2i32 DPR:$src1), GPR:$src2, imm:$lane))]>;
}

// Q-register lane insertion: run the D-register VSETLN on the extracted D
// sub-register, then insert the updated half back into the Q register.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG
                   QPR:$src1,
                   (v8i8 (VSETLNi8
                           (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                                 (DSubReg_i8_reg imm:$lane))),
                           GPR:$src2,
                           (SubReg_i8_lane imm:$lane))),
                   (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG
                   QPR:$src1,
                   (v4i16 (VSETLNi16
                            (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                                   (DSubReg_i16_reg imm:$lane))),
                            GPR:$src2,
                            (SubReg_i16_lane imm:$lane))),
                   (DSubReg_i16_reg imm:$lane)))>;
// 32-bit lane insertion into a Q register: apply VSETLNi32 to the extracted D
// sub-register and insert the result back into the Q register.
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG
                   QPR:$src1,
                   (v2i32 (VSETLNi32
                            (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                                   (DSubReg_i32_reg imm:$lane))),
                            GPR:$src2,
                            (SubReg_i32_lane imm:$lane))),
                   (DSubReg_i32_reg imm:$lane)))>;

// f32 lane insertion: copy the vector into the *_VFP2 register class and
// write the S sub-register directly.
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2,
                         (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2,
                         (SSubReg_f32_reg imm:$src3))>;

// 64-bit lane insertion is a D sub-register write.  The v2i64 variant is left
// disabled, as in the original.
//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector from FP registers: place the scalar in sub-register 0 of
// an IMPLICIT_DEF vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;

// scalar_to_vector from a core register, D-register results: VSETLN into
// lane 0 of an IMPLICIT_DEF vector.
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

// Q-register results: build the D-register value as above and insert it as
// D sub-register 0 of an IMPLICIT_DEF Q register.
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v16i8 (IMPLICIT_DEF)),
            (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            arm_dsubreg_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v8i16 (IMPLICIT_DEF)),
            (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            arm_dsubreg_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v4i32 (IMPLICIT_DEF)),
            (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            arm_dsubreg_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)
//
// VDUPD / VDUPQ differ only in the destination register class.

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3,
          (outs DPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", Dt, "$dst, $src",
          [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3,
          (outs QPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", Dt, "$dst, $src",
          [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;

def VDUP8d  : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
def VDUP8q  : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// f32 duplicates of a core register: same encodings as VDUP32d / VDUP32q,
// matching a GPR value bitconverted to f32.
def VDUPfd
  : NVDup<0b11101000, 0b1011, 0b00,
          (outs DPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", "32", "$dst, $src",
          [(set DPR:$dst,
                (v2f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;
def VDUPfq
  : NVDup<0b11101010, 0b1011, 0b00,
          (outs QPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", "32", "$dst, $src",
          [(set QPR:$dst,
                (v4f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)
//
// Both classes read a lane of a D register; VDUPLNQ writes a Q register, so
// it takes separate result and operand types.

class VDUPLND<bits<2> op19_18, bits<2> op17_16,
              string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
        (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane),
        IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]", "",
        [(set DPR:$dst,
              (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;

class VDUPLNQ<bits<2> op19_18, bits<2> op17_16,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
        (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane),
        IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]", "",
        [(set QPR:$dst,
              (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.

def VDUPLN8d  : VDUPLND<{?,?}, {?,1}, "vdup", "8", v8i8>;
def VDUPLN16d : VDUPLND<{?,?}, {1,0}, "vdup", "16", v4i16>;
def VDUPLN32d : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2i32>;
def VDUPLNfd  : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2f32>;
def VDUPLN8q  : VDUPLNQ<{?,?}, {?,1}, "vdup", "8", v16i8, v8i8>;
def VDUPLN16q : VDUPLNQ<{?,?}, {1,0}, "vdup", "16", v8i16, v4i16>;
def VDUPLN32q : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4i32, v2i32>;
def VDUPLNfq  : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4f32, v2f32>;

// Lane duplication from a Q-register source: extract the D sub-register and
// use the Q-result instruction with a transformed lane operand.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q
                   (v8i8 (EXTRACT_SUBREG QPR:$src,
                                         (DSubReg_i8_reg imm:$lane))),
                   (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q
                   (v4i16 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q
                   (v2i32 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLNfq
                   (v2f32 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

// Duplicate an f32 held in an S register; the source operand is printed with
// the "lane" modifier.
def VDUPfdf
  : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0,
        (outs DPR:$dst), (ins SPR:$src),
        IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
        [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;

def VDUPfqf
  : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
        (outs QPR:$dst), (ins SPR:$src),
        IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
        [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// 64-bit lane duplication: extract the selected D sub-register and insert it
// into the sub-register given by DSubReg_f64_other_reg.
def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
          (INSERT_SUBREG
            QPR:$src,
            (i64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
            (DSubReg_f64_other_reg imm:$lane))>;
def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
          (INSERT_SUBREG
            QPR:$src,
            (f64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
            (DSubReg_f64_other_reg imm:$lane))>;

// VMOVN : Vector Narrowing Move
defm VMOVN
  : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
                "vmovn", "i", int_arm_neon_vmovn>;

// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs
  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu
  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu
  : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                "vqmovun", "s", int_arm_neon_vqmovnsu>;

// VMOVL : Vector Lengthening Move
defm VMOVLs
  : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s", int_arm_neon_vmovls>;
defm VMOVLu
  : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u", int_arm_neon_vmovlu>;

// Vector Conversions.
// VCVT : Vector Convert Between Floating-Point and Integers
//
// D-register forms.
def VCVTf2sd
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
         "vcvt", "s32.f32", v2i32, v2f32, fp_to_sint>;
def VCVTf2ud
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
         "vcvt", "u32.f32", v2i32, v2f32, fp_to_uint>;
def VCVTs2fd
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
         "vcvt", "f32.s32", v2f32, v2i32, sint_to_fp>;
def VCVTu2fd
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
         "vcvt", "f32.u32", v2f32, v2i32, uint_to_fp>;

// Q-register forms.
def VCVTf2sq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
         "vcvt", "s32.f32", v4i32, v4f32, fp_to_sint>;
def VCVTf2uq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
         "vcvt", "u32.f32", v4i32, v4f32, fp_to_uint>;
def VCVTs2fq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
         "vcvt", "f32.s32", v4f32, v4i32, sint_to_fp>;
def VCVTu2fq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
         "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
//
// These are selected through the int_arm_neon_vcvt* intrinsics.
// D-register forms.
def VCVTf2xsd
  : N2VCvtD<0, 1, 0b1111, 0, 1,
            "vcvt", "s32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud
  : N2VCvtD<1, 1, 0b1111, 0, 1,
            "vcvt", "u32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd
  : N2VCvtD<0, 1, 0b1110, 0, 1,
            "vcvt", "f32.s32", v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd
  : N2VCvtD<1, 1, 0b1110, 0, 1,
            "vcvt", "f32.u32", v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;

// Q-register forms.
def VCVTf2xsq
  : N2VCvtQ<0, 1, 0b1111, 0, 1,
            "vcvt", "s32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq
  : N2VCvtQ<1, 1, 0b1111, 0, 1,
            "vcvt", "u32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq
  : N2VCvtQ<0, 1, 0b1110, 0, 1,
            "vcvt", "f32.s32", v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq
  : N2VCvtQ<1, 1, 0b1110, 0, 1,
            "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;

// Vector Reverse.
// VREV64 : Vector Reverse elements within 64-bit doublewords
//
// The D and Q classes differ only in register class and the Q encoding bit;
// op19_18 carries the element size.

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def VREV64df  : VREV64D<0b10, "vrev64", "32", v2f32>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def VREV64qf  : VREV64Q<0b10, "vrev64", "32", v4f32>;

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;

def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// VEXT : Vector Extract
//
// The four op11_8 bits are left unspecified ('?') in both classes.

class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0,1,0b11,{?,?,?,?},0,0,
        (outs DPR:$dst), (ins DPR:$lhs, DPR:$rhs, i32imm:$index),
        IIC_VEXTD, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
        [(set DPR:$dst,
              (Ty (NEONvext (Ty DPR:$lhs), (Ty DPR:$rhs), imm:$index)))]>;

class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0,1,0b11,{?,?,?,?},1,0,
        (outs QPR:$dst), (ins QPR:$lhs, QPR:$rhs, i32imm:$index),
        IIC_VEXTQ, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
        [(set QPR:$dst,
              (Ty (NEONvext (Ty QPR:$lhs), (Ty QPR:$rhs), imm:$index)))]>;

def VEXTd8  : VEXTd<"vext", "8", v8i8>;
def VEXTd16 : VEXTd<"vext", "16", v4i16>;
def VEXTd32 : VEXTd<"vext", "32", v2i32>;
def VEXTdf  : VEXTd<"vext", "32", v2f32>;

def VEXTq8  : VEXTq<"vext", "8", v16i8>;
def VEXTq16 : VEXTq<"vext", "16", v8i16>;
def VEXTq32 : VEXTq<"vext", "32", v4i32>;
def VEXTqf  : VEXTq<"vext", "32", v4f32>;

// VTRN : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.
// VTBL : Vector Table Lookup
//
// One record per table length (1-4 D registers).  The multi-register forms
// are marked hasExtraSrcRegAllocReq.
def VTBL1 : N3V<1,1,0b11,0b1000,0,0,
                (outs DPR:$dst),
                (ins DPR:$tbl1, DPR:$src),
                IIC_VTB1,
                "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
                [(set DPR:$dst,
                      (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2 : N3V<1,1,0b11,0b1001,0,0,
                (outs DPR:$dst),
                (ins DPR:$tbl1, DPR:$tbl2, DPR:$src),
                IIC_VTB2,
                "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "",
                [(set DPR:$dst,
                      (v8i8 (int_arm_neon_vtbl2
                              DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBL3 : N3V<1,1,0b11,0b1010,0,0,
                (outs DPR:$dst),
                (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
                IIC_VTB3,
                "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "",
                [(set DPR:$dst,
                      (v8i8 (int_arm_neon_vtbl3
                              DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBL4 : N3V<1,1,0b11,0b1011,0,0,
                (outs DPR:$dst),
                (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
                IIC_VTB4,
                "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "",
                [(set DPR:$dst,
                      (v8i8 (int_arm_neon_vtbl4
                              DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4,
                              DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1

// VTBX : Vector Table Extension
//
// Same shapes as VTBL, with an extra $orig input tied to $dst.
def VTBX1 : N3V<1,1,0b11,0b1000,1,0,
                (outs DPR:$dst),
                (ins DPR:$orig, DPR:$tbl1, DPR:$src),
                IIC_VTBX1,
                "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
                [(set DPR:$dst,
                      (v8i8 (int_arm_neon_vtbx1
                              DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2 : N3V<1,1,0b11,0b1001,1,0,
                (outs DPR:$dst),
                (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src),
                IIC_VTBX2,
                "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst",
                [(set DPR:$dst,
                      (v8i8 (int_arm_neon_vtbx2
                              DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBX3 : N3V<1,1,0b11,0b1010,1,0,
                (outs DPR:$dst),
                (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
                IIC_VTBX3,
                "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src",
                "$orig = $dst",
                [(set DPR:$dst,
                      (v8i8 (int_arm_neon_vtbx3
                              DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3,
                              DPR:$src)))]>;
def VTBX4 : N3V<1,1,0b11,0b1011,1,0,
                (outs DPR:$dst),
                (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4,
                     DPR:$src),
                IIC_VTBX4,
                "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
                "$orig = $dst",
                [(set DPR:$dst,
                      (v8i8 (int_arm_neon_vtbx4
                              DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3,
                              DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// Pattern helpers for scalar f32 operations carried out by NEON vector
// instructions: each scalar operand is inserted into S sub-register 0 of an
// IMPLICIT_DEF vector, the vector instruction is applied, and S sub-register
// 0 of the result is extracted.

// Unary operation.
class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
  : NEONFPPat<(ResTy (OpNode SPR:$a)),
              (EXTRACT_SUBREG
                (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
                                           SPR:$a, arm_ssubreg_0))),
                arm_ssubreg_0)>;

// Binary operation.
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
                (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                            SPR:$a, arm_ssubreg_0),
                             (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                            SPR:$b, arm_ssubreg_0))),
                arm_ssubreg_0)>;

// Multiply feeding an accumulate-style operation: OpNode(acc, MulNode(a, b)).
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
                (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                     SPR:$acc, arm_ssubreg_0),
                      (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                     SPR:$a, arm_ssubreg_0),
                      (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                     SPR:$b, arm_ssubreg_0)),
                arm_ssubreg_0)>;

// These need separate instructions because they must use the DPR_VFP2
// register class, which has SPR sub-registers.
// Scalar single-precision operations implemented with NEON D-register
// instructions.  Each record is paired with an N2VSPat / N3VSPat that wraps
// the scalar operands into a vector and extracts the scalar result.

// Vector Add Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>;
def : N3VSPat<fadd, VADDfd_sfp>;

// Vector Sub Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>;
def : N3VSPat<fsub, VSUBfd_sfp>;

// Vector Multiply Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>;
def : N3VSPat<fmul, VMULfd_sfp>;

// Vector Multiply-Accumulate/Subtract used for single-precision FP
// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
// we want to avoid them for now. e.g., alternating vmla/vadd instructions.

//let neverHasSideEffects = 1 in
//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
//                           v2f32, fmul, fadd>;
//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;

//let neverHasSideEffects = 1 in
//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
//                           v2f32, fmul, fsub>;
//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;

// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0,
                     (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
                     "vabs", "f32", "$dst, $src", "", []>;
def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;

// Vector Negate used for single-precision FP
let neverHasSideEffects = 1 in
def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                     (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
                     "vneg", "f32", "$dst, $src", "", []>;
def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;

// Vector Maximum used for single-precision FP
let neverHasSideEffects = 1 in
def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
                     "vmax", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmax, VMAXfd_sfp>;

// Vector Minimum used for single-precision FP
// The third encoding argument is 0b10 here: in the floating-point VMAX/VMIN
// encoding, bit 21 is what distinguishes VMIN (1) from VMAX (0).  With 0b00
// this record would carry exactly the same encoding bits as VMAXfd_sfp.
let neverHasSideEffects = 1 in
def VMINfd_sfp : N3V<0, 0, 0b10, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
                     "vmin", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmin, VMINfd_sfp>;

// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                        v2i32, v2f32, fp_to_sint>;
def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;

let neverHasSideEffects = 1 in
def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                        v2i32, v2f32, fp_to_uint>;
def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;

let neverHasSideEffects = 1 in
def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                        v2f32, v2i32, sint_to_fp>;
def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;

let neverHasSideEffects = 1 in
def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                        v2f32, v2i32, uint_to_fp>;
def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
//
// A bitconvert between two types held in the same register class selects to
// the source register itself, retyped; no instruction is emitted.

// 64-bit (D register) types.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

// 128-bit (Q register) types.
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;