ARMInstrNEON.td revision d4897545e4e21d0dbdc788c5576f4b80071a95d8
//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profile shared by the vector compare / test nodes: one integer result
// and two operands that are constrained to have the same type as each other.
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;

// Vector compare and bit-test nodes, all using the SDTARMVCMP profile.
def NEONvceq  : SDNode<"ARMISD::VCEQ",  SDTARMVCMP>;
def NEONvcge  : SDNode<"ARMISD::VCGE",  SDTARMVCMP>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt  : SDNode<"ARMISD::VCGT",  SDTARMVCMP>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst  : SDNode<"ARMISD::VTST",  SDTARMVCMP>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
// Plain shift: integer result, source of the same type, i32 shift amount.
def SDTARMVSH
  : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
// Long / narrow shift: integer result and integer source whose types are not
// tied together, i32 shift amount.
def SDTARMVSHX
  : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, i32>]>;
// Shift and insert: integer result, two sources of the result type, i32 shift
// amount.
def SDTARMVSHINS
  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

// Basic shifts.
def NEONvshl      : SDNode<"ARMISD::VSHL",      SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs",     SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu",     SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs",    SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu",    SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi",    SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN",     SDTARMVSHX>;

// Rounding shifts.
def NEONvrshrs    : SDNode<"ARMISD::VRSHRs",    SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu",    SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN",    SDTARMVSHX>;

// Saturating shifts.
def NEONvqshls    : SDNode<"ARMISD::VQSHLs",    SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu",    SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu",   SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs",   SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu",   SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu",  SDTARMVSHX>;

// Saturating rounding shifts.
def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs",  SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu",  SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift and insert.
def NEONvsli      : SDNode<"ARMISD::VSLI",      SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI",      SDTARMVSHINS>;

// Lane extraction: i32 result, integer vector source, i32 lane operand.
def SDTARMVGETLN
  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Duplicate: the only constraint is that the result is a vector.
def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane
  : SDNode<"ARMISD::VDUPLANE",
           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i32>]>>;

// VEXT: vector result, two sources of the result type, i32 immediate.
def SDTARMVEXT
  : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// One-input shuffles: the result is a vector of the same type as the source.
def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64  : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32  : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16  : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-input shuffles: two results and two sources, all of one vector type.
def SDTARMVSHUF2
  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                         SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//

// addrmode_neonldstm := reg
//
/* TODO: Take advantage of vldm.
def addrmode_neonldstm : Operand<i32>,
                ComplexPattern<i32, 2, "SelectAddrModeNeonLdStM", []> {
  let PrintMethod = "printAddrNeonLdStMOperand";
  let MIOperandInfo = (ops GPR, i32imm);
}
*/

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

/* TODO: Take advantage of vldm.
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
def VLDMD : NI<(outs),
               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
               IIC_fpLoadm,
               "vldm${addr:submode} ${addr:base}, $dst1",
               []> {
  let Inst{27-25} = 0b110;
  let Inst{20} = 1;
  let Inst{11-9} = 0b101;
}

def VLDMS : NI<(outs),
               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
               IIC_fpLoadm,
               "vldm${addr:submode} ${addr:base}, $dst1",
               []> {
  let Inst{27-25} = 0b110;
  let Inst{20} = 1;
  let Inst{11-9} = 0b101;
}
}
*/

// Use vldmia to load a Q register as a D register pair.
// Selected for a v2f64 load through an addrmode4 address.
def VLDRQ
  : NI4<(outs QPR:$dst),
        (ins addrmode4:$addr),
        IIC_fpLoadm,
        "vldmia $addr, ${dst:dregpair}",
        [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
  let Inst{27-25} = 0b110;
  let Inst{24}    = 0; // P bit
  let Inst{23}    = 1; // U bit
  let Inst{20}    = 1;
  let Inst{11-9}  = 0b101;
}

// Use vstmia to store a Q register as a D register pair.
// Selected for a v2f64 store through an addrmode4 address.
def VSTRQ
  : NI4<(outs),
        (ins QPR:$src, addrmode4:$addr),
        IIC_fpStorem,
        "vstmia $addr, ${src:dregpair}",
        [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
  let Inst{27-25} = 0b110;
  let Inst{24}    = 0; // P bit
  let Inst{23}    = 1; // U bit
  let Inst{20}    = 0;
  let Inst{11-9}  = 0b101;
}

//   VLD1     : Vector Load (multiple single elements)
// VLD1D loads one D register, VLD1Q loads one Q register printed as a D
// register pair.  Both are matched from the intrinsic passed as IntOp.
class VLD1D<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : NLdSt<0,0b10,0b0111,op7_4,
          (outs DPR:$dst),
          (ins addrmode6:$addr),
          IIC_VLD1,
          !strconcat(OpcodeStr, "\t\\{$dst\\}, $addr"), "",
          [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
class VLD1Q<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : NLdSt<0,0b10,0b1010,op7_4,
          (outs QPR:$dst),
          (ins addrmode6:$addr),
          IIC_VLD1,
          !strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"), "",
          [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;

def VLD1d8  : VLD1D<0b0000, "vld1.8",  v8i8,  int_arm_neon_vld1>;
def VLD1d16 : VLD1D<0b0100, "vld1.16", v4i16, int_arm_neon_vld1>;
def VLD1d32 : VLD1D<0b1000, "vld1.32", v2i32, int_arm_neon_vld1>;
def VLD1df  : VLD1D<0b1000, "vld1.32", v2f32, int_arm_neon_vld1>;
def VLD1d64 : VLD1D<0b1100, "vld1.64", v1i64, int_arm_neon_vld1>;

def VLD1q8  : VLD1Q<0b0000, "vld1.8",  v16i8, int_arm_neon_vld1>;
def VLD1q16 : VLD1Q<0b0100, "vld1.16", v8i16, int_arm_neon_vld1>;
def VLD1q32 : VLD1Q<0b1000, "vld1.32", v4i32, int_arm_neon_vld1>;
def VLD1qf  : VLD1Q<0b1000, "vld1.32", v4f32, int_arm_neon_vld1>;
def VLD1q64 : VLD1Q<0b1100, "vld1.64", v2i64, int_arm_neon_vld1>;

let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {

//   VLD2     : Vector Load (multiple 2-element structures)
// These classes carry no selection pattern (empty pattern list).
class VLD2D<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b10,0b1000,op7_4,
          (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr),
          IIC_VLD2,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), "", []>;
class VLD2Q<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b10,0b0011,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr),
          IIC_VLD2,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
          "", []>;

def VLD2d8  : VLD2D<0b0000, "vld2.8">;
def VLD2d16 : VLD2D<0b0100, "vld2.16">;
def VLD2d32 : VLD2D<0b1000, "vld2.32">;
// The 64-bit variant is emitted as a two-register vld1.64.
def VLD2d64
  : NLdSt<0,0b10,0b1010,0b1100,
          (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr),
          IIC_VLD1,
          "vld1.64\t\\{$dst1,$dst2\\}, $addr", "", []>;

def VLD2q8  : VLD2Q<0b0000, "vld2.8">;
def VLD2q16 : VLD2Q<0b0100, "vld2.16">;
def VLD2q32 : VLD2Q<0b1000, "vld2.32">;

//   VLD3     : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b10,0b0100,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr),
          IIC_VLD3,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "", []>;
// Variant with an extra GPR result ($wb) tied to the address operand.
class VLD3WB<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b10,0b0101,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$addr),
          IIC_VLD3,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"),
          "$addr.addr = $wb", []>;

def VLD3d8  : VLD3D<0b0000, "vld3.8">;
def VLD3d16 : VLD3D<0b0100, "vld3.16">;
def VLD3d32 : VLD3D<0b1000, "vld3.32">;
// The 64-bit variant is emitted as a three-register vld1.64.
def VLD3d64
  : NLdSt<0,0b10,0b0110,0b1100,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr),
          IIC_VLD1,
          "vld1.64\t\\{$dst1,$dst2,$dst3\\}, $addr", "", []>;

// vld3 to double-spaced even registers.
def VLD3q8a  : VLD3WB<0b0000, "vld3.8">;
def VLD3q16a : VLD3WB<0b0100, "vld3.16">;
def VLD3q32a : VLD3WB<0b1000, "vld3.32">;

// vld3 to double-spaced odd registers.
def VLD3q8b  : VLD3WB<0b0000, "vld3.8">;
def VLD3q16b : VLD3WB<0b0100, "vld3.16">;
def VLD3q32b : VLD3WB<0b1000, "vld3.32">;

//   VLD4     : Vector Load (multiple 4-element structures)
// These classes carry no selection pattern (empty pattern list).
class VLD4D<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b10,0b0000,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr),
          IIC_VLD4,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
          "", []>;
// Variant with an extra GPR result ($wb) tied to the address operand.
class VLD4WB<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b10,0b0001,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr),
          IIC_VLD4,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
          "$addr.addr = $wb", []>;

def VLD4d8  : VLD4D<0b0000, "vld4.8">;
def VLD4d16 : VLD4D<0b0100, "vld4.16">;
def VLD4d32 : VLD4D<0b1000, "vld4.32">;
// The 64-bit variant is emitted as a four-register vld1.64.
def VLD4d64
  : NLdSt<0,0b10,0b0010,0b1100,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr),
          IIC_VLD1,
          "vld1.64\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>;

// vld4 to double-spaced even registers.
def VLD4q8a  : VLD4WB<0b0000, "vld4.8">;
def VLD4q16a : VLD4WB<0b0100, "vld4.16">;
def VLD4q32a : VLD4WB<0b1000, "vld4.32">;

// vld4 to double-spaced odd registers.
def VLD4q8b  : VLD4WB<0b0000, "vld4.8">;
def VLD4q16b : VLD4WB<0b0100, "vld4.16">;
def VLD4q32b : VLD4WB<0b1000, "vld4.32">;

//   VLD1LN   : Vector Load (single element to one lane)
//   FIXME: Not yet implemented.

//   VLD2LN   : Vector Load (single 2-element structure to one lane)
// Each destination register is tied to a matching source register operand;
// there is no selection pattern.
class VLD2LN<bits<4> op11_8, string OpcodeStr>
  : NLdSt<1,0b10,op11_8,0b0000,
          (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VLD2,
          !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"),
          "$src1 = $dst1, $src2 = $dst2", []>;

def VLD2LNd8   : VLD2LN<0b0001, "vld2.8">;
def VLD2LNd16  : VLD2LN<0b0101, "vld2.16">;
def VLD2LNd32  : VLD2LN<0b1001, "vld2.32">;

// vld2 to double-spaced even registers.
def VLD2LNq16a : VLD2LN<0b0101, "vld2.16">;
def VLD2LNq32a : VLD2LN<0b1001, "vld2.32">;

// vld2 to double-spaced odd registers.
def VLD2LNq16b : VLD2LN<0b0101, "vld2.16">;
def VLD2LNq32b : VLD2LN<0b1001, "vld2.32">;

//   VLD3LN   : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, string OpcodeStr>
  : NLdSt<1,0b10,op11_8,0b0000,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
               nohash_imm:$lane),
          IIC_VLD3,
          !strconcat(OpcodeStr,
            "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"),
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;

def VLD3LNd8   : VLD3LN<0b0010, "vld3.8">;
def VLD3LNd16  : VLD3LN<0b0110, "vld3.16">;
def VLD3LNd32  : VLD3LN<0b1010, "vld3.32">;

// vld3 to double-spaced even registers.
def VLD3LNq16a : VLD3LN<0b0110, "vld3.16">;
def VLD3LNq32a : VLD3LN<0b1010, "vld3.32">;

// vld3 to double-spaced odd registers.
def VLD3LNq16b : VLD3LN<0b0110, "vld3.16">;
def VLD3LNq32b : VLD3LN<0b1010, "vld3.32">;

//   VLD4LN   : Vector Load (single 4-element structure to one lane)
// Each destination register is tied to a matching source register operand;
// there is no selection pattern.
class VLD4LN<bits<4> op11_8, string OpcodeStr>
  : NLdSt<1,0b10,op11_8,0b0000,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
               nohash_imm:$lane),
          IIC_VLD4,
          !strconcat(OpcodeStr,
            "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr"),
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;

def VLD4LNd8   : VLD4LN<0b0011, "vld4.8">;
def VLD4LNd16  : VLD4LN<0b0111, "vld4.16">;
def VLD4LNd32  : VLD4LN<0b1011, "vld4.32">;

// vld4 to double-spaced even registers.
def VLD4LNq16a : VLD4LN<0b0111, "vld4.16">;
def VLD4LNq32a : VLD4LN<0b1011, "vld4.32">;

// vld4 to double-spaced odd registers.
def VLD4LNq16b : VLD4LN<0b0111, "vld4.16">;
def VLD4LNq32b : VLD4LN<0b1011, "vld4.32">;

//   VLD1DUP  : Vector Load (single element to all lanes)
//   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
//   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
//   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
//   FIXME: Not yet implemented.
} // mayLoad = 1, hasExtraDefRegAllocReq = 1

//   VST1     : Vector Store (multiple single elements)
// VST1D stores one D register, VST1Q stores one Q register printed as a D
// register pair.  Both are matched from the intrinsic passed as IntOp.
class VST1D<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : NLdSt<0,0b00,0b0111,op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src\\}, $addr"), "",
          [(IntOp addrmode6:$addr, (Ty DPR:$src))]>;
class VST1Q<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : NLdSt<0,0b00,0b1010,op7_4,
          (outs),
          (ins addrmode6:$addr, QPR:$src),
          IIC_VST,
          !strconcat(OpcodeStr, "\t${src:dregpair}, $addr"), "",
          [(IntOp addrmode6:$addr, (Ty QPR:$src))]>;

let hasExtraSrcRegAllocReq = 1 in {
def VST1d8  : VST1D<0b0000, "vst1.8",  v8i8,  int_arm_neon_vst1>;
def VST1d16 : VST1D<0b0100, "vst1.16", v4i16, int_arm_neon_vst1>;
def VST1d32 : VST1D<0b1000, "vst1.32", v2i32, int_arm_neon_vst1>;
def VST1df  : VST1D<0b1000, "vst1.32", v2f32, int_arm_neon_vst1>;
def VST1d64 : VST1D<0b1100, "vst1.64", v1i64, int_arm_neon_vst1>;

def VST1q8  : VST1Q<0b0000, "vst1.8",  v16i8, int_arm_neon_vst1>;
def VST1q16 : VST1Q<0b0100, "vst1.16", v8i16, int_arm_neon_vst1>;
def VST1q32 : VST1Q<0b1000, "vst1.32", v4i32, int_arm_neon_vst1>;
def VST1qf  : VST1Q<0b1000, "vst1.32", v4f32, int_arm_neon_vst1>;
def VST1q64 : VST1Q<0b1100, "vst1.64", v2i64, int_arm_neon_vst1>;
} // hasExtraSrcRegAllocReq

let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {

//   VST2     : Vector Store (multiple 2-element structures)
// These classes carry no selection pattern (empty pattern list).
class VST2D<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b00,0b1000,op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2\\}, $addr"), "", []>;
class VST2Q<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b00,0b0011,op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
          "", []>;

def VST2d8  : VST2D<0b0000, "vst2.8">;
def VST2d16 : VST2D<0b0100, "vst2.16">;
def VST2d32 : VST2D<0b1000, "vst2.32">;
// The 64-bit variant is emitted as a two-register vst1.64.
def VST2d64
  : NLdSt<0,0b00,0b1010,0b1100,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2),
          IIC_VST,
          "vst1.64\t\\{$src1,$src2\\}, $addr", "", []>;

def VST2q8  : VST2Q<0b0000, "vst2.8">;
def VST2q16 : VST2Q<0b0100, "vst2.16">;
def VST2q32 : VST2Q<0b1000, "vst2.32">;

//   VST3     : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b00,0b0100,op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), "", []>;
// Variant with a GPR result ($wb) tied to the address operand.
class VST3WB<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b00,0b0101,op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"),
          "$addr.addr = $wb", []>;

def VST3d8  : VST3D<0b0000, "vst3.8">;
def VST3d16 : VST3D<0b0100, "vst3.16">;
def VST3d32 : VST3D<0b1000, "vst3.32">;
// The 64-bit variant is emitted as a three-register vst1.64.
def VST3d64
  : NLdSt<0,0b00,0b0110,0b1100,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
          IIC_VST,
          "vst1.64\t\\{$src1,$src2,$src3\\}, $addr", "", []>;

// vst3 to double-spaced even registers.
def VST3q8a  : VST3WB<0b0000, "vst3.8">;
def VST3q16a : VST3WB<0b0100, "vst3.16">;
def VST3q32a : VST3WB<0b1000, "vst3.32">;

// vst3 to double-spaced odd registers.
def VST3q8b  : VST3WB<0b0000, "vst3.8">;
def VST3q16b : VST3WB<0b0100, "vst3.16">;
def VST3q32b : VST3WB<0b1000, "vst3.32">;

//   VST4     : Vector Store (multiple 4-element structures)
// These classes carry no selection pattern (empty pattern list).
class VST4D<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b00,0b0000,op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
          "", []>;
// Variant with a GPR result ($wb) tied to the address operand.
class VST4WB<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b00,0b0001,op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
          "$addr.addr = $wb", []>;

def VST4d8  : VST4D<0b0000, "vst4.8">;
def VST4d16 : VST4D<0b0100, "vst4.16">;
def VST4d32 : VST4D<0b1000, "vst4.32">;
// The 64-bit variant is emitted as a four-register vst1.64.
def VST4d64
  : NLdSt<0,0b00,0b0010,0b1100,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST,
          "vst1.64\t\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>;

// vst4 to double-spaced even registers.
def VST4q8a  : VST4WB<0b0000, "vst4.8">;
def VST4q16a : VST4WB<0b0100, "vst4.16">;
def VST4q32a : VST4WB<0b1000, "vst4.32">;

// vst4 to double-spaced odd registers.
def VST4q8b  : VST4WB<0b0000, "vst4.8">;
def VST4q16b : VST4WB<0b0100, "vst4.16">;
def VST4q32b : VST4WB<0b1000, "vst4.32">;

//   VST1LN   : Vector Store (single element from one lane)
//   FIXME: Not yet implemented.

//   VST2LN   : Vector Store (single 2-element structure from one lane)
// No results and no selection pattern; $lane is printed as the lane index on
// every source register.
class VST2LN<bits<4> op11_8, string OpcodeStr>
  : NLdSt<1,0b00,op11_8,0b0000,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr"),
          "", []>;

// op11_8 for the lane forms ends in 0b01 for 2-element structures, matching
// the VLD2LN definitions (0b0001 / 0b0101 / 0b1001) and the 0b..10 / 0b..11
// pattern used by VST3LN / VST4LN.  The previous values (0b0000 / 0b0100 /
// 0b1000) end in 0b00, which is the single-element (VST1 lane) form.
def VST2LNd8   : VST2LN<0b0001, "vst2.8">;
def VST2LNd16  : VST2LN<0b0101, "vst2.16">;
def VST2LNd32  : VST2LN<0b1001, "vst2.32">;

// vst2 to double-spaced even registers.
def VST2LNq16a : VST2LN<0b0101, "vst2.16">;
def VST2LNq32a : VST2LN<0b1001, "vst2.32">;

// vst2 to double-spaced odd registers.
def VST2LNq16b : VST2LN<0b0101, "vst2.16">;
def VST2LNq32b : VST2LN<0b1001, "vst2.32">;

//   VST3LN   : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, string OpcodeStr>
  : NLdSt<1,0b00,op11_8,0b0000,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
               nohash_imm:$lane),
          IIC_VST,
          !strconcat(OpcodeStr,
            "\t\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr"), "", []>;

def VST3LNd8   : VST3LN<0b0010, "vst3.8">;
def VST3LNd16  : VST3LN<0b0110, "vst3.16">;
def VST3LNd32  : VST3LN<0b1010, "vst3.32">;

// vst3 to double-spaced even registers.
def VST3LNq16a : VST3LN<0b0110, "vst3.16">;
def VST3LNq32a : VST3LN<0b1010, "vst3.32">;

// vst3 to double-spaced odd registers.
def VST3LNq16b : VST3LN<0b0110, "vst3.16">;
def VST3LNq32b : VST3LN<0b1010, "vst3.32">;

//   VST4LN   : Vector Store (single 4-element structure from one lane)
// No results and no selection pattern; $lane is printed as the lane index on
// every source register.
class VST4LN<bits<4> op11_8, string OpcodeStr>
  : NLdSt<1,0b00,op11_8,0b0000,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
               nohash_imm:$lane),
          IIC_VST,
          !strconcat(OpcodeStr,
            "\t\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr"),
          "", []>;

def VST4LNd8   : VST4LN<0b0011, "vst4.8">;
def VST4LNd16  : VST4LN<0b0111, "vst4.16">;
def VST4LNd32  : VST4LN<0b1011, "vst4.32">;

// vst4 to double-spaced even registers.
def VST4LNq16a : VST4LN<0b0111, "vst4.16">;
def VST4LNq32a : VST4LN<0b1011, "vst4.32">;

// vst4 to double-spaced odd registers.
def VST4LNq16b : VST4LN<0b0111, "vst4.16">;
def VST4LNq32b : VST4LN<0b1011, "vst4.32">;

} // mayStore = 1, hasExtraSrcRegAllocReq = 1


//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
// Each transform divides the immediate by a per-element-size constant
// (8, 4, 2, or no division for f64) and adds the base index 5.
def DSubReg_i8_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32);
}]>;
// Yields the opposite index from DSubReg_f64_reg: 5 + (1 - imm).
def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.)
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(1 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// Each transform keeps only the low bits of the immediate (mask 7, 3 or 1).
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations, both double- and quad-register.
// The D form operates on DPR with itinerary IIC_VUNAD, the Q form on QPR with
// IIC_VUNAQ; both match (ResTy (OpNode (OpTy src))).
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        IIC_VUNAD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        IIC_VUNAQ,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;

// Basic 2-register operations, scalar single-precision.
// The instruction itself has no pattern and is restricted to DPR_VFP2;
// selection goes through N2VDsPat.
class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
            bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src),
        IIC_VUNAD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;

// Maps a scalar SPR operation onto Inst: the scalar is inserted at
// arm_ssubreg_0 of an undefined OpTy value, and the same sub-register of the
// result is extracted.
class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
  : NEONFPPat<(ResTy (OpNode SPR:$a)),
       (EXTRACT_SUBREG
           (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
        arm_ssubreg_0)>;

// Basic 2-register intrinsics, both double- and quad-register.
// Same shape as the SDNode-based classes, but matched from an Intrinsic and
// with the itinerary supplied by the caller.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Basic 2-register intrinsics, scalar single-precision
// No pattern on the instruction; selection goes through N2VDIntsPat.
class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
               bits<2> op17_16, bits<5> op11_7, bit op4,
               InstrItinClass itin, string OpcodeStr,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;

// f32 counterpart of N2VDsPat with the vector type fixed to v2f32.
class N2VDIntsPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
       (EXTRACT_SUBREG
           (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
        arm_ssubreg_0)>;

// Narrow 2-register intrinsics.
// Q-register source, D-register result.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$dst),
        (ins QPR:$src),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;

// Long 2-register intrinsics.  (This is currently only used for VMOVL and is
// derived from N2VImm instead of N2V because of the way the size is encoded.)
// D-register source, Q-register result.
class N2VLInt<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op6, bit op4, InstrItinClass itin, string OpcodeStr,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs QPR:$dst),
           (ins DPR:$src),
           itin,
           !strconcat(OpcodeStr, "\t$dst, $src"), "",
           [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written: each source is tied to the matching
// destination.  No selection pattern.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$dst1, DPR:$dst2),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD,
        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
        "$src1 = $dst1, $src2 = $dst2", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$dst1, QPR:$dst2),
        (ins QPR:$src1, QPR:$src2),
        itin,
        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
        "$src1 = $dst1, $src2 = $dst2", []>;

// Basic 3-register operations, both double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, ValueType ResTy,
           ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set DPR:$dst,
              (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// By-lane ("SL") form: the second operand is a NEONvduplane of $src2[$lane],
// with $src2 restricted to DPR_VFP2.  Never commutable.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}
// 16-bit by-lane form: $src2 is restricted to DPR_8 and the itinerary is
// fixed to IIC_VMULi16D.
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        IIC_VMULi16D,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_8:$src2),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}

// Quad-register counterparts of the three classes above.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, ValueType ResTy,
           ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set QPR:$dst,
              (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        IIC_VMULi16Q,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

// Basic 3-register operations, scalar single-precision
// The instruction has no pattern and is restricted to DPR_VFP2; selection
// goes through N3VDsPat.
class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            string OpcodeStr, ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2),
        IIC_VBIND,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", []> {
  let isCommutable = Commutable;
}
// Maps an f32 two-operand node onto Inst: each scalar is inserted at
// arm_ssubreg_0 of an undefined v2f32, and the same sub-register of the
// result is extracted.
class N3VDsPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
       (EXTRACT_SUBREG
           (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
                 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
        arm_ssubreg_0)>;

// Basic 3-register intrinsics, both double- and quad-register.
// Double-register 3-register intrinsic: $dst = IntOp($src1, $src2).
// The fifth N3V argument is 0 here and 1 in the quad-register counterpart.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Double-register intrinsic whose second operand is one lane of $src2,
// replicated through NEONvduplane.  Never commutable.
// NOTE(review): DPR_VFP2 presumably limits the scalar register to the
// encodable subset -- confirm against the register class definition.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

// Same as N3VDIntSL, but the scalar operand comes from the DPR_8 class
// (used by the 16-bit element variants in the multiclasses).
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_8:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

// Quad-register 3-register intrinsic: $dst = IntOp($src1, $src2).
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Quad-register intrinsic by scalar: the lane of the double-register $src2
// (type OpTy) is duplicated into a ResTy vector.  Never commutable.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, ValueType ResTy, ValueType OpTy,
                Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// Same as N3VQIntSL, with the scalar operand taken from DPR_8.
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, ValueType ResTy, ValueType OpTy,
                  Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// Multiply-Add/Sub operations, both double- and quad-register.
// $dst = OpNode($src1, MulOp($src2, $src3)); $src1 is tied to $dst.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set DPR:$dst, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;

// Double-register multiply-op by scalar: the multiplier is a duplicated lane
// of $src3; ShOp combines the product with the tied accumulator $src1.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
                                                     imm:$lane)))))))]>;

// Same as N3VDMulOpSL, with the scalar operand taken from DPR_8.
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_8:$src3),
                                                     imm:$lane)))))))]>;

// Quad-register multiply-add/sub; $src1 is tied to $dst.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, ValueType Ty,
                SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, QPR:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set QPR:$dst, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;

// Quad-register multiply-op by scalar: the lane of the double-register $src3
// (type OpTy) is duplicated into a ResTy vector before the multiply.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, ValueType ResTy, ValueType OpTy,
                  SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                         (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                              imm:$lane)))))))]>;

// Same as N3VQMulOpSL, with the scalar operand taken from DPR_8.
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                         (ResTy (NEONvduplane (OpTy DPR_8:$src3),
                                                              imm:$lane)))))))]>;

// Multiply-Add/Sub operations, scalar single-precision
// The instruction itself carries no selection pattern (empty list); the
// f32 mapping is provided separately by N3VDMulOpsPat.
class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr,
                 ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", []>;

// Selects an f32 OpNode(acc, MulNode(a, b)) onto Inst: each SPR operand is
// inserted at arm_ssubreg_0 of an undefined v2f32, and the f32 result is
// extracted from arm_ssubreg_0 of the vector result.
class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
                  (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                       SPR:$acc, arm_ssubreg_0),
                        (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                       SPR:$a, arm_ssubreg_0),
                        (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                       SPR:$b, arm_ssubreg_0)),
                  arm_ssubreg_0)>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
                                      (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, QPR:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
                                      (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// Accumulating long intrinsic: quad-register accumulator (TyQ, tied to $dst)
// combined with two double-register sources (TyD).
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set QPR:$dst,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;

// By-scalar form: the third intrinsic operand is a lane of $src3 duplicated
// through NEONvduplane.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin, string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                imm:$lane)))))]>;

// Same as N3VLInt3SL, with the scalar operand taken from DPR_8.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr,
                   ValueType ResTy, ValueType OpTy,
                   Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src3),
                                                imm:$lane)))))]>;


// Narrowing 3-register intrinsics.
// Two quad-register sources (TyQ) produce a double-register result (TyD).
// The itinerary is fixed to IIC_VBINi4D.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        IIC_VBINi4D,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Two double-register sources (TyD) produce a quad-register result (TyQ).
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long intrinsic by scalar: the second operand is a duplicated lane of $src2.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]>;

// Same as N3VLIntSL, with the scalar operand taken from DPR_8.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr,
                  ValueType ResTy, ValueType OpTy,
                  Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]>;

// Wide 3-register intrinsics.
// Quad-register first source (TyQ) and double-register second source (TyD)
// produce a quad-register result.  The itinerary is fixed to IIC_VSUBiD.
class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2),
        IIC_VSUBiD,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
// Result and operand live in the same register class but have different
// vector types (ResTy vs. OpTy).
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        IIC_VSHLiD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
// NOTE(review): the quad-register form also uses the IIC_VSHLiD itinerary;
// kept exactly as in the original.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        IIC_VSHLiD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// $src1 (ResTy) is the accumulator tied to $dst; $src2 (OpTy) is the vector
// being pairwise-accumulated.  Only $src2 appears in the assembly string.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPALiD,
        !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
        [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        IIC_VPALiQ,
        !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
        [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;

// Shift by immediate,
// both double- and quad-register.
// The shift amount is the i32 immediate operand $SIMM.
class N2VDSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op4, InstrItinClass itin, string OpcodeStr,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           itin,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
           [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op4, InstrItinClass itin, string OpcodeStr,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           itin,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
           [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
// Double-register source (OpTy) shifted into a quad-register result (ResTy).
// op6 is supplied by the caller; the itinerary is fixed to IIC_VSHLiD.
class N2VLSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op6, bit op4, string OpcodeStr, ValueType ResTy,
             ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs QPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           IIC_VSHLiD,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
           [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
                                          (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
// Quad-register source (OpTy) shifted into a double-register result (ResTy).
class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op6, bit op4, InstrItinClass itin, string OpcodeStr,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs DPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           itin,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
           [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
                                          (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// $dst = add($src1, ShOp($src2, $SIMM)); $src1 is tied to $dst.
class N2VDShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           IIC_VPALiD,
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
           [(set DPR:$dst, (Ty (add DPR:$src1,
                                (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
// NOTE(review): the quad-register form also uses the IIC_VPALiD itinerary;
// kept exactly as in the original.
class N2VQShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           IIC_VPALiD,
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
           [(set QPR:$dst, (Ty (add QPR:$src1,
                                (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
// $dst = ShOp($src1, $src2, $SIMM); $src1 is tied to $dst.
class N2VDShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           IIC_VSHLiD,
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
           [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           IIC_VSHLiQ,
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
           [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           IIC_VUNAD,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
           [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           IIC_VUNAQ,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
           [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
// The 8- and 16-bit variants share the "16" itineraries; 32-bit variants use
// the "32" itineraries.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   !strconcat(OpcodeStr, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   !strconcat(OpcodeStr, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   !strconcat(OpcodeStr, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   !strconcat(OpcodeStr, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   !strconcat(OpcodeStr, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   !strconcat(OpcodeStr, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// By-scalar operations, element sizes of 16 and 32 bits.  The 16-bit
// variants use the *SL16 classes, the 32-bit variants the *SL classes.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8,
                       !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D,
                     !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8,
                       !strconcat(OpcodeStr, "16"), v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q,
                     !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
// itinD / itinQ are forwarded to N3V_QHS for both its 16- and 32-bit slots.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   !strconcat(OpcodeStr, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   !strconcat(OpcodeStr, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// Defs are named after the (narrow) result type; the mnemonic suffix is the
// source element size.
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, !strconcat(OpcodeStr, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, !strconcat(OpcodeStr, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, !strconcat(OpcodeStr, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
// Defs are named after the (wide) result type; the mnemonic suffix is the
// source element size.
multiclass N2VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                       bit op4, string OpcodeStr, Intrinsic IntOp> {
  def v8i16 : N2VLInt<op24, op23, 0b001000, op11_8, op7, op6, op4,
                      IIC_VQUNAiD, !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp>;
  def v4i32 : N2VLInt<op24, op23, 0b010000, op11_8, op7, op6, op4,
                      IIC_VQUNAiD, !strconcat(OpcodeStr, "16"),
                      v4i32, v4i16, IntOp>;
  def v2i64 : N2VLInt<op24, op23, 0b100000, op11_8, op7, op6, op4,
                      IIC_VQUNAiD, !strconcat(OpcodeStr, "32"),
                      v2i64, v2i32, IntOp>;
}


// Neon 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16,
                      !strconcat(OpcodeStr, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32,
                      !strconcat(OpcodeStr, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16,
                      !strconcat(OpcodeStr, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32,
                      !strconcat(OpcodeStr, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}

// By-scalar intrinsics, element sizes of 16 and 32 bits.  The 16-bit
// variants use the *SL16 classes, the 32-bit variants the *SL classes.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, Intrinsic IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          !strconcat(OpcodeStr, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        !strconcat(OpcodeStr, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The 8-bit variants reuse the 16-bit itineraries.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16,
                      !strconcat(OpcodeStr, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16,
                      !strconcat(OpcodeStr, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}

// ....then also with element size of 64 bits:
// The 64-bit variants reuse the 32-bit itineraries.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32,
                      !strconcat(OpcodeStr, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32,
                      !strconcat(OpcodeStr, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}


// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// Defs are named after the (narrow) result type; the mnemonic suffix is the
// source element size.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp,
                       bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      !strconcat(OpcodeStr, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      !strconcat(OpcodeStr, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      !strconcat(OpcodeStr, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
// Defs are named after the (wide) result type; the mnemonic suffix is the
// source element size.
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr,
                      Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin,
                      !strconcat(OpcodeStr, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin,
                      !strconcat(OpcodeStr, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// Long by-scalar intrinsics.  Unlike the other long multiclasses, the defs
// here are named after the source (double-register) type.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin,
                      !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp,
                       bit Commutable = 0> {
  def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
                      !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
  def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
                      !strconcat(OpcodeStr, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
                      !strconcat(OpcodeStr, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// The multiply node is always `mul`; OpNode combines the product with the
// accumulator.  The 8-bit variants reuse the 16-bit itineraries.
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        !strconcat(OpcodeStr, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        !strconcat(OpcodeStr, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        !strconcat(OpcodeStr, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        !strconcat(OpcodeStr, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        !strconcat(OpcodeStr, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        !strconcat(OpcodeStr, "32"), v4i32, mul, OpNode>;
}

// Multiply-op by scalar, element sizes of 16 and 32 bits.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            !strconcat(OpcodeStr, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          !strconcat(OpcodeStr, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            !strconcat(OpcodeStr, "16"),
                            v8i16, v4i16, mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          !strconcat(OpcodeStr, "32"),
                          v4i32, v2i32, mul, ShOp>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
                       !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q,
                       !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q,
                       !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q,
                       !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
// NOTE(review): the 32-bit variant uses IIC_VMACi16D here while
// N3VLInt3SL_HS uses IIC_VMACi32D for its 32-bit variant; left unchanged
// because it only affects scheduling -- confirm the intended itinerary.
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
}

// Long 3-argument by-scalar intrinsics; defs are named after the source
// (double-register) type.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The size field (op21_20) for the 8-bit variant is 0b00, matching every
// other "8"-suffixed def in this file (e.g. N3VLInt_QHS, N3VWInt_QHS).  It
// was previously 0b01, which is the 16-bit encoding already used by the
// inherited v4i32 def, so the two instructions shared the same size bits.
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        string OpcodeStr, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
}

// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, !strconcat(OpcodeStr, "8"),
                      v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, !strconcat(OpcodeStr, "16"),
                      v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, !strconcat(OpcodeStr, "32"),
                      v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, !strconcat(OpcodeStr, "8"),
                      v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, !strconcat(OpcodeStr, "16"),
                      v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, !strconcat(OpcodeStr, "32"),
                      v4i32, v4i32, IntOp>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Defs are named after the source type; the result type has half as many
// elements of twice the width.
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   element sizes of 8, 16, 32 and 64 bits:
// The 8/16/32-bit variants pass a distinct op21_16 value with op7 = 0; the
// 64-bit variants pass op21_16 = 0b000000 with op7 = 1.
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "8"), v8i8, OpNode>;
  def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "16"), v4i16, OpNode>;
  def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "32"), v2i32, OpNode>;
  def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4, itin,
                     !strconcat(OpcodeStr, "64"), v1i64, OpNode>;

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "8"), v16i8, OpNode>;
  def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "16"), v8i16, OpNode>;
  def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "32"), v4i32, OpNode>;
  def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4, itin,
                     !strconcat(OpcodeStr, "64"), v2i64, OpNode>;
}


// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v8i8, ShOp>;
  def v4i16 : N2VDShAdd<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
  def v2i32 : N2VDShAdd<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
  def v1i64 : N2VDShAdd<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v16i8, ShOp>;
  def v8i16 : N2VQShAdd<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v8i16, ShOp>;
  def v4i32 : N2VQShAdd<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v4i32, ShOp>;
  def v2i64 : N2VQShAdd<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
}


// Neon Shift-Insert vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v8i8, ShOp>;
  def v4i16 : N2VDShIns<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
  def v2i32 : N2VDShIns<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
  def v1i64 : N2VDShIns<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v16i8, ShOp>;
  def v8i16 : N2VQShIns<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v8i16, ShOp>;
  def v4i32 : N2VQShIns<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v4i32, ShOp>;
  def v2i64 : N2VQShIns<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// NOTE(review): the trailing 0/1 argument on the N3V* instantiations below is
// presumably the Commutable flag of those classes (declared earlier in this
// file) -- confirm against the class definitions.

//   VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd.i", add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd.f32", v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd.f32", v4f32, v4f32, fadd, 1>;
//   VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl.s", int_arm_neon_vaddls, 1>;
defm VADDLu   : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl.u", int_arm_neon_vaddlu, 1>;
//   VADDW    : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>;
defm VADDWu   : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>;
//   VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vhadd.s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vhadd.u", int_arm_neon_vhaddu, 1>;
//   VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vrhadd.s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vrhadd.u", int_arm_neon_vrhaddu, 1>;
//   VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                            IIC_VBINi4Q, "vqadd.s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                            IIC_VBINi4Q, "vqadd.u", int_arm_neon_vqaddu, 1>;
//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>;
//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>;

// Vector Multiply Operations.

//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q,
                        IIC_VMULi32Q, "vmul.i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul.p8", v8i8, v8i8,
                        int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul.p8", v16i8, v16i8,
                        int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul.f32", v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul.f32", v4f32, v4f32, fmul, 1>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul.i", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul.f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul.f32", v4f32, v2f32, fmul>;
// When the duplicated lane comes from a Q register, select the by-lane form
// on the D sub-register chosen by DSubReg_*_reg, with the lane index
// remapped by SubReg_*_lane.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                                     (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                                     (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                                  (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh.s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh.s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                                        (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                                        (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh.s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                                         (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                                         (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs   : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls, 1>;
defm VMULLu   : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu, 1>;
def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull.p8", v8i16, v8i8,
                        int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls>;
defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu>;

//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla.f32", v4f32, fmul, fadd>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla.f32", v4f32, v2f32, fmul, fadd>;

def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1),
                              (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                                     (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1),
                              (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                                     (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
                       (fmul (v4f32 QPR:$src2),
                             (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                                  (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>;
defm VMLALu   : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>;

defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal.s", int_arm_neon_vmlals>;
defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal.u", int_arm_neon_vmlalu>;

//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal.s", int_arm_neon_vqdmlal>;

//   VMLS     : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls.f32", v4f32, fmul, fsub>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls.f32", v4f32, v2f32, fmul, fsub>;

def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1),
                              (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                                     (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1),
                              (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                                     (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
                       (fmul (v4f32 QPR:$src2),
                             (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                                  (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>;
defm VMLSLu   : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;

defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl.s", int_arm_neon_vmlsls>;
defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl.u", int_arm_neon_vmlslu>;

//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
// The opcode literal is written with four digits, matching the width used by
// every other N3VLInt3SL_HS instantiation above (0b0010, 0b0011, 0b0110).
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl.s", int_arm_neon_vqdmlsl>;

// Vector Subtract Operations.

// NOTE(review): the trailing 0/1 argument on these instantiations is
// presumably the Commutable flag of the N3V* classes (declared earlier in
// this file) -- confirm.  Subtraction is not commutative, so every entry in
// this section passes 0.

//   VSUB     : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, "vsub.i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub.f32", v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub.f32", v4f32, v4f32, fsub, 0>;
//   VSUBL    : Vector Subtract Long (Q = D - D)
//   These previously passed 1 as the trailing argument, unlike every other
//   subtract here; D - D must not have its operands swapped.
defm VSUBLs   : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl.s", int_arm_neon_vsubls, 0>;
defm VSUBLu   : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl.u", int_arm_neon_vsublu, 0>;
//   VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>;
defm VSUBWu   : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>;
//   VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vhsub.s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vhsub.u", int_arm_neon_vhsubu, 0>;
//   VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                            IIC_VBINi4Q, "vqsub.s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                            IIC_VBINi4Q, "vqsub.u", int_arm_neon_vqsubu, 0>;
//   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
//   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.

// The floating-point compare records below take f32 vector operands and are
// given an i32 vector of the same lane count as their result type
// (v2i32 / v4i32).

//   VCEQ     : Vector Compare Equal
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vceq.i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq.f32",
                     v2i32, v2f32, NEONvceq, 1>;
def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq.f32",
                     v4i32, v4f32, NEONvceq, 1>;
//   VCGE     : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vcge.s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vcge.u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge.f32",
                     v2i32, v2f32, NEONvcge, 0>;
def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge.f32",
                     v4i32, v4f32, NEONvcge, 0>;
//   VCGT     : Vector Compare Greater Than
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vcgt.s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vcgt.u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt.f32",
                     v2i32, v2f32, NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt.f32",
                     v4i32, v4f32, NEONvcgt, 0>;
//   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge.f32",
                        v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge.f32",
                        v4i32, v4f32, int_arm_neon_vacgeq, 0>;
//   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt.f32",
                        v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt.f32",
                        v4i32, v4f32, int_arm_neon_vacgtq, 0>;
//   VTST     : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vtst.i", NEONvtst, 1>;

// Vector Bitwise Operations.
// The selection patterns in this section are all written on v2i32 (D
// register) and v4i32 (Q register).

//   VAND     : Vector Bitwise AND
def  VANDd    : N3VD<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                     v2i32, v2i32, and, 1>;
def  VANDq    : N3VQ<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                     v4i32, v4i32, and, 1>;

//   VEOR     : Vector Bitwise Exclusive OR
def  VEORd    : N3VD<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                     v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQ<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                     v4i32, v4i32, xor, 1>;

//   VORR     : Vector Bitwise OR
def  VORRd    : N3VD<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                     v2i32, v2i32, or, 1>;
def  VORRq    : N3VQ<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                     v4i32, v4i32, or, 1>;

//   VBIC     : Vector Bitwise Bit Clear (AND NOT)
def  VBICd    : N3V<0, 0, 0b01, 0b0001, 0, 1,
                    (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                    IIC_VBINiD, "vbic\t$dst, $src1, $src2", "",
                    [(set DPR:$dst,
                          (v2i32 (and DPR:$src1, (vnot_conv DPR:$src2))))]>;
def  VBICq    : N3V<0, 0, 0b01, 0b0001, 1, 1,
                    (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                    IIC_VBINiQ, "vbic\t$dst, $src1, $src2", "",
                    [(set QPR:$dst,
                          (v4i32 (and QPR:$src1, (vnot_conv QPR:$src2))))]>;

//   VORN     : Vector Bitwise OR NOT
def  VORNd    : N3V<0, 0, 0b11, 0b0001, 0, 1,
                    (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                    IIC_VBINiD, "vorn\t$dst, $src1, $src2", "",
                    [(set DPR:$dst,
                          (v2i32 (or DPR:$src1, (vnot_conv DPR:$src2))))]>;
def  VORNq    : N3V<0, 0, 0b11, 0b0001, 1, 1,
                    (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                    IIC_VBINiQ, "vorn\t$dst, $src1, $src2", "",
                    [(set QPR:$dst,
                          (v4i32 (or QPR:$src1, (vnot_conv QPR:$src2))))]>;

//   VMVN     : Vector Bitwise NOT
def  VMVNd    : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                    (outs DPR:$dst), (ins DPR:$src),
                    IIC_VSHLiD, "vmvn\t$dst, $src", "",
                    [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
// NOTE(review): the Q-register form uses the same D-suffixed itinerary as
// VMVNd, unlike the other Q records in this section -- confirm whether a
// Q itinerary was intended.
def  VMVNq    : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                    (outs QPR:$dst), (ins QPR:$src),
                    IIC_VSHLiD, "vmvn\t$dst, $src", "",
                    [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
// Also select VMVN for the vnot_conv form of the bitwise NOT.
def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>;

//   VBSL     : Vector Bitwise Select
//   $src1 is tied to $dst and acts as the selection mask in the pattern:
//   ($src2 & $src1) | ($src3 & ~$src1).
def  VBSLd    : N3V<1, 0, 0b01, 0b0001, 0, 1,
                    (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
                    IIC_VCNTiD, "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
                    [(set DPR:$dst,
                          (v2i32 (or (and DPR:$src2, DPR:$src1),
                                     (and DPR:$src3, (vnot_conv DPR:$src1)))))]>;
def  VBSLq    : N3V<1, 0, 0b01, 0b0001, 1, 1,
                    (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
                    IIC_VCNTiQ, "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
                    [(set QPR:$dst,
                          (v4i32 (or (and QPR:$src2, QPR:$src1),
                                     (and QPR:$src3, (vnot_conv QPR:$src1)))))]>;

//   VBIF     : Vector Bitwise Insert if False
//              like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst",
//   VBIT     : Vector Bitwise Insert if True
//              like VBSL but with: "vbit\t$dst, $src2, $src1", "$src3 = $dst",
// These are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

//   VABD     : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vabd.s", int_arm_neon_vabds, 0>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vabd.u", int_arm_neon_vabdu, 0>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, "vabd.f32", v2f32, v2f32,
                        int_arm_neon_vabds, 0>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vabd.f32", v4f32, v4f32,
                        int_arm_neon_vabds, 0>;

//   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, "vabdl.s", int_arm_neon_vabdls, 0>;
defm VABDLu   : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, "vabdl.u", int_arm_neon_vabdlu, 0>;

//   VABA     : Vector Absolute Difference and Accumulate
//   VABA shares bits 11-8 (0b0111) with VABD and is distinguished from it by
//   bit 4 = 1; bit 23 is 0 as for the other same-width three-register forms.
//   The previous arguments (x,1,0b0101,0) were identical to VABAL's below.
//   NOTE(review): assumes N3VInt3_QHS takes <op24, op23, op11_8, op4, ...>
//   in the same order as N3VLInt3_QHS -- confirm against its definition.
defm VABAs    : N3VInt3_QHS<0,0,0b0111,1, "vaba.s", int_arm_neon_vabas>;
defm VABAu    : N3VInt3_QHS<1,0,0b0111,1, "vaba.u", int_arm_neon_vabau>;

//   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>;
defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>;

// Vector Maximum and Minimum.

// The f32 records reuse the same intrinsic as the signed-integer forms
// (int_arm_neon_vmaxs / int_arm_neon_vmins).

//   VMAX     : Vector Maximum
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vmax.s", int_arm_neon_vmaxs, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vmax.u", int_arm_neon_vmaxu, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax.f32",
                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax.f32",
                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;

//   VMIN     : Vector Minimum
defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vmin.s", int_arm_neon_vmins, 1>;
defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vmin.u", int_arm_neon_vminu, 1>;
def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin.f32",
                        v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin.f32",
                        v4f32, v4f32, int_arm_neon_vmins, 1>;

// Vector Pairwise Operations.

// The pairwise add/max/min records in this section are defined for
// D-register vector types only.

//   VPADD    : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd.i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd.i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd.i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd.f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;

//   VPADDL   : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0,
                             "vpaddl.s", int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0,
                             "vpaddl.u", int_arm_neon_vpaddlu>;

//   VPADAL   : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0,
                              "vpadal.s", int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0,
                              "vpadal.u", int_arm_neon_vpadalu>;

//   VPMAX    : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.s8",
                        v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.s16",
                        v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.s32",
                        v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.u8",
                        v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.u16",
                        v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.u32",
                        v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax.f32",
                        v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

//   VPMIN    : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.s8",
                        v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.s16",
                        v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.s32",
                        v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.u8",
                        v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.u16",
                        v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.u32",
                        v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin.f32",
                        v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

//   VRECPE   : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, IIC_VUNAD,
                        "vrecpe.u32", v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, IIC_VUNAQ,
                        "vrecpe.u32", v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, IIC_VUNAD,
                        "vrecpe.f32", v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, IIC_VUNAQ,
                        "vrecpe.f32", v4f32, v4f32, int_arm_neon_vrecpe>;

//   VRECPS   : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSD, "vrecps.f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSQ, "vrecps.f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;

//   VRSQRTE  : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, IIC_VUNAD,
                         "vrsqrte.u32", v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, IIC_VUNAQ,
                         "vrsqrte.u32", v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, IIC_VUNAD,
                         "vrsqrte.f32", v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, IIC_VUNAQ,
                         "vrsqrte.f32", v4f32, v4f32, int_arm_neon_vrsqrte>;

//   VRSQRTS  : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSD, "vrsqrts.f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSQ, "vrsqrts.f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;

// Vector Shifts.

//   VSHL     : Vector Shift
defm VSHLs    : N3VInt_QHSD<0, 0, 0b0100, 0,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl.s", int_arm_neon_vshifts, 0>;
defm VSHLu    : N3VInt_QHSD<1, 0, 0b0100, 0,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl.u", int_arm_neon_vshiftu, 0>;
//   VSHL     : Vector Shift Left (Immediate)
defm VSHLi    : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl.i", NEONvshl>;
//   VSHR     : Vector Shift Right (Immediate)
defm VSHRs    : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>;
defm VSHRu    : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>;

//   VSHLL    : Vector Shift Left Long
//   The result type has the same lane count as the source with lanes twice
//   as wide (for example v8i8 -> v8i16).
def  VSHLLs8  : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1,
                       "vshll.s8", v8i16, v8i8, NEONvshlls>;
def  VSHLLs16 : N2VLSh<0, 1, 0b010000, 0b1010, 0, 0, 1,
                       "vshll.s16", v4i32, v4i16, NEONvshlls>;
def  VSHLLs32 : N2VLSh<0, 1, 0b100000, 0b1010, 0, 0, 1,
                       "vshll.s32", v2i64, v2i32, NEONvshlls>;
def  VSHLLu8  : N2VLSh<1, 1, 0b001000, 0b1010, 0, 0, 1,
                       "vshll.u8", v8i16, v8i8, NEONvshllu>;
def  VSHLLu16 : N2VLSh<1, 1, 0b010000, 0b1010, 0, 0, 1,
                       "vshll.u16", v4i32, v4i16, NEONvshllu>;
def  VSHLLu32 : N2VLSh<1, 1, 0b100000, 0b1010, 0, 0, 1,
                       "vshll.u32", v2i64, v2i32, NEONvshllu>;

//   VSHLL    : Vector Shift Left Long (with maximum shift count)
def  VSHLLi8  : N2VLSh<1, 1, 0b110010, 0b0011, 0, 0, 0,
                       "vshll.i8", v8i16, v8i8, NEONvshlli>;
def  VSHLLi16 : N2VLSh<1, 1, 0b110110, 0b0011, 0, 0, 0,
                       "vshll.i16", v4i32, v4i16, NEONvshlli>;
def  VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0,
                       "vshll.i32", v2i64, v2i32, NEONvshlli>;

//   VSHRN    : Vector Shift Right and Narrow
//   Narrowing records are named after the source element width; the result
//   type has lanes half as wide (for example v8i16 -> v8i8).
def  VSHRN16  : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, IIC_VSHLiD,
                       "vshrn.i16", v8i8, v8i16, NEONvshrn>;
def  VSHRN32  : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, IIC_VSHLiD,
                       "vshrn.i32", v4i16, v4i32, NEONvshrn>;
def  VSHRN64  : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, IIC_VSHLiD,
                       "vshrn.i64", v2i32, v2i64, NEONvshrn>;

//   VRSHL    : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSD<0,0,0b0101,0,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl.s", int_arm_neon_vrshifts, 0>;
defm VRSHLu   : N3VInt_QHSD<1,0,0b0101,0,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl.u", int_arm_neon_vrshiftu, 0>;
//   VRSHR    : Vector Rounding Shift Right
defm VRSHRs   : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>;
defm VRSHRu   : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>;

//   VRSHRN   : Vector Rounding Shift Right and Narrow
def  VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                       "vrshrn.i16", v8i8, v8i16, NEONvrshrn>;
def  VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                       "vrshrn.i32", v4i16, v4i32, NEONvrshrn>;
def  VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                       "vrshrn.i64", v2i32, v2i64, NEONvrshrn>;

//   VQSHL    : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSD<0,0,0b0100,1,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl.s", int_arm_neon_vqshifts, 0>;
defm VQSHLu   : N3VInt_QHSD<1,0,0b0100,1,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl.u", int_arm_neon_vqshiftu, 0>;
//   VQSHL    : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.s", NEONvqshls>;
defm VQSHLui  : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>;
//   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>;

//   VQSHRN   : Vector Saturating Shift Right and Narrow
def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrn.s16", v8i8, v8i16, NEONvqshrns>;
def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrn.s32", v4i16, v4i32, NEONvqshrns>;
def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrn.s64", v2i32, v2i64, NEONvqshrns>;
def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrn.u16", v8i8, v8i16, NEONvqshrnu>;
def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrn.u32", v4i16, v4i32, NEONvqshrnu>;
def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrn.u64", v2i32, v2i64, NEONvqshrnu>;

//   VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrun.s16", v8i8, v8i16, NEONvqshrnsu>;
def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrun.s32", v4i16, v4i32, NEONvqshrnsu>;
def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrun.s64", v2i32, v2i64, NEONvqshrnsu>;

//   VQRSHL   : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSD<0, 0, 0b0101, 1,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl.s", int_arm_neon_vqrshifts, 0>;
defm VQRSHLu  : N3VInt_QHSD<1, 0, 0b0101, 1,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl.u", int_arm_neon_vqrshiftu, 0>;

//   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
// Signed forms select NEONvqrshrns, unsigned forms NEONvqrshrnu.
def VQRSHRNs16
  : N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1,
           IIC_VSHLi4D, "vqrshrn.s16", v8i8, v8i16, NEONvqrshrns>;
def VQRSHRNs32
  : N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1,
           IIC_VSHLi4D, "vqrshrn.s32", v4i16, v4i32, NEONvqrshrns>;
def VQRSHRNs64
  : N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1,
           IIC_VSHLi4D, "vqrshrn.s64", v2i32, v2i64, NEONvqrshrns>;
def VQRSHRNu16
  : N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1,
           IIC_VSHLi4D, "vqrshrn.u16", v8i8, v8i16, NEONvqrshrnu>;
def VQRSHRNu32
  : N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1,
           IIC_VSHLi4D, "vqrshrn.u32", v4i16, v4i32, NEONvqrshrnu>;
def VQRSHRNu64
  : N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1,
           IIC_VSHLi4D, "vqrshrn.u64", v2i32, v2i64, NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
def VQRSHRUN16
  : N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1,
           IIC_VSHLi4D, "vqrshrun.s16", v8i8, v8i16, NEONvqrshrnsu>;
def VQRSHRUN32
  : N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1,
           IIC_VSHLi4D, "vqrshrun.s32", v4i16, v4i32, NEONvqrshrnsu>;
def VQRSHRUN64
  : N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1,
           IIC_VSHLi4D, "vqrshrun.s64", v2i32, v2i64, NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs  : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>;
defm VSRAu  : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra.u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra.s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra.u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli.", NEONvsli>;
// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
defm VABS
  : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
               IIC_VUNAiD, IIC_VUNAiQ, "vabs.s", int_arm_neon_vabs>;
def VABSfd
  : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
            IIC_VUNAD, "vabs.f32", v2f32, v2f32, int_arm_neon_vabs>;
def VABSfq
  : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
            IIC_VUNAQ, "vabs.f32", v4f32, v4f32, int_arm_neon_vabs>;

// VQABS : Vector Saturating Absolute Value
defm VQABS
  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
               IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs.s", int_arm_neon_vqabs>;

// Vector Negate.

// Negation is matched as a subtraction from an all-zeros vector. vneg_conv
// matches the same shape using immAllZerosV_bc in place of immAllZerosV.
def vneg      : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>;

// Integer negate on a D register; 'size' is forwarded to N2V.
class VNEGD<bits<2> size, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0,
        (outs DPR:$dst), (ins DPR:$src),
        IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
// Integer negate on a Q register.
// NOTE(review): this Q-register class uses the D-suffixed itinerary
// IIC_VSHLiD, same as VNEGD -- confirm that is intended.
class VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0,
        (outs QPR:$dst), (ins QPR:$src),
        IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (vneg QPR:$src)))]>;

// VNEG : Vector Negate
def VNEGs8d  : VNEGD<0b00, "vneg.s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg.s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg.s32", v2i32>;
def VNEGs8q  : VNEGQ<0b00, "vneg.s8", v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg.s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def VNEGf32d
  : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
        "vneg.f32\t$dst, $src", "",
        [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
def VNEGf32q
  : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
        "vneg.f32\t$dst, $src", "",
        [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;

// Route the vneg_conv form of each integer type to the matching VNEG record.
def : Pat<(v8i8  (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vneg_conv DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vneg_conv QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG
  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
               IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg.s", int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS
  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
               IIC_VCNTiD, IIC_VCNTiQ, "vcls.s", int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ
  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
               IIC_VCNTiD, IIC_VCNTiQ, "vclz.i", int_arm_neon_vclz>;
// VCNT : Vector Count One Bits
def VCNTd
  : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
            IIC_VCNTiD, "vcnt.8", v8i8, v8i8, int_arm_neon_vcnt>;
def VCNTq
  : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
            IIC_VCNTiQ, "vcnt.8", v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Move Operations.

// VMOV : Vector Move (Register)
// Neither record carries a selection pattern (the pattern list is empty).
// NOTE(review): VMOVQ uses the D-suffixed itinerary IIC_VMOVD -- confirm.

def VMOVD
  : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
        IIC_VMOVD, "vmov\t$dst, $src", "", []>;
def VMOVQ
  : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
        IIC_VMOVD, "vmov\t$dst, $src", "", []>;

// VMOV : Vector Move (Immediate)

// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
// Each vmovImm<N> leaf accepts a build_vector only when ARM::getVMOVImm
// returns a non-null node for the given second argument (1, 2, 4 or 8); the
// paired VMOV_get_imm<N> transform then returns that node as the operand.
def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 1, *CurDAG);
}]>;
def vmovImm8 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0;
}], VMOV_get_imm8>;

// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 2, *CurDAG);
}]>;
def vmovImm16 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0;
}], VMOV_get_imm16>;

// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 4, *CurDAG);
}]>;
def vmovImm32 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0;
}], VMOV_get_imm32>;

// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 8, *CurDAG);
}]>;
def vmovImm64 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0;
}], VMOV_get_imm64>;

// Note: Some of the cmode bits in the following VMOV instructions need to
// be encoded based on the immed values.

// Immediate moves: one D-register and one Q-register record per element
// width, each matching the vmovImm<N> leaf of that width.
def VMOVv8i8
  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1,
             (outs DPR:$dst), (ins i8imm:$SIMM), IIC_VMOVImm,
             "vmov.i8\t$dst, $SIMM", "",
             [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8
  : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1,
             (outs QPR:$dst), (ins i8imm:$SIMM), IIC_VMOVImm,
             "vmov.i8\t$dst, $SIMM", "",
             [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;

def VMOVv4i16
  : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1,
             (outs DPR:$dst), (ins i16imm:$SIMM), IIC_VMOVImm,
             "vmov.i16\t$dst, $SIMM", "",
             [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
def VMOVv8i16
  : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1,
             (outs QPR:$dst), (ins i16imm:$SIMM), IIC_VMOVImm,
             "vmov.i16\t$dst, $SIMM", "",
             [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;

def VMOVv2i32
  : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1,
             (outs DPR:$dst), (ins i32imm:$SIMM), IIC_VMOVImm,
             "vmov.i32\t$dst, $SIMM", "",
             [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
def VMOVv4i32
  : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1,
             (outs QPR:$dst), (ins i32imm:$SIMM), IIC_VMOVImm,
             "vmov.i32\t$dst, $SIMM", "",
             [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;

def VMOVv1i64
  : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1,
             (outs DPR:$dst), (ins i64imm:$SIMM), IIC_VMOVImm,
             "vmov.i64\t$dst, $SIMM", "",
             [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64
  : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1,
             (outs QPR:$dst), (ins i64imm:$SIMM), IIC_VMOVImm,
             "vmov.i64\t$dst, $SIMM", "",
             [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;

// VMOV : Vector Get Lane (move scalar to ARM core register)

def VGETLNs8
  : NVGetLane<0b11100101, 0b1011, 0b00,
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", ".s8\t$dst, $src[$lane]",
              [(set GPR:$dst,
                    (NEONvgetlanes (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNs16
  : NVGetLane<0b11100001, 0b1011, 0b01,
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", ".s16\t$dst, $src[$lane]",
              [(set GPR:$dst,
                    (NEONvgetlanes (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNu8
  : NVGetLane<0b11101101, 0b1011, 0b00,
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", ".u8\t$dst, $src[$lane]",
              [(set GPR:$dst,
                    (NEONvgetlaneu (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNu16
  : NVGetLane<0b11101001, 0b1011, 0b01,
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", ".u16\t$dst, $src[$lane]",
              [(set GPR:$dst,
                    (NEONvgetlaneu (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNi32
  : NVGetLane<0b11100001, 0b1011, 0b00,
              (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
              IIC_VMOVSI, "vmov", ".32\t$dst, $src[$lane]",
              [(set GPR:$dst,
                    (extractelt (v2i32 DPR:$src), imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

// Q-register lane reads: extract the D subregister chosen by the
// DSubReg_*_reg transform, then read the lane given by SubReg_*_lane.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32
            (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))),
            (SubReg_i32_lane imm:$lane))>;
// f32 lane reads are plain subregister extracts after copying the source
// into the *_VFP2 register class.
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

// The destination is tied to the first source operand.
let Constraints = "$src1 = $dst" in {
def VSETLNi8
  : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst),
              (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
              IIC_VMOVISL, "vmov", ".8\t$dst[$lane], $src2",
              [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
                                             GPR:$src2, imm:$lane))]>;
def VSETLNi16
  : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst),
              (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
              IIC_VMOVISL, "vmov", ".16\t$dst[$lane], $src2",
              [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
                                             GPR:$src2, imm:$lane))]>;
def VSETLNi32
  : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst),
              (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
              IIC_VMOVISL, "vmov", ".32\t$dst[$lane], $src2",
              [(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
                                         GPR:$src2, imm:$lane))]>;
}
// Q-register lane writes: update the selected D subregister with VSETLN and
// insert it back into the same subregister of the source.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                   (VSETLNi8
                     (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                           (DSubReg_i8_reg imm:$lane))),
                     GPR:$src2, (SubReg_i8_lane imm:$lane)),
                   (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                   (VSETLNi16
                     (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i16_reg imm:$lane))),
                     GPR:$src2, (SubReg_i16_lane imm:$lane)),
                   (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                   (VSETLNi32
                     (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i32_reg imm:$lane))),
                     GPR:$src2, (SubReg_i32_lane imm:$lane)),
                   (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: place the scalar in the first subregister / lane 0 of an
// IMPLICIT_DEF vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
def : Pat<(v2f64 (scalar_to_vector DPR:$src)),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         arm_dsubreg_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         arm_dsubreg_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         arm_dsubreg_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"),
          [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"),
          [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;

def VDUP8d  : VDUPD<0b11101100, 0b00, ".8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, ".16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, ".32", v2i32>;
def VDUP8q  : VDUPQ<0b11101110, 0b00, ".8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, ".16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>;

// f32 duplicates from a core register: the GPR value is bitconverted to f32
// in the matched pattern.
def VDUPfd
  : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", ".32\t$dst, $src",
          [(set DPR:$dst,
                (v2f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;
def VDUPfq
  : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", ".32\t$dst, $src",
          [(set QPR:$dst,
                (v4f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
        (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
        [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;

// The Q-register form still reads its source lane from a D register, so the
// result and operand types are separate parameters.
class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr,
              ValueType ResTy, ValueType OpTy>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
        (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
        [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;

def VDUPLN8d  : VDUPLND<0b00, 0b01, "vdup.8", v8i8>;
def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>;
def VDUPLN32d : VDUPLND<0b01, 0b00, "vdup.32", v2i32>;
def VDUPLNfd  : VDUPLND<0b01, 0b00, "vdup.32", v2f32>;
def VDUPLN8q  : VDUPLNQ<0b00, 0b01, "vdup.8", v16i8, v8i8>;
def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>;
def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>;
def VDUPLNfq  : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>;

// Lane duplicates whose source is a Q register: extract the D subregister
// holding the lane, then use the D-source VDUPLN*q record.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q
                   (v8i8 (EXTRACT_SUBREG QPR:$src,
                                         (DSubReg_i8_reg imm:$lane))),
                   (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q
                   (v4i16 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q
                   (v2i32 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLNfq
                   (v2f32 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

// Duplicate an f32 held in an SPR register.
def VDUPfdf
  : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0,
        (outs DPR:$dst), (ins SPR:$src),
        IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
        [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;

def VDUPfqf
  : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0,
        (outs QPR:$dst), (ins SPR:$src),
        IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
        [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// 64-bit lane duplicates: copy the selected D subregister over the other one.
def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
          (INSERT_SUBREG QPR:$src,
                         (i64 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_f64_reg imm:$lane))),
                         (DSubReg_f64_other_reg imm:$lane))>;
def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
          (INSERT_SUBREG QPR:$src,
                         (f64 (EXTRACT_SUBREG QPR:$src,
                                              (DSubReg_f64_reg imm:$lane))),
                         (DSubReg_f64_other_reg imm:$lane))>;

// VMOVN : Vector Narrowing Move
defm VMOVN
  : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00100, 0, 0,
                IIC_VMOVD, "vmovn.i", int_arm_neon_vmovn>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs
  : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 0, 0,
                IIC_VQUNAiD, "vqmovn.s", int_arm_neon_vqmovns>;
defm VQMOVNu
  : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 1, 0,
                IIC_VQUNAiD, "vqmovn.u", int_arm_neon_vqmovnu>;
defm VQMOVNsu
  : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00100, 1, 0,
                IIC_VQUNAiD, "vqmovun.s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VLInt_QHS<0, 1, 0b1010, 0, 0, 1, "vmovl.s", int_arm_neon_vmovls>;
defm VMOVLu : N2VLInt_QHS<1, 1, 0b1010, 0, 0, 1, "vmovl.u", int_arm_neon_vmovlu>;

// Vector Conversions.

// VCVT : Vector Convert Between Floating-Point and Integers
// D-register forms.
def VCVTf2sd
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
         "vcvt.s32.f32", v2i32, v2f32, fp_to_sint>;
def VCVTf2ud
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
         "vcvt.u32.f32", v2i32, v2f32, fp_to_uint>;
def VCVTs2fd
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
         "vcvt.f32.s32", v2f32, v2i32, sint_to_fp>;
def VCVTu2fd
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
         "vcvt.f32.u32", v2f32, v2i32, uint_to_fp>;

// Q-register forms.
def VCVTf2sq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
         "vcvt.s32.f32", v4i32, v4f32, fp_to_sint>;
def VCVTf2uq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
         "vcvt.u32.f32", v4i32, v4f32, fp_to_uint>;
def VCVTs2fq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
         "vcvt.f32.s32", v4f32, v4i32, sint_to_fp>;
def VCVTu2fq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
         "vcvt.f32.u32", v4f32, v4i32, uint_to_fp>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
// Note: Some of the opcode bits in the following VCVT instructions need to
// be encoded based on the immed values.
// D-register forms.
def VCVTf2xsd
  : N2VCvtD<0, 1, 0b000000, 0b1111, 0, 1,
            "vcvt.s32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud
  : N2VCvtD<1, 1, 0b000000, 0b1111, 0, 1,
            "vcvt.u32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd
  : N2VCvtD<0, 1, 0b000000, 0b1110, 0, 1,
            "vcvt.f32.s32", v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd
  : N2VCvtD<1, 1, 0b000000, 0b1110, 0, 1,
            "vcvt.f32.u32", v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;

// Q-register forms.
def VCVTf2xsq
  : N2VCvtQ<0, 1, 0b000000, 0b1111, 0, 1,
            "vcvt.s32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq
  : N2VCvtQ<1, 1, 0b000000, 0b1111, 0, 1,
            "vcvt.u32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq
  : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1,
            "vcvt.f32.s32", v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq
  : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1,
            "vcvt.f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;

// Vector Reverse.

// VREV64 : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64.8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64.16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64.32", v2i32>;
def VREV64df  : VREV64D<0b10, "vrev64.32", v2f32>;

def VREV64q8  : VREV64Q<0b00, "vrev64.8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64.16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64.32", v4i32>;
def VREV64qf  : VREV64Q<0b10, "vrev64.32", v4f32>;

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32.8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32.16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32.8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>;

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;

def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;

// Other Vector Shuffles.

// VEXT : Vector Extract

class VEXTd<string OpcodeStr, ValueType Ty>
  : N3V<0, 1, 0b11, 0b0000, 0, 0,
        (outs DPR:$dst), (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
        !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
        [(set DPR:$dst,
              (Ty (NEONvext (Ty DPR:$lhs), (Ty DPR:$rhs), imm:$index)))]>;

class VEXTq<string OpcodeStr, ValueType Ty>
  : N3V<0, 1, 0b11, 0b0000, 1, 0,
        (outs QPR:$dst), (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
        !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
        [(set QPR:$dst,
              (Ty (NEONvext (Ty QPR:$lhs), (Ty QPR:$rhs), imm:$index)))]>;

def VEXTd8  : VEXTd<"vext.8", v8i8>;
def VEXTd16 : VEXTd<"vext.16", v4i16>;
def VEXTd32 : VEXTd<"vext.32", v2i32>;
def VEXTdf  : VEXTd<"vext.32", v2f32>;

def VEXTq8  : VEXTq<"vext.8", v16i8>;
def VEXTq16 : VEXTq<"vext.16", v8i16>;
def VEXTq32 : VEXTq<"vext.32", v4i32>;
def VEXTqf  : VEXTq<"vext.32", v4f32>;

// VTRN : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn.8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn.16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn.32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn.8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn.16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn.32">;

// VUZP : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp.8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp.16">;
def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp.32">;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp.8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp.16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp.32">;

// VZIP : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip.8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip.16">;
def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip.32">;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip.8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip.16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">;

// Vector Table Lookup and Table Extension.

// VTBL : Vector Table Lookup
// VTBL1 takes a single table register; the multi-register forms are grouped
// under hasExtraSrcRegAllocReq.
def VTBL1
  : N3V<1, 1, 0b11, 0b1000, 0, 0,
        (outs DPR:$dst), (ins DPR:$tbl1, DPR:$src), IIC_VTB1,
        "vtbl.8\t$dst, \\{$tbl1\\}, $src", "",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
  : N3V<1, 1, 0b11, 0b1001, 0, 0,
        (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2,
        "vtbl.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbl2 DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBL3
  : N3V<1, 1, 0b11, 0b1010, 0, 0,
        (outs DPR:$dst),
        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3,
        "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbl3 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3,
                                        DPR:$src)))]>;
def VTBL4
  : N3V<1, 1, 0b11, 0b1011, 0, 0,
        (outs DPR:$dst),
        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4,
        "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3,
                                        DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1

// VTBX : Vector Table Extension
// Every VTBX record ties $orig to $dst.
def VTBX1
  : N3V<1, 1, 0b11, 0b1000, 1, 0,
        (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1,
        "vtbx.8\t$dst, \\{$tbl1\\}, $src", "$orig = $dst",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbx1 DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
  : N3V<1, 1, 0b11, 0b1001, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2,
        "vtbx.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbx2 DPR:$orig, DPR:$tbl1, DPR:$tbl2,
                                        DPR:$src)))]>;
def VTBX3
  : N3V<1, 1, 0b11, 0b1010, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3,
        "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, DPR:$tbl2,
                                        DPR:$tbl3, DPR:$src)))]>;
def VTBX4
  : N3V<1, 1, 0b11, 0b1011, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
        IIC_VTBX4,
        "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, DPR:$tbl2,
                                        DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// These need separate instructions because they must use the DPR_VFP2
// register class, which has SPR sub-registers.

// Each instruction below is marked neverHasSideEffects and is followed by an
// anonymous *Pat record naming the scalar operation and the instruction.

// Vector Add Operations used for single-precision FP
let neverHasSideEffects = 1 in {
def VADDfd_sfp
  : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd, 1>;
}
def : N3VDsPat<fadd, VADDfd_sfp>;

// Vector Sub Operations used for single-precision FP
let neverHasSideEffects = 1 in {
def VSUBfd_sfp
  : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub, 0>;
}
def : N3VDsPat<fsub, VSUBfd_sfp>;

// Vector Multiply Operations used for single-precision FP
let neverHasSideEffects = 1 in {
def VMULfd_sfp
  : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul, 1>;
}
def : N3VDsPat<fmul, VMULfd_sfp>;

// Vector Multiply-Accumulate/Subtract used for single-precision FP
let neverHasSideEffects = 1 in {
def VMLAfd_sfp
  : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD,
               "vmla.f32", v2f32, fmul, fadd>;
}
def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;

let neverHasSideEffects = 1 in {
def VMLSfd_sfp
  : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD,
               "vmls.f32", v2f32, fmul, fsub>;
}
def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;

// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in {
def VABSfd_sfp
  : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
             IIC_VUNAD, "vabs.f32", v2f32, v2f32, int_arm_neon_vabs>;
}
def : N2VDIntsPat<fabs, VABSfd_sfp>;

// Vector Negate used for single-precision FP
let neverHasSideEffects = 1 in {
def VNEGf32d_sfp
  : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
        "vneg.f32\t$dst, $src", "", []>;
}
def : N2VDIntsPat<fneg, VNEGf32d_sfp>;

// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in {
def VCVTf2sd_sfp
  : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
          "vcvt.s32.f32", v2i32, v2f32, fp_to_sint>;
}
def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;

let neverHasSideEffects = 1 in {
def VCVTf2ud_sfp
  : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
          "vcvt.u32.f32", v2i32, v2f32, fp_to_uint>;
}
def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;

let neverHasSideEffects = 1 in {
def VCVTs2fd_sfp
  : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
          "vcvt.f32.s32", v2f32, v2i32, sint_to_fp>;
}
def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;

let neverHasSideEffects = 1 in {
def VCVTu2fd_sfp
  : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
          "vcvt.f32.u32", v2f32, v2i32, uint_to_fp>;
}
def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// Each pattern rewrites a bitconvert between two types held in the same
// register class to the source register itself, retyped.

// D-register types: result v1i64
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
// result v2i32
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
// result v4i16
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
// result v8i8
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
// result f64
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
// result v2f32
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

// Q-register types: result v2i64
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
// result v4i32
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
// result v8i16
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 
QPR:$src))), (v8i16 QPR:$src)>; 2900def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; 2901def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; 2902def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; 2903def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; 2904def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; 2905def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; 2906def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; 2907def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; 2908def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; 2909def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; 2910def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; 2911def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; 2912def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; 2913def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; 2914def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; 2915