ARMInstrNEON.td revision 8cdb26968669692e88ee0e4a444032bbd52da0d2
//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profile shared by all vector compare nodes below: one integer result
// and two operands that must have the same type as each other.
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;

// Vector compare / test nodes, one per ARMISD opcode.
def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
// Shift by immediate: result and source have the same integer type; the
// shift amount is an i32.
def SDTARMVSH    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisVT<2, i32>]>;
// Long/narrow shift by immediate: result and source are both integer but are
// not constrained to match.
def SDTARMVSHX   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                        SDTCisVT<2, i32>]>;
// Shift and insert: two sources of the result type plus the i32 amount.
def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction: i32 result, integer vector source, i32 lane number.
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

// VEXT: two vector sources of the result type plus an i32 immediate.
def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Single-source shuffles: result and source share one vector type.
def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles: both results and both sources share one vector type.
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//

// addrmode_neonldstm := reg
//
/* TODO: Take advantage of vldm.
def addrmode_neonldstm : Operand<i32>,
                ComplexPattern<i32, 2, "SelectAddrModeNeonLdStM", []> {
  let PrintMethod = "printAddrNeonLdStMOperand";
  let MIOperandInfo = (ops GPR, i32imm);
}
*/

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

/* TODO: Take advantage of vldm.
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
def VLDMD : NI<(outs),
               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
               IIC_fpLoadm,
               "vldm${addr:submode} ${addr:base}, $dst1",
               []> {
  let Inst{27-25} = 0b110;
  let Inst{20} = 1;
  let Inst{11-9} = 0b101;
}

def VLDMS : NI<(outs),
               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
               IIC_fpLoadm,
               "vldm${addr:submode} ${addr:base}, $dst1",
               []> {
  let Inst{27-25} = 0b110;
  let Inst{20} = 1;
  let Inst{11-9} = 0b101;
}
}
*/

// Use vldmia to load a Q register as a D register pair.
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
                IIC_fpLoadm,
                "vldmia $addr, ${dst:dregpair}",
                [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
  let Inst{27-25} = 0b110;
  let Inst{24} = 0; // P bit
  let Inst{23} = 1; // U bit
  let Inst{20} = 1;
  let Inst{11-9} = 0b101;
}

// Use vstmia to store a Q register as a D register pair.
def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
                IIC_fpStorem,
                "vstmia $addr, ${src:dregpair}",
                [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
  let Inst{27-25} = 0b110;
  let Inst{24} = 0; // P bit
  let Inst{23} = 1; // U bit
  let Inst{20} = 0;
  let Inst{11-9} = 0b101;
}

//   VLD1     : Vector Load (multiple single elements)
// The D form loads one DPR; the Q form loads a QPR printed as a D-register
// pair. Both are selected from the intrinsic passed as IntOp.
class VLD1D<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : NLdSt<0,0b10,0b0111,op7_4,
          (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
          !strconcat(OpcodeStr, "\t\\{$dst\\}, $addr"), "",
          [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
class VLD1Q<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : NLdSt<0,0b10,0b1010,op7_4,
          (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
          !strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"), "",
          [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;

def  VLD1d8   : VLD1D<0b0000, "vld1.8", v8i8, int_arm_neon_vld1>;
def  VLD1d16  : VLD1D<0b0100, "vld1.16", v4i16, int_arm_neon_vld1>;
def  VLD1d32  : VLD1D<0b1000, "vld1.32", v2i32, int_arm_neon_vld1>;
def  VLD1df   : VLD1D<0b1000, "vld1.32", v2f32, int_arm_neon_vld1>;
def  VLD1d64  : VLD1D<0b1100, "vld1.64", v1i64, int_arm_neon_vld1>;

def  VLD1q8   : VLD1Q<0b0000, "vld1.8", v16i8, int_arm_neon_vld1>;
def  VLD1q16  : VLD1Q<0b0100, "vld1.16", v8i16, int_arm_neon_vld1>;
def  VLD1q32  : VLD1Q<0b1000, "vld1.32", v4i32, int_arm_neon_vld1>;
def  VLD1qf   : VLD1Q<0b1000, "vld1.32", v4f32, int_arm_neon_vld1>;
def  VLD1q64  : VLD1Q<0b1100, "vld1.64", v2i64, int_arm_neon_vld1>;

let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {

//   VLD2     : Vector Load (multiple 2-element structures)
// These have no selection pattern; the D form defines two DPRs, the Q form
// four.
class VLD2D<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b10,0b1000,op7_4,
          (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr), IIC_VLD2,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), "", []>;
class VLD2Q<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b10,0b0011,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD2,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
          "", []>;

def  VLD2d8   : VLD2D<0b0000, "vld2.8">;
def  VLD2d16  : VLD2D<0b0100, "vld2.16">;
def  VLD2d32  : VLD2D<0b1000, "vld2.32">;
// The 64-bit variant is printed as a two-register vld1.64.
def  VLD2d64  : NLdSt<0,0b10,0b1010,0b1100,
                      (outs DPR:$dst1, DPR:$dst2),
                      (ins addrmode6:$addr), IIC_VLD1,
                      "vld1.64\t\\{$dst1,$dst2\\}, $addr", "", []>;

def  VLD2q8   : VLD2Q<0b0000, "vld2.8">;
def  VLD2q16  : VLD2Q<0b0100, "vld2.16">;
def  VLD2q32  : VLD2Q<0b1000, "vld2.32">;

//   VLD3     : Vector Load (multiple 3-element structures)
// The WB form additionally defines a GPR tied to the address operand.
class VLD3D<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b10,0b0100,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr), IIC_VLD3,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "", []>;
class VLD3WB<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b10,0b0101,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$addr), IIC_VLD3,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"),
          "$addr.addr = $wb", []>;

def  VLD3d8   : VLD3D<0b0000, "vld3.8">;
def  VLD3d16  : VLD3D<0b0100, "vld3.16">;
def  VLD3d32  : VLD3D<0b1000, "vld3.32">;
// The 64-bit variant is printed as a three-register vld1.64.
def  VLD3d64  : NLdSt<0,0b10,0b0110,0b1100,
                      (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
                      (ins addrmode6:$addr), IIC_VLD1,
                      "vld1.64\t\\{$dst1,$dst2,$dst3\\}, $addr", "", []>;

// vld3 to double-spaced even registers.
def  VLD3q8a  : VLD3WB<0b0000, "vld3.8">;
def  VLD3q16a : VLD3WB<0b0100, "vld3.16">;
def  VLD3q32a : VLD3WB<0b1000, "vld3.32">;

// vld3 to double-spaced odd registers.
def  VLD3q8b  : VLD3WB<0b0000, "vld3.8">;
def  VLD3q16b : VLD3WB<0b0100, "vld3.16">;
def  VLD3q32b : VLD3WB<0b1000, "vld3.32">;

//   VLD4     : Vector Load (multiple 4-element structures)
// The WB form additionally defines a GPR tied to the address operand.
class VLD4D<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b10,0b0000,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD4,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
          "", []>;
class VLD4WB<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b10,0b0001,op7_4,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$addr), IIC_VLD4,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
          "$addr.addr = $wb", []>;

def  VLD4d8   : VLD4D<0b0000, "vld4.8">;
def  VLD4d16  : VLD4D<0b0100, "vld4.16">;
def  VLD4d32  : VLD4D<0b1000, "vld4.32">;
// The 64-bit variant is printed as a four-register vld1.64.
def  VLD4d64  : NLdSt<0,0b10,0b0010,0b1100,
                      (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
                      (ins addrmode6:$addr), IIC_VLD1,
                      "vld1.64\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>;

// vld4 to double-spaced even registers.
def  VLD4q8a  : VLD4WB<0b0000, "vld4.8">;
def  VLD4q16a : VLD4WB<0b0100, "vld4.16">;
def  VLD4q32a : VLD4WB<0b1000, "vld4.32">;

// vld4 to double-spaced odd registers.
def  VLD4q8b  : VLD4WB<0b0000, "vld4.8">;
def  VLD4q16b : VLD4WB<0b0100, "vld4.16">;
def  VLD4q32b : VLD4WB<0b1000, "vld4.32">;

//   VLD1LN   : Vector Load (single element to one lane)
//   FIXME: Not yet implemented.

//   VLD2LN   : Vector Load (single 2-element structure to one lane)
// Each destination register is tied to a matching source register, since
// only the selected lane is written.
class VLD2LN<bits<4> op11_8, string OpcodeStr>
  : NLdSt<1,0b10,op11_8,0b0000,
          (outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VLD2,
          !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"),
          "$src1 = $dst1, $src2 = $dst2", []>;

def VLD2LNd8   : VLD2LN<0b0001, "vld2.8">;
def VLD2LNd16  : VLD2LN<0b0101, "vld2.16">;
def VLD2LNd32  : VLD2LN<0b1001, "vld2.32">;

// vld2 to double-spaced even registers.
def VLD2LNq16a : VLD2LN<0b0101, "vld2.16">;
def VLD2LNq32a : VLD2LN<0b1001, "vld2.32">;

// vld2 to double-spaced odd registers.
def VLD2LNq16b : VLD2LN<0b0101, "vld2.16">;
def VLD2LNq32b : VLD2LN<0b1001, "vld2.32">;

//   VLD3LN   : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, string OpcodeStr>
  : NLdSt<1,0b10,op11_8,0b0000,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
               nohash_imm:$lane),
          IIC_VLD3,
          !strconcat(OpcodeStr,
          "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"),
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;

def VLD3LNd8   : VLD3LN<0b0010, "vld3.8">;
def VLD3LNd16  : VLD3LN<0b0110, "vld3.16">;
def VLD3LNd32  : VLD3LN<0b1010, "vld3.32">;

// vld3 to double-spaced even registers.
def VLD3LNq16a : VLD3LN<0b0110, "vld3.16">;
def VLD3LNq32a : VLD3LN<0b1010, "vld3.32">;

// vld3 to double-spaced odd registers.
def VLD3LNq16b : VLD3LN<0b0110, "vld3.16">;
def VLD3LNq32b : VLD3LN<0b1010, "vld3.32">;

//   VLD4LN   : Vector Load (single 4-element structure to one lane)
// Each destination register is tied to a matching source register.
class VLD4LN<bits<4> op11_8, string OpcodeStr>
  : NLdSt<1,0b10,op11_8,0b0000,
          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
               nohash_imm:$lane),
          IIC_VLD4,
          !strconcat(OpcodeStr,
          "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr"),
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;

def VLD4LNd8   : VLD4LN<0b0011, "vld4.8">;
def VLD4LNd16  : VLD4LN<0b0111, "vld4.16">;
def VLD4LNd32  : VLD4LN<0b1011, "vld4.32">;

// vld4 to double-spaced even registers.
def VLD4LNq16a : VLD4LN<0b0111, "vld4.16">;
def VLD4LNq32a : VLD4LN<0b1011, "vld4.32">;

// vld4 to double-spaced odd registers.
def VLD4LNq16b : VLD4LN<0b0111, "vld4.16">;
def VLD4LNq32b : VLD4LN<0b1011, "vld4.32">;

//   VLD1DUP  : Vector Load (single element to all lanes)
//   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
//   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
//   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
//   FIXME: Not yet implemented.
} // mayLoad = 1, hasExtraDefRegAllocReq = 1

//   VST1     : Vector Store (multiple single elements)
// The D form stores one DPR; the Q form stores a QPR printed as a D-register
// pair. Both are selected from the intrinsic passed as IntOp.
class VST1D<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : NLdSt<0,0b00,0b0111,op7_4,
          (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src\\}, $addr"), "",
          [(IntOp addrmode6:$addr, (Ty DPR:$src))]>;
class VST1Q<bits<4> op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : NLdSt<0,0b00,0b1010,op7_4,
          (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST,
          !strconcat(OpcodeStr, "\t${src:dregpair}, $addr"), "",
          [(IntOp addrmode6:$addr, (Ty QPR:$src))]>;

let hasExtraSrcRegAllocReq = 1 in {
def  VST1d8   : VST1D<0b0000, "vst1.8", v8i8, int_arm_neon_vst1>;
def  VST1d16  : VST1D<0b0100, "vst1.16", v4i16, int_arm_neon_vst1>;
def  VST1d32  : VST1D<0b1000, "vst1.32", v2i32, int_arm_neon_vst1>;
def  VST1df   : VST1D<0b1000, "vst1.32", v2f32, int_arm_neon_vst1>;
def  VST1d64  : VST1D<0b1100, "vst1.64", v1i64, int_arm_neon_vst1>;

def  VST1q8   : VST1Q<0b0000, "vst1.8", v16i8, int_arm_neon_vst1>;
def  VST1q16  : VST1Q<0b0100, "vst1.16", v8i16, int_arm_neon_vst1>;
def  VST1q32  : VST1Q<0b1000, "vst1.32", v4i32, int_arm_neon_vst1>;
def  VST1qf   : VST1Q<0b1000, "vst1.32", v4f32, int_arm_neon_vst1>;
def  VST1q64  : VST1Q<0b1100, "vst1.64", v2i64, int_arm_neon_vst1>;
} // hasExtraSrcRegAllocReq

let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {

//   VST2     : Vector Store (multiple 2-element structures)
// These have no selection pattern; the D form reads two DPRs, the Q form
// four.
class VST2D<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b00,0b1000,op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2\\}, $addr"), "", []>;
class VST2Q<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b00,0b0011,op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
          "", []>;

def  VST2d8   : VST2D<0b0000, "vst2.8">;
def  VST2d16  : VST2D<0b0100, "vst2.16">;
def  VST2d32  : VST2D<0b1000, "vst2.32">;
// The 64-bit variant is printed as a two-register vst1.64.
def  VST2d64  : NLdSt<0,0b00,0b1010,0b1100,
                      (outs),
                      (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
                      "vst1.64\t\\{$src1,$src2\\}, $addr", "", []>;

def  VST2q8   : VST2Q<0b0000, "vst2.8">;
def  VST2q16  : VST2Q<0b0100, "vst2.16">;
def  VST2q32  : VST2Q<0b1000, "vst2.32">;

//   VST3     : Vector Store (multiple 3-element structures)
// The WB form additionally defines a GPR tied to the address operand.
class VST3D<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b00,0b0100,op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), "", []>;
class VST3WB<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b00,0b0101,op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"),
          "$addr.addr = $wb", []>;

def  VST3d8   : VST3D<0b0000, "vst3.8">;
def  VST3d16  : VST3D<0b0100, "vst3.16">;
def  VST3d32  : VST3D<0b1000, "vst3.32">;
// The 64-bit variant is printed as a three-register vst1.64.
def  VST3d64  : NLdSt<0,0b00,0b0110,0b1100,
                      (outs),
                      (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
                      IIC_VST,
                      "vst1.64\t\\{$src1,$src2,$src3\\}, $addr", "", []>;

// vst3 to double-spaced even registers.
def  VST3q8a  : VST3WB<0b0000, "vst3.8">;
def  VST3q16a : VST3WB<0b0100, "vst3.16">;
def  VST3q32a : VST3WB<0b1000, "vst3.32">;

// vst3 to double-spaced odd registers.
def  VST3q8b  : VST3WB<0b0000, "vst3.8">;
def  VST3q16b : VST3WB<0b0100, "vst3.16">;
def  VST3q32b : VST3WB<0b1000, "vst3.32">;

//   VST4     : Vector Store (multiple 4-element structures)
// The WB form additionally defines a GPR tied to the address operand.
class VST4D<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b00,0b0000,op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
          "", []>;
class VST4WB<bits<4> op7_4, string OpcodeStr>
  : NLdSt<0,0b00,0b0001,op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
          "$addr.addr = $wb", []>;

def  VST4d8   : VST4D<0b0000, "vst4.8">;
def  VST4d16  : VST4D<0b0100, "vst4.16">;
def  VST4d32  : VST4D<0b1000, "vst4.32">;
// The 64-bit variant is printed as a four-register vst1.64.
def  VST4d64  : NLdSt<0,0b00,0b0010,0b1100,
                      (outs),
                      (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
                           DPR:$src4), IIC_VST,
                      "vst1.64\t\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>;

// vst4 to double-spaced even registers.
def  VST4q8a  : VST4WB<0b0000, "vst4.8">;
def  VST4q16a : VST4WB<0b0100, "vst4.16">;
def  VST4q32a : VST4WB<0b1000, "vst4.32">;

// vst4 to double-spaced odd registers.
def  VST4q8b  : VST4WB<0b0000, "vst4.8">;
def  VST4q16b : VST4WB<0b0100, "vst4.16">;
def  VST4q32b : VST4WB<0b1000, "vst4.32">;

//   VST1LN   : Vector Store (single element from one lane)
//   FIXME: Not yet implemented.

//   VST2LN   : Vector Store (single 2-element structure from one lane)
// op11_8 is forwarded unchanged to NLdSt. For the lane load/store
// definitions in this file the low two bits of op11_8 follow the structure
// size (0b01 for 2-element, 0b10 for 3-element, 0b11 for 4-element) and the
// high two bits follow the element size (0b00 = 8, 0b01 = 16, 0b10 = 32).
// The vst2 definitions therefore use 0b0001 / 0b0101 / 0b1001, exactly like
// their VLD2LN counterparts; 0b0000 / 0b0100 / 0b1000 would not select the
// 2-element form.
class VST2LN<bits<4> op11_8, string OpcodeStr>
  : NLdSt<1,0b00,op11_8,0b0000,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr"),
          "", []>;

def VST2LNd8   : VST2LN<0b0001, "vst2.8">;
def VST2LNd16  : VST2LN<0b0101, "vst2.16">;
def VST2LNd32  : VST2LN<0b1001, "vst2.32">;

// vst2 to double-spaced even registers.
def VST2LNq16a : VST2LN<0b0101, "vst2.16">;
def VST2LNq32a : VST2LN<0b1001, "vst2.32">;

// vst2 to double-spaced odd registers.
def VST2LNq16b : VST2LN<0b0101, "vst2.16">;
def VST2LNq32b : VST2LN<0b1001, "vst2.32">;

//   VST3LN   : Vector Store (single 3-element structure from one lane)
// Stores the selected lane of three D registers; no selection pattern.
class VST3LN<bits<4> op11_8, string OpcodeStr>
  : NLdSt<1,0b00,op11_8,0b0000,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
               nohash_imm:$lane),
          IIC_VST,
          !strconcat(OpcodeStr,
          "\t\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr"), "", []>;

def VST3LNd8   : VST3LN<0b0010, "vst3.8">;
def VST3LNd16  : VST3LN<0b0110, "vst3.16">;
def VST3LNd32  : VST3LN<0b1010, "vst3.32">;

// vst3 to double-spaced even registers.
def VST3LNq16a : VST3LN<0b0110, "vst3.16">;
def VST3LNq32a : VST3LN<0b1010, "vst3.32">;

// vst3 to double-spaced odd registers.
def VST3LNq16b : VST3LN<0b0110, "vst3.16">;
def VST3LNq32b : VST3LN<0b1010, "vst3.32">;

//   VST4LN   : Vector Store (single 4-element structure from one lane)
// Stores the selected lane of four D registers; no selection pattern.
class VST4LND<bits<4> op11_8, string OpcodeStr>
  : NLdSt<1,0b00,op11_8,0b0000,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
               nohash_imm:$lane),
          IIC_VST,
          !strconcat(OpcodeStr,
          "\t\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr"),
          "", []>;

def VST4LNd8   : VST4LND<0b0011, "vst4.8">;
def VST4LNd16  : VST4LND<0b0111, "vst4.16">;
def VST4LNd32  : VST4LND<0b1011, "vst4.32">;
} // mayStore = 1, hasExtraSrcRegAllocReq = 1


//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
// Each transform divides the lane number by the number of lanes per D
// register for that element size and adds the result to 5.
def DSubReg_i8_reg        : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32);
}]>;
def DSubReg_i16_reg       : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32);
}]>;
def DSubReg_i32_reg       : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32);
}]>;
def DSubReg_f64_reg       : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32);
}]>;
// Yields the index of the D sub-register that the lane does NOT select.
def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.)
def SSubReg_f32_reg       : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(1 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// Each transform masks the lane number down to the lane count of one D
// register for that element size.
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations, both double- and quad-register.
// The D form passes 0 and the Q form passes 1 in the N2V slot before op4.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst), (ins DPR:$src),
        IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst), (ins QPR:$src),
        IIC_VUNAQ, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;

// Basic 2-register operations, scalar single-precision.
// The instruction itself carries no pattern; N2VDsPat supplies one.
class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
            bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
        IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;

// Selects a scalar op on an SPR by inserting it into sub-register 0 of an
// undefined OpTy vector, running Inst, and extracting sub-register 0.
class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
  : NEONFPPat<(ResTy (OpNode SPR:$a)),
       (EXTRACT_SUBREG
           (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
        arm_ssubreg_0)>;

// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst), (ins DPR:$src),
        itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst), (ins QPR:$src),
        itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Basic 2-register intrinsics, scalar single-precision
class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
               bits<2> op17_16, bits<5> op11_7, bit op4,
               InstrItinClass itin, string OpcodeStr,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin,
        !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;

// f32 counterpart of N2VDsPat with the vector type fixed to v2f32.
class N2VDIntsPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
       (EXTRACT_SUBREG
           (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
        arm_ssubreg_0)>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$dst), (ins QPR:$src),
        itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;

// Long 2-register intrinsics. (This is currently only used for VMOVL and is
// derived from N2VImm instead of N2V because of the way the size is encoded.)
class N2VLInt<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op6, bit op4, InstrItinClass itin, string OpcodeStr,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs QPR:$dst), (ins DPR:$src),
           itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
           [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written, so each source is tied to a result.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$dst1, DPR:$dst2),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
        "$src1 = $dst1, $src2 = $dst2", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$dst1, QPR:$dst2),
        (ins QPR:$src1, QPR:$src2), itin,
        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
        "$src1 = $dst1, $src2 = $dst2", []>;

// Basic 3-register operations, both double- and quad-register.
// The "SL" variants take the second operand as one lane of a D register,
// matched through NEONvduplane; the "SL16" variants restrict that register
// to the DPR_8 class instead of DPR_VFP2.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        IIC_VMULi16D,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_8:$src2),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        IIC_VMULi16Q,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

// Basic 3-register operations, scalar single-precision
// The instruction itself carries no pattern; N3VDsPat supplies one.
class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            string OpcodeStr, ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", []> {
  let isCommutable = Commutable;
}
// Selects a two-operand f32 op by inserting each SPR into sub-register 0 of
// an undefined v2f32, running Inst, and extracting sub-register 0.
class N3VDsPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
       (EXTRACT_SUBREG
           (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
                 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
        arm_ssubreg_0)>;

// Basic 3-register intrinsics, both double- and quad-register.
// Double-register intrinsic: $dst = IntOp($src1, $src2).
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set DPR:$dst,
              (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Double-register intrinsic with a by-lane second operand: the lane of
// $src2 (a DPR_VFP2 register) is replicated with NEONvduplane first.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"),
        "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

// Same as N3VDIntSL, but the lane source comes from the DPR_8 class.
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"),
        "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_8:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

// Quad-register intrinsic: $dst = IntOp($src1, $src2).
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set QPR:$dst,
              (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Quad-register intrinsic with a by-lane second operand.  The lane is read
// from a double register (type OpTy) and replicated to the quad type ResTy.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"),
        "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// Same as N3VQIntSL, but the lane source comes from the DPR_8 class.
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"),
        "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// Multiply-Add/Sub operations, both double- and quad-register.
// In every class of this group $src1 is tied to $dst ("$src1 = $dst") and is
// combined by OpNode/ShOp with the product MulOp($src2, $src3).
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"),
        "$src1 = $dst",
        [(set DPR:$dst,
              (Ty (OpNode DPR:$src1,
                          (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;

// Double-register multiply-op with a by-lane multiplier (DPR_VFP2 source).
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"),
        "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
                                                     imm:$lane)))))))]>;

// Double-register multiply-op with a by-lane multiplier (DPR_8 source).
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"),
        "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_8:$src3),
                                                     imm:$lane)))))))]>;

// Quad-register multiply-op.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, QPR:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"),
        "$src1 = $dst",
        [(set QPR:$dst,
              (Ty (OpNode QPR:$src1,
                          (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;

// Quad-register multiply-op with a by-lane multiplier (DPR_VFP2 source).
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr,
                  ValueType ResTy, ValueType OpTy,
                  SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"),
        "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                         (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                              imm:$lane)))))))]>;

// Quad-register multiply-op with a by-lane multiplier (DPR_8 source).
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"),
        "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                         (ResTy (NEONvduplane (OpTy DPR_8:$src3),
                                                              imm:$lane)))))))]>;

// Multiply-Add/Sub operations, scalar single-precision.
// As with N3VDs, the instruction has an empty pattern list; the matching
// selection pattern is provided by N3VDMulOpsPat.  Ty, MulOp and OpNode are
// accepted but not referenced inside this class.
class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr,
                 ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"),
        "$src1 = $dst",
        []>;

// Selects f32 OpNode($acc, MulNode($a, $b)) as Inst.  Each SPR operand is
// placed in sub-register arm_ssubreg_0 of an undefined v2f32 value and the
// f32 result is read back from the same sub-register.
class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
                (Inst
                  (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0),
                  (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
                  (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
                arm_ssubreg_0)>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"),
        "$src1 = $dst",
        [(set DPR:$dst,
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;

class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, QPR:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"),
        "$src1 = $dst",
        [(set QPR:$dst,
              (ResTy (IntOp (OpTy QPR:$src1),
                            (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"),
        "$src1 = $dst",
        [(set QPR:$dst,
              (TyQ (IntOp (TyQ QPR:$src1),
                          (TyD DPR:$src2), (TyD DPR:$src3))))]>;

// By-lane variant: the third intrinsic argument is the selected lane of
// $src3 (a DPR_VFP2 register) replicated with NEONvduplane.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin, string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"),
        "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                imm:$lane)))))]>;

// Same as N3VLInt3SL, but the lane source comes from the DPR_8 class.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"),
        "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src3),
                                                imm:$lane)))))]>;


// Narrowing 3-register intrinsics.
// Two quad-register sources (type TyQ) produce a double-register result
// (type TyD).  The itinerary is fixed to IIC_VBINi4D.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr,
              ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        IIC_VBINi4D,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set DPR:$dst,
              (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Two double-register sources (type TyD) produce a quad-register result
// (type TyQ).
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set QPR:$dst,
              (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long intrinsic with a by-lane second operand (DPR_VFP2 lane source).
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"),
        "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]>;

// Long intrinsic with a by-lane second operand (DPR_8 lane source).
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"),
        "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]>;

// Wide 3-register intrinsics.
// A quad-register source (TyQ) and a double-register source (TyD) produce a
// quad-register result.  The itinerary is fixed to IIC_VSUBiD.
class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr,
              ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2),
        IIC_VSUBiD,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set QPR:$dst,
              (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
// The result type (ResTy) differs from the operand type (OpTy).
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        IIC_VSHLiD,
        !strconcat(OpcodeStr, "\t$dst, $src"),
        "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;

// NOTE(review): the quad-register form also uses the IIC_VSHLiD itinerary;
// confirm that this is intended.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        IIC_VSHLiD,
        !strconcat(OpcodeStr, "\t$dst, $src"),
        "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPALiD,
        !strconcat(OpcodeStr, "\t$dst, $src2"),
        "$src1 = $dst",
        [(set DPR:$dst,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;

class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        IIC_VPALiQ,
        !strconcat(OpcodeStr, "\t$dst, $src2"),
        "$src1 = $dst",
        [(set QPR:$dst,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;

// Shift by immediate,
// both double- and quad-register.
// The shift amount is the i32 immediate operand $SIMM.
class N2VDSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op4, InstrItinClass itin, string OpcodeStr,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           itin,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set DPR:$dst,
                 (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;

class N2VQSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op4, InstrItinClass itin, string OpcodeStr,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           itin,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set QPR:$dst,
                 (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
// Double-register source (OpTy), quad-register result (ResTy).  The
// itinerary is fixed to IIC_VSHLiD.
class N2VLSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op6, bit op4, string OpcodeStr,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs QPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           IIC_VSHLiD,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set QPR:$dst,
                 (ResTy (OpNode (OpTy DPR:$src), (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
// Quad-register source (OpTy), double-register result (ResTy).
class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op6, bit op4, InstrItinClass itin, string OpcodeStr,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs DPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           itin,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set DPR:$dst,
                 (ResTy (OpNode (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// $src1 is tied to $dst; the pattern is add($src1, ShOp($src2, $SIMM)).
class N2VDShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           IIC_VPALiD,
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"),
           "$src1 = $dst",
           [(set DPR:$dst,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;

// NOTE(review): the quad-register form also uses the IIC_VPALiD itinerary;
// confirm that this is intended.
class N2VQShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           IIC_VPALiD,
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"),
           "$src1 = $dst",
           [(set QPR:$dst,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
// $src1 is tied to $dst and is passed to ShOp together with $src2 and the
// immediate.
class N2VDShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           IIC_VSHLiD,
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"),
           "$src1 = $dst",
           [(set DPR:$dst,
                 (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;

class N2VQShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           IIC_VSHLiQ,
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"),
           "$src1 = $dst",
           [(set QPR:$dst,
                 (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op4, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           IIC_VUNAD,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set DPR:$dst,
                 (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;

class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op4, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           IIC_VUNAQ,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set QPR:$dst,
                 (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits.
// Each def appends the element size to OpcodeStr; the 8- and 16-bit variants
// share the *16 itineraries.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   !strconcat(OpcodeStr, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   !strconcat(OpcodeStr, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   !strconcat(OpcodeStr, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   !strconcat(OpcodeStr, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   !strconcat(OpcodeStr, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   !strconcat(OpcodeStr, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// By-lane forms, element sizes of 16 and 32 bits.  The 16-bit variants use
// the *SL16 classes; the 32-bit variants pass the IIC_VMULi32D/Q itineraries.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8,
                       !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D,
                     !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8,
                       !strconcat(OpcodeStr, "16"), v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q,
                     !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits.
// A single D itinerary and a single Q itinerary are used for all sizes.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4,
            itinD, itinD, itinQ, itinQ,
            OpcodeStr, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   !strconcat(OpcodeStr, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   !strconcat(OpcodeStr, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits.
// Defs are named after the (narrower) result type; the mnemonic suffix is
// the source element size.
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, !strconcat(OpcodeStr, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, !strconcat(OpcodeStr, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, !strconcat(OpcodeStr, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL),
// source operand element sizes of 8, 16 and 32 bits.
// Defs are named after the (wider) result type; the mnemonic suffix is the
// source element size.
multiclass N2VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                       bit op4, string OpcodeStr, Intrinsic IntOp> {
  def v8i16 : N2VLInt<op24, op23, 0b001000, op11_8, op7, op6, op4,
                      IIC_VQUNAiD, !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp>;
  def v4i32 : N2VLInt<op24, op23, 0b010000, op11_8, op7, op6, op4,
                      IIC_VQUNAiD, !strconcat(OpcodeStr, "16"),
                      v4i32, v4i16, IntOp>;
  def v2i64 : N2VLInt<op24, op23, 0b100000, op11_8, op7, op6, op4,
                      IIC_VQUNAiD, !strconcat(OpcodeStr, "32"),
                      v2i64, v2i32, IntOp>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits.
// Separate itineraries are taken for the 16- and 32-bit variants of each
// register width.
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16,
                      !strconcat(OpcodeStr, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32,
                      !strconcat(OpcodeStr, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16,
                      !strconcat(OpcodeStr, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32,
                      !strconcat(OpcodeStr, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}

// By-lane intrinsic forms, element sizes of 16 and 32 bits.  The 16-bit
// variants use the *SL16 classes.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, Intrinsic IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          !strconcat(OpcodeStr, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        !strconcat(OpcodeStr, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits.
// The 8-bit variants reuse the 16-bit itineraries.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4,
              itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16,
                      !strconcat(OpcodeStr, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16,
                      !strconcat(OpcodeStr, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}

// ....then also with element size of 64 bits.
// The 64-bit variants reuse the 32-bit itineraries.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4,
               itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32,
                      !strconcat(OpcodeStr, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32,
                      !strconcat(OpcodeStr, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}


// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits.
// Defs are named after the (narrower) result type; the mnemonic suffix is
// the source element size.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp,
                       bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      !strconcat(OpcodeStr, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      !strconcat(OpcodeStr, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      !strconcat(OpcodeStr, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits.
// Defs are named after the (wider) result type; the mnemonic suffix is the
// source element size.
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr,
                      Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin,
                      !strconcat(OpcodeStr, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin,
                      !strconcat(OpcodeStr, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// By-lane long intrinsics.  Unlike N3VLInt_HS, the defs here are named
// after the source operand type.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          !strconcat(OpcodeStr, "16"),
                          v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        !strconcat(OpcodeStr, "32"),
                        v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin,
                      !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp,
                       bit Commutable = 0> {
  def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
                      !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
  def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
                      !strconcat(OpcodeStr, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
                      !strconcat(OpcodeStr, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}


// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits.
// The multiply node is always `mul`; OpNode combines the accumulator with
// the product.  The 8-bit variants reuse the 16-bit itineraries.
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        !strconcat(OpcodeStr, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        !strconcat(OpcodeStr, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        !strconcat(OpcodeStr, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        !strconcat(OpcodeStr, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        !strconcat(OpcodeStr, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        !strconcat(OpcodeStr, "32"), v4i32, mul, OpNode>;
}

// By-lane multiply-op forms, element sizes of 16 and 32 bits.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            !strconcat(OpcodeStr, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          !strconcat(OpcodeStr, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            !strconcat(OpcodeStr, "16"),
                            v8i16, v4i16, mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          !strconcat(OpcodeStr, "32"),
                          v4i32, v2i32, mul, ShOp>;
}

// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
                       !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q,
                       !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q,
                       !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q,
                       !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
  // The 32-bit variant uses the 32-bit multiply-accumulate itinerary, in
  // line with N3VLInt3SL_HS and N3VInt3_QHS (it previously reused the
  // 16-bit one).
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
                       !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
}

// By-lane long 3-argument intrinsics; defs are named after the source
// operand type.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           !strconcat(OpcodeStr, "16"),
                           v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         !strconcat(OpcodeStr, "32"),
                         v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        string OpcodeStr, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> {
  // The size field for the 8-bit variant is 0b00, as in N3VLInt_QHS and
  // N3VWInt_QHS.  It was previously 0b01, which duplicated the value used by
  // the inherited 16-bit def (v4i32).
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
}


// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits.
// One itinerary is used for all double-register defs and one for all
// quad-register defs.
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, !strconcat(OpcodeStr, "8"),
                      v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, !strconcat(OpcodeStr, "16"),
                      v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, !strconcat(OpcodeStr, "32"),
                      v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, !strconcat(OpcodeStr, "8"),
                      v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, !strconcat(OpcodeStr, "16"),
                      v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, !strconcat(OpcodeStr, "32"),
                      v4i32, v4i32, IntOp>;
}


// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits.
// Defs are named after the source operand type; the result type has half as
// many elements, each twice as wide.
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits.
// Same type pairing as N2VPLInt_QHS, using the accumulate classes.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
// element sizes of 8, 16, 32 and 64 bits.
// The 8/16/32-bit defs pass a distinct op21_16 value with op7 = 0; the
// 64-bit defs pass op21_16 = 0b000000 with op7 = 1.
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "8"), v8i8, OpNode>;
  def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "16"), v4i16, OpNode>;
  def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "32"), v2i32, OpNode>;
  def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4, itin,
                     !strconcat(OpcodeStr, "64"), v1i64, OpNode>;

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "8"), v16i8, OpNode>;
  def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "16"), v8i16, OpNode>;
  def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "32"), v4i32, OpNode>;
  def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4, itin,
                     !strconcat(OpcodeStr, "64"), v2i64, OpNode>;
}


// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v8i8, ShOp>;
  def v4i16 : N2VDShAdd<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
  def v2i32 : N2VDShAdd<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
  def v1i64 : N2VDShAdd<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v16i8, ShOp>;
  def v8i16 : N2VQShAdd<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v8i16, ShOp>;
  def v4i32 : N2VQShAdd<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v4i32, ShOp>;
  def v2i64 : N2VQShAdd<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
}


// Neon Shift-Insert vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v8i8, ShOp>;
  def v4i16 : N2VDShIns<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
  def v2i32 : N2VDShIns<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
  def v1i64 : N2VDShIns<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v16i8, ShOp>;
  def v8i16 : N2VQShIns<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v8i16, ShOp>;
  def v4i32 : N2VQShIns<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v4i32, ShOp>;
  def v2i64 : N2VQShIns<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ,
                         "vadd.i", add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND,
                     "vadd.f32", v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ,
                     "vadd.f32", v4f32, v4f32, fadd, 1>;
// VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLInt_QHS<0, 1, 0b0000, 0, IIC_VSHLiD,
                            "vaddl.s", int_arm_neon_vaddls, 1>;
defm VADDLu   : N3VLInt_QHS<1, 1, 0b0000, 0, IIC_VSHLiD,
                            "vaddl.u", int_arm_neon_vaddlu, 1>;
// VADDW    : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VWInt_QHS<0, 1, 0b0001, 0, "vaddw.s", int_arm_neon_vaddws, 0>;
defm VADDWu   : N3VWInt_QHS<1, 1, 0b0001, 0, "vaddw.u", int_arm_neon_vaddwu, 0>;
// VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd.s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd.u", int_arm_neon_vhaddu, 1>;
// VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd.s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd.u", int_arm_neon_vrhaddu, 1>;
// VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd.s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd.u", int_arm_neon_vqaddu, 1>;
// VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0, 1, 0b0100, 0,
                            "vaddhn.i", int_arm_neon_vaddhn, 1>;
// VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1, 1, 0b0100, 0,
                            "vraddhn.i", int_arm_neon_vraddhn, 1>;

// Vector Multiply Operations.

// VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1,
                        IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q,
                        "vmul.i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D,
                        "vmul.p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q,
                        "vmul.p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND,
                     "vmul.f32", v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ,
                     "vmul.f32", v4f32, v4f32, fmul, 1>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul.i", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul.f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul.f32", v4f32, v2f32,
                       fmul>;

// Multiply by a lane duplicated from a Q register: select the by-scalar
// instruction on the D subregister that holds the lane.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                        (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                        (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                     (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0,
                          IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh.s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100,
                            IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh.s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                           (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                           (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0,
                            IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101,
                              IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh.s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                            (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                            (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

// VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs   : N3VLInt_QHS<0, 1, 0b1100, 0, IIC_VMULi16D,
                            "vmull.s", int_arm_neon_vmulls, 1>;
defm VMULLu   : N3VLInt_QHS<1, 1, 0b1100, 0, IIC_VMULi16D,
                            "vmull.u", int_arm_neon_vmullu, 1>;
def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D,
                        "vmull.p8", v8i16, v8i8, int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D,
                             "vmull.s", int_arm_neon_vmulls>;
defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D,
                             "vmull.u", int_arm_neon_vmullu>;

// VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0, 1, 0b1101, 0, IIC_VMULi16D,
                           "vqdmull.s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull.s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA     : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0,
                             IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD,
                          "vmla.f32", v2f32, fmul, fadd>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ,
                          "vmla.f32", v4f32, fmul, fadd>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000,
                              IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD,
                            "vmla.f32", v2f32, fmul, fadd>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ,
                            "vmla.f32", v4f32, v2f32, fmul, fadd>;

def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1),
                              (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                        (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (NEONvduplane (v4i32 QPR:$src3),
                                                imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1),
                              (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                        (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
                       (fmul (v4f32 QPR:$src2),
                             (v4f32 (NEONvduplane (v4f32 QPR:$src3),
                                                  imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                     (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLInt3_QHS<0, 1, 0b1000, 0, "vmlal.s", int_arm_neon_vmlals>;
defm VMLALu   : N3VLInt3_QHS<1, 1, 0b1000, 0, "vmlal.u", int_arm_neon_vmlalu>;

defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal.s", int_arm_neon_vmlals>;
defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal.u", int_arm_neon_vmlalu>;

// VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0,
                            "vqdmlal.s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal.s", int_arm_neon_vqdmlal>;

// VMLS     : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0,
                             IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD,
                          "vmls.f32", v2f32, fmul, fsub>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ,
                          "vmls.f32", v4f32, fmul, fsub>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100,
                              IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD,
                            "vmls.f32", v2f32, fmul, fsub>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ,
                            "vmls.f32", v4f32, v2f32, fmul, fsub>;

def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1),
                              (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                        (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (NEONvduplane (v4i32 QPR:$src3),
                                                imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1),
                              (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                        (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
                       (fmul (v4f32 QPR:$src2),
                             (v4f32 (NEONvduplane (v4f32 QPR:$src3),
                                                  imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                     (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLInt3_QHS<0, 1, 0b1010, 0, "vmlsl.s", int_arm_neon_vmlsls>;
defm VMLSLu   : N3VLInt3_QHS<1, 1, 0b1010, 0, "vmlsl.u", int_arm_neon_vmlslu>;

defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl.s", int_arm_neon_vmlsls>;
defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl.u", int_arm_neon_vmlslu>;

// VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0,
                            "vqdmlsl.s", int_arm_neon_vqdmlsl>;
// The opcode field is written with all four digits (0b0111, same value as the
// former 0b111) so it matches the width used by every other *SL_HS
// instantiation in this section.
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl.s", int_arm_neon_vqdmlsl>;

// Vector Subtract Operations.

// VSUB     : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub.i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND,
                     "vsub.f32", v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ,
                     "vsub.f32", v4f32, v4f32, fsub, 0>;
// VSUBL    : Vector Subtract Long (Q = D - D)
// The trailing flag is 0 here, as for every other subtract in this section.
// NOTE(review): the flag is presumably the commutable flag (it is 1 on the
// add/multiply definitions and 0 on subtracts, shifts and ordered compares);
// the previous value of 1 would have allowed D - D operands to be swapped.
defm VSUBLs   : N3VLInt_QHS<0, 1, 0b0010, 0, IIC_VSHLiD,
                            "vsubl.s", int_arm_neon_vsubls, 0>;
defm VSUBLu   : N3VLInt_QHS<1, 1, 0b0010, 0, IIC_VSHLiD,
                            "vsubl.u", int_arm_neon_vsublu, 0>;
// VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VWInt_QHS<0, 1, 0b0011, 0, "vsubw.s", int_arm_neon_vsubws, 0>;
defm VSUBWu   : N3VWInt_QHS<1, 1, 0b0011, 0, "vsubw.u", int_arm_neon_vsubwu, 0>;
// VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhsub.s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhsub.u", int_arm_neon_vhsubu, 0>;
// VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqsub.s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqsub.u", int_arm_neon_vqsubu, 0>;
// VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0, 1, 0b0110, 0,
                            "vsubhn.i", int_arm_neon_vsubhn, 0>;
// VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1, 1, 0b0110, 0,
                            "vrsubhn.i", int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.

// VCEQ     : Vector Compare Equal
// The floating-point compares below produce an integer vector (v2i32/v4i32)
// from a floating-point source (v2f32/v4f32).
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vceq.i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0, 0, 0b00, 0b1110, 0, IIC_VBIND,
                     "vceq.f32", v2i32, v2f32, NEONvceq, 1>;
def  VCEQfq   : N3VQ<0, 0, 0b00, 0b1110, 0, IIC_VBINQ,
                     "vceq.f32", v4i32, v4f32, NEONvceq, 1>;
// VCGE     : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vcge.s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vcge.u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1, 0, 0b00, 0b1110, 0, IIC_VBIND,
                     "vcge.f32", v2i32, v2f32, NEONvcge, 0>;
def  VCGEfq   : N3VQ<1, 0, 0b00, 0b1110, 0, IIC_VBINQ,
                     "vcge.f32", v4i32, v4f32, NEONvcge, 0>;
// VCGT     : Vector Compare Greater Than
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vcgt.s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vcgt.u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1, 0, 0b10, 0b1110, 0, IIC_VBIND,
                     "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1, 0, 0b10, 0b1110, 0, IIC_VBINQ,
                     "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>;
// VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND,
                        "vacge.f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ,
                        "vacge.f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND,
                        "vacgt.f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ,
                        "vacgt.f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST     : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vtst.i", NEONvtst, 1>;

// Vector Bitwise Operations.
// These are defined once per register width, on v2i32 (D) and v4i32 (Q).

// VAND     : Vector Bitwise AND
def  VANDd    : N3VD<0, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                     "vand", v2i32, v2i32, and, 1>;
def  VANDq    : N3VQ<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                     "vand", v4i32, v4i32, and, 1>;

// VEOR     : Vector Bitwise Exclusive OR
def  VEORd    : N3VD<1, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                     "veor", v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQ<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                     "veor", v4i32, v4i32, xor, 1>;

// VORR     : Vector Bitwise OR
def  VORRd    : N3VD<0, 0, 0b10, 0b0001, 1, IIC_VBINiD,
                     "vorr", v2i32, v2i32, or, 1>;
def  VORRq    : N3VQ<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ,
                     "vorr", v4i32, v4i32, or, 1>;

// VBIC     : Vector Bitwise Bit Clear (AND NOT)
def  VBICd    : N3V<0, 0, 0b01, 0b0001, 0, 1,
                    (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                    IIC_VBINiD, "vbic\t$dst, $src1, $src2", "",
                    [(set DPR:$dst,
                          (v2i32 (and DPR:$src1, (vnot_conv DPR:$src2))))]>;
def  VBICq    : N3V<0, 0, 0b01, 0b0001, 1, 1,
                    (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                    IIC_VBINiQ, "vbic\t$dst, $src1, $src2", "",
                    [(set QPR:$dst,
                          (v4i32 (and QPR:$src1, (vnot_conv QPR:$src2))))]>;

// VORN     : Vector Bitwise OR NOT
def  VORNd    : N3V<0, 0, 0b11, 0b0001, 0, 1,
                    (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                    IIC_VBINiD, "vorn\t$dst, $src1, $src2", "",
                    [(set DPR:$dst,
                          (v2i32 (or DPR:$src1, (vnot_conv DPR:$src2))))]>;
def  VORNq    : N3V<0, 0, 0b11, 0b0001, 1, 1,
                    (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                    IIC_VBINiQ, "vorn\t$dst, $src1, $src2", "",
                    [(set QPR:$dst,
                          (v4i32 (or QPR:$src1, (vnot_conv QPR:$src2))))]>;

// VMVN     : Vector Bitwise NOT
// NOTE(review): VMVNq uses the same itinerary (IIC_VSHLiD) as VMVNd; confirm
// whether a Q-register itinerary was intended.
def  VMVNd    : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                    (outs DPR:$dst), (ins DPR:$src),
                    IIC_VSHLiD, "vmvn\t$dst, $src", "",
                    [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
def  VMVNq    : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                    (outs QPR:$dst), (ins QPR:$src),
                    IIC_VSHLiD, "vmvn\t$dst, $src", "",
                    [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
// Also select VMVN for the vnot_conv form.
def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>;

// VBSL     : Vector Bitwise Select
// $src1 is tied to $dst and acts as the mask:
// dst = (src2 & src1) | (src3 & ~src1).
def  VBSLd    : N3V<1, 0, 0b01, 0b0001, 0, 1,
                    (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
                    IIC_VCNTiD, "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
                    [(set DPR:$dst,
                          (v2i32 (or (and DPR:$src2, DPR:$src1),
                                     (and DPR:$src3,
                                          (vnot_conv DPR:$src1)))))]>;
def  VBSLq    : N3V<1, 0, 0b01, 0b0001, 1, 1,
                    (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
                    IIC_VCNTiQ, "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
                    [(set QPR:$dst,
                          (v4i32 (or (and QPR:$src2, QPR:$src1),
                                     (and QPR:$src3,
                                          (vnot_conv QPR:$src1)))))]>;

// VBIF     : Vector Bitwise Insert if False
//            like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst",
// VBIT     : Vector Bitwise Insert if True
//            like VBSL but with: "vbit\t$dst, $src2, $src1", "$src3 = $dst",
// These are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD     : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vabd.s", int_arm_neon_vabds, 0>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vabd.u", int_arm_neon_vabdu, 0>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND,
                        "vabd.f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ,
                        "vabd.f32", v4f32, v4f32, int_arm_neon_vabds, 0>;

// VABDL    : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs   : N3VLInt_QHS<0, 1, 0b0111, 0, IIC_VBINi4Q,
                            "vabdl.s", int_arm_neon_vabdls, 0>;
defm VABDLu   : N3VLInt_QHS<1, 1, 0b0111, 0, IIC_VBINi4Q,
                            "vabdl.u", int_arm_neon_vabdlu, 0>;

// VABA     : Vector Absolute Difference and Accumulate
// Architecturally VABA has bit 23 = 0, bits 11-8 = 0b0111 and bit 4 = 1, i.e.
// the VABD opcode with bit 4 set.  The former arguments <U,1,0b0101,0> were
// identical to VABAL's below, giving both instructions the same encoding.
defm VABAs    : N3VInt3_QHS<0, 0, 0b0111, 1, "vaba.s", int_arm_neon_vabas>;
defm VABAu    : N3VInt3_QHS<1, 0, 0b0111, 1, "vaba.u", int_arm_neon_vabau>;

// VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLInt3_QHS<0, 1, 0b0101, 0, "vabal.s", int_arm_neon_vabals>;
defm VABALu   : N3VLInt3_QHS<1, 1, 0b0101, 0, "vabal.u", int_arm_neon_vabalu>;

// Vector Maximum and Minimum.

// VMAX     : Vector Maximum
// The f32 forms reuse the signed intrinsic (int_arm_neon_vmaxs) on
// floating-point vector types.
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vmax.s", int_arm_neon_vmaxs, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vmax.u", int_arm_neon_vmaxu, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND,
                        "vmax.f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ,
                        "vmax.f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMIN     : Vector Minimum
// Same layout as VMAX, using int_arm_neon_vmins / int_arm_neon_vminu.
defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vmin.s", int_arm_neon_vmins, 1>;
defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vmin.u", int_arm_neon_vminu, 1>;
def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND,
                        "vmin.f32", v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ,
                        "vmin.f32", v4f32, v4f32, int_arm_neon_vmins, 1>;

// Vector Pairwise Operations.

// VPADD    : Vector Pairwise Add
// The pairwise add/max/min definitions here are D-register only.
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD,
                        "vpadd.i8", v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD,
                        "vpadd.i16", v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD,
                        "vpadd.i32", v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND,
                        "vpadd.f32", v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL   : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u",
                             int_arm_neon_vpaddlu>;

// VPADAL   : Vector Pairwise Add and Accumulate Long
// Architecturally VPADAL has bits 11-8 = 0b0110 with bit 7 selecting
// signed/unsigned, so the 5-bit field is 0b01100 / 0b01101.  The former
// values 0b00100 / 0b00101 duplicated VPADDL's encoding above.
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u",
                              int_arm_neon_vpadalu>;

// VPMAX    : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D,
                        "vpmax.s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D,
                        "vpmax.s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D,
                        "vpmax.s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D,
                        "vpmax.u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D,
                        "vpmax.u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D,
                        "vpmax.u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D,
                        "vpmax.f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN    : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D,
                        "vpmin.s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D,
                        "vpmin.s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D,
                        "vpmin.s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D,
                        "vpmin.u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D,
                        "vpmin.u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D,
                        "vpmin.u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D,
                        "vpmin.f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE   : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe.u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe.u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe.f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe.f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS   : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSD,
                        "vrecps.f32", v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSQ,
                        "vrecps.f32", v4f32, v4f32, int_arm_neon_vrecps, 1>;

// VRSQRTE  : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte.u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte.u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte.f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte.f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS  : Vector Reciprocal Square Root Step
def  VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSD,
                         "vrsqrts.f32", v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def  VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSQ,
                         "vrsqrts.f32", v4f32, v4f32, int_arm_neon_vrsqrts, 1>;

// Vector Shifts.

// VSHL     : Vector Shift
defm VSHLs    : N3VInt_QHSD<0, 0, 0b0100, 0,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl.s", int_arm_neon_vshifts, 0>;
defm VSHLu    : N3VInt_QHSD<1, 0, 0b0100, 0,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl.u", int_arm_neon_vshiftu, 0>;
// VSHL     : Vector Shift Left (Immediate)
// Architecturally VSHL (immediate) has bits 11-8 = 0b0101 with bit 24 = 0.
// The former value 0b0111 gave it the same arguments as VQSHLsi below.
defm VSHLi    : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl.i", NEONvshl>;
// VSHR     : Vector Shift Right (Immediate)
defm VSHRs    : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>;
defm VSHRu    : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>;

// VSHLL    : Vector Shift Left Long
def  VSHLLs8  : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8",
                       v8i16, v8i8, NEONvshlls>;
def  VSHLLs16 : N2VLSh<0, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.s16",
                       v4i32, v4i16, NEONvshlls>;
def  VSHLLs32 : N2VLSh<0, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.s32",
                       v2i64, v2i32, NEONvshlls>;
def  VSHLLu8  : N2VLSh<1, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.u8",
                       v8i16, v8i8, NEONvshllu>;
def  VSHLLu16 : N2VLSh<1, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.u16",
                       v4i32, v4i16, NEONvshllu>;
def  VSHLLu32 : N2VLSh<1, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.u32",
                       v2i64, v2i32, NEONvshllu>;

// VSHLL    : Vector Shift Left Long (with maximum shift count)
def  VSHLLi8  : N2VLSh<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8",
                       v8i16, v8i8, NEONvshlli>;
def  VSHLLi16 : N2VLSh<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16",
                       v4i32, v4i16, NEONvshlli>;
def  VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32",
                       v2i64, v2i32, NEONvshlli>;

// VSHRN    : Vector Shift Right and Narrow
def  VSHRN16  : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, IIC_VSHLiD,
                       "vshrn.i16", v8i8, v8i16, NEONvshrn>;
def  VSHRN32  : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, IIC_VSHLiD,
                       "vshrn.i32", v4i16, v4i32, NEONvshrn>;
def  VSHRN64  : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, IIC_VSHLiD,
                       "vshrn.i64", v2i32, v2i64, NEONvshrn>;

// VRSHL    : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSD<0, 0, 0b0101, 0,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl.s", int_arm_neon_vrshifts, 0>;
defm VRSHLu   : N3VInt_QHSD<1, 0, 0b0101, 0,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl.u", int_arm_neon_vrshiftu, 0>;
// VRSHR    : Vector Rounding Shift Right
defm VRSHRs   : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s",
                           NEONvrshrs>;
defm VRSHRu   : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u",
                           NEONvrshru>;

// VRSHRN   : Vector Rounding Shift Right and Narrow
def  VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                       "vrshrn.i16", v8i8, v8i16, NEONvrshrn>;
def  VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                       "vrshrn.i32", v4i16, v4i32, NEONvrshrn>;
def  VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                       "vrshrn.i64", v2i32, v2i64, NEONvrshrn>;

// VQSHL    : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSD<0, 0, 0b0100, 1,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl.s", int_arm_neon_vqshifts, 0>;
defm VQSHLu   : N3VInt_QHSD<1, 0, 0b0100, 1,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl.u", int_arm_neon_vqshiftu, 0>;
// VQSHL    : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.s",
                           NEONvqshls>;
defm VQSHLui  : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u",
                           NEONvqshlu>;
// VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s",
                           NEONvqshlsu>;

// VQSHRN   : Vector Saturating Shift Right and Narrow
def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrn.s16", v8i8, v8i16, NEONvqshrns>;
def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrn.s32", v4i16, v4i32, NEONvqshrns>;
def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrn.s64", v2i32, v2i64, NEONvqshrns>;
def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrn.u16", v8i8, v8i16, NEONvqshrnu>;
def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrn.u32", v4i16, v4i32, NEONvqshrnu>;
def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrn.u64", v2i32, v2i64, NEONvqshrnu>;

// VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrun.s16", v8i8, v8i16, NEONvqshrnsu>;
def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrun.s32", v4i16, v4i32, NEONvqshrnsu>;
def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, IIC_VSHLi4D,
                       "vqshrun.s64", v2i32, v2i64, NEONvqshrnsu>;

// VQRSHL   : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSD<0, 0, 0b0101, 1,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl.s", int_arm_neon_vqrshifts, 0>;
defm VQRSHLu  : N3VInt_QHSD<1, 0, 0b0101, 1,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl.u", int_arm_neon_vqrshiftu, 0>;

// VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
// Signed forms: selected through the NEONvqrshrns node.  Each record pairs a
// narrow vector type with the double-width vector type it is matched against.
def VQRSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrn.s16", v8i8, v8i16, NEONvqrshrns>;
def VQRSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrn.s32", v4i16, v4i32, NEONvqrshrns>;
def VQRSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrn.s64", v2i32, v2i64, NEONvqrshrns>;
// Unsigned forms: identical to the signed ones except that the first template
// argument is 1 and the NEONvqrshrnu node is used.
def VQRSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrn.u16", v8i8, v8i16, NEONvqrshrnu>;
def VQRSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrn.u32", v4i16, v4i32, NEONvqrshrnu>;
def VQRSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrn.u64", v2i32, v2i64, NEONvqrshrnu>;

//   VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
// Selected through the NEONvqrshrnsu node; the assembly suffix stays ".sNN".
def VQRSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrun.s16", v8i8, v8i16, NEONvqrshrnsu>;
def VQRSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrun.s32", v4i16, v4i32, NEONvqrshrnsu>;
def VQRSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrun.s64", v2i32, v2i64, NEONvqrshrnsu>;

//   VSRA     : Vector Shift Right and Accumulate
// The accumulate forms reuse the plain shift-right nodes (NEONvshrs/NEONvshru).
defm VSRAs  : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>;
defm VSRAu  : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra.u", NEONvshru>;
//   VRSRA    : Vector Rounding Shift Right and Accumulate
// Same shape as VSRA, built on the rounding shift nodes instead.
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra.s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra.u", NEONvrshru>;

//   VSLI     : Vector Shift Left and Insert
defm VSLI   : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli.", NEONvsli>;
//   VSRI     : Vector Shift Right and Insert
defm VSRI   : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>;

// Vector Absolute and Saturating Absolute.
//   VABS     : Vector Absolute Value
// Integer forms come from one multiclass instantiation; the f32 forms are
// spelled out separately for the D and Q register variants.  All of them are
// selected from the int_arm_neon_vabs intrinsic.
defm VABS   : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                         IIC_VUNAiD, IIC_VUNAiQ,
                         "vabs.s", int_arm_neon_vabs>;
def  VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, IIC_VUNAD,
                      "vabs.f32", v2f32, v2f32, int_arm_neon_vabs>;
def  VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, IIC_VUNAQ,
                      "vabs.f32", v4f32, v4f32, int_arm_neon_vabs>;

//   VQABS    : Vector Saturating Absolute Value
defm VQABS  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                         IIC_VQUNAiD, IIC_VQUNAiQ,
                         "vqabs.s", int_arm_neon_vqabs>;

// Vector Negate.

// Integer negation is matched as a subtraction from an all-zeros vector.
// vneg_conv is the same fragment written against immAllZerosV_bc
// (presumably the zero vector seen through a bitconvert -- confirm against
// the definition of immAllZerosV_bc).
def vneg      : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>;

// VNEGD / VNEGQ: integer negate on a D or Q register.  'size' is forwarded
// into the N2V encoding, OpcodeStr is the mnemonic (operands are appended
// here), and Ty is the vector type the vneg fragment is matched at.
class VNEGD<bits<2> size, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
// NOTE(review): the Q-register form uses the same IIC_VSHLiD itinerary as the
// D-register form -- confirm that this is intentional.
class VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (vneg QPR:$src)))]>;

//   VNEG     : Vector Negate
def  VNEGs8d  : VNEGD<0b00, "vneg.s8",  v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg.s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg.s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg.s8",  v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg.s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>;

//   VNEG     : Vector Negate (floating-point)
// The f32 forms match the generic fneg node directly.
def  VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
                    "vneg.f32\t$dst, $src", "",
                    [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
                    "vneg.f32\t$dst, $src", "",
                    [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;

// Route the vneg_conv spelling of negation to the same integer instructions.
def : Pat<(v8i8  (vneg_conv DPR:$src)), (VNEGs8d  DPR:$src)>;
def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vneg_conv DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vneg_conv QPR:$src)), (VNEGs8q  QPR:$src)>;
def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;

//   VQNEG    : Vector Saturating Negate
defm VQNEG  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                         IIC_VQUNAiD, IIC_VQUNAiQ,
                         "vqneg.s", int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

//   VCLS     : Vector Count Leading Sign Bits
defm VCLS   : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                         IIC_VCNTiD, IIC_VCNTiQ,
                         "vcls.s", int_arm_neon_vcls>;
//   VCLZ     : Vector Count Leading Zeros
defm VCLZ   : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                         IIC_VCNTiD, IIC_VCNTiQ,
                         "vclz.i", int_arm_neon_vclz>;
//   VCNT     : Vector Count One Bits
// Only byte-element forms are defined (v8i8 and v16i8).
def  VCNTd  : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiD,
                      "vcnt.8", v8i8, v8i8, int_arm_neon_vcnt>;
def  VCNTq  : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ,
                      "vcnt.8", v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Move Operations.

//   VMOV     : Vector Move (Register)

// Both register moves carry an empty pattern list, so they are never chosen
// by pattern-based instruction selection.
// NOTE(review): VMOVQ uses the IIC_VMOVD itinerary -- confirm intentional.
def  VMOVD  : N3V<0, 0, 0b10, 0b0001, 0, 1,
                  (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
                  "vmov\t$dst, $src", "", []>;
def  VMOVQ  : N3V<0, 0, 0b10, 0b0001, 1, 1,
                  (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
                  "vmov\t$dst, $src", "", []>;

//   VMOV     : Vector Move (Immediate)

// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
// Each element size gets an SDNodeXForm / PatLeaf pair.  The leaf accepts a
// build_vector only when ARM::getVMOVImm returns a non-null node for the
// given size argument (1, 2, 4 or 8); the xform then returns that node.
def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 1, *CurDAG);
}]>;
def vmovImm8 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0;
}], VMOV_get_imm8>;

// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 2, *CurDAG);
}]>;
def vmovImm16 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0;
}], VMOV_get_imm16>;

// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 4, *CurDAG);
}]>;
def vmovImm32 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0;
}], VMOV_get_imm32>;

// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 8, *CurDAG);
}]>;
def vmovImm64 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0;
}], VMOV_get_imm64>;

// Note: Some of the cmode bits in the following VMOV instructions need to
// be encoded based on the immed values.
// One D-register and one Q-register record per element size.  Each matches a
// build_vector accepted by the corresponding vmovImm<N> leaf.
def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1,
                         (outs DPR:$dst), (ins i8imm:$SIMM), IIC_VMOVImm,
                         "vmov.i8\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1,
                         (outs QPR:$dst), (ins i8imm:$SIMM), IIC_VMOVImm,
                         "vmov.i8\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1,
                         (outs DPR:$dst), (ins i16imm:$SIMM), IIC_VMOVImm,
                         "vmov.i16\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1,
                         (outs QPR:$dst), (ins i16imm:$SIMM), IIC_VMOVImm,
                         "vmov.i16\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;

def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1,
                         (outs DPR:$dst), (ins i32imm:$SIMM), IIC_VMOVImm,
                         "vmov.i32\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1,
                         (outs QPR:$dst), (ins i32imm:$SIMM), IIC_VMOVImm,
                         "vmov.i32\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1,
                         (outs DPR:$dst), (ins i64imm:$SIMM), IIC_VMOVImm,
                         "vmov.i64\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1,
                         (outs QPR:$dst), (ins i64imm:$SIMM), IIC_VMOVImm,
                         "vmov.i64\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;

//   VMOV     : Vector Get Lane (move scalar to ARM core register)

// D-register sources.  The 8- and 16-bit forms come in .s and .u flavours
// (NEONvgetlanes / NEONvgetlaneu); the 32-bit form matches plain extractelt.
def VGETLNs8  : NVGetLane<0b11100101, 0b1011, 0b00,
                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
                          IIC_VMOVSI, "vmov", ".s8\t$dst, $src[$lane]",
                          [(set GPR:$dst,
                                (NEONvgetlanes (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01,
                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
                          IIC_VMOVSI, "vmov", ".s16\t$dst, $src[$lane]",
                          [(set GPR:$dst,
                                (NEONvgetlanes (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNu8  : NVGetLane<0b11101101, 0b1011, 0b00,
                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
                          IIC_VMOVSI, "vmov", ".u8\t$dst, $src[$lane]",
                          [(set GPR:$dst,
                                (NEONvgetlaneu (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01,
                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
                          IIC_VMOVSI, "vmov", ".u16\t$dst, $src[$lane]",
                          [(set GPR:$dst,
                                (NEONvgetlaneu (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00,
                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
                          IIC_VMOVSI, "vmov", ".32\t$dst, $src[$lane]",
                          [(set GPR:$dst,
                                (extractelt (v2i32 DPR:$src), imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

// Q-register sources: take the D sub-register given by DSubReg_*_reg applied
// to the lane index, then use the D-register instruction with the lane
// operand rewritten by SubReg_*_lane.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8
            (v8i8 (EXTRACT_SUBREG QPR:$src, (DSubReg_i8_reg imm:$lane))),
            (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16
            (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))),
            (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32
            (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))),
            (SubReg_i32_lane imm:$lane))>;
// Floating-point element extraction emits no instruction here: it is a
// sub-register extract, after copying f32 sources to the *_VFP2 class.
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


//   VMOV     : Vector Set Lane (move ARM core register to scalar)

// The destination is tied to $src1, the vector being updated.
let Constraints = "$src1 = $dst" in {
def VSETLNi8  : NVSetLane<0b11100100, 0b1011, 0b00,
                          (outs DPR:$dst),
                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
                          IIC_VMOVISL, "vmov", ".8\t$dst[$lane], $src2",
                          [(set DPR:$dst,
                                (vector_insert (v8i8 DPR:$src1),
                                               GPR:$src2, imm:$lane))]>;
def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01,
                          (outs DPR:$dst),
                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
                          IIC_VMOVISL, "vmov", ".16\t$dst[$lane], $src2",
                          [(set DPR:$dst,
                                (vector_insert (v4i16 DPR:$src1),
                                               GPR:$src2, imm:$lane))]>;
def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00,
                          (outs DPR:$dst),
                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
                          IIC_VMOVISL, "vmov", ".32\t$dst[$lane], $src2",
                          [(set DPR:$dst,
                                (insertelt (v2i32 DPR:$src1),
                                           GPR:$src2, imm:$lane))]>;
}
// Q-register destinations: extract the D sub-register holding the lane, set
// the lane with the D-register instruction, and insert the result back into
// the same sub-register of $src1.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG
                   QPR:$src1,
                   (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i8_reg imm:$lane))),
                             GPR:$src2, (SubReg_i8_lane imm:$lane)),
                   (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG
                   QPR:$src1,
                   (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                       (DSubReg_i16_reg imm:$lane))),
                              GPR:$src2, (SubReg_i16_lane imm:$lane)),
                   (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG
                   QPR:$src1,
                   (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                       (DSubReg_i32_reg imm:$lane))),
                              GPR:$src2, (SubReg_i32_lane imm:$lane)),
                   (DSubReg_i32_reg imm:$lane)))>;

// Floating-point element insertion is a pure sub-register insert.
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector of a floating-point scalar: insert into sub-register 0 of
// an IMPLICIT_DEF vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
def : Pat<(v2f64 (scalar_to_vector DPR:$src)),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;

// scalar_to_vector of a core register, D-register result: set lane 0 of an
// IMPLICIT_DEF vector.
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

// Q-register result: build the D-register value as above and place it in
// arm_dsubreg_0 of an IMPLICIT_DEF Q register.
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v16i8 (IMPLICIT_DEF)),
            (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            arm_dsubreg_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v8i16 (IMPLICIT_DEF)),
            (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            arm_dsubreg_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG
            (v4i32 (IMPLICIT_DEF)),
            (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
            arm_dsubreg_0)>;

//   VDUP     : Vector Duplicate (from ARM core register to all elements)

// VDUPD / VDUPQ: opcod1 and opcod3 are forwarded to NVDup, asmSize is the
// element-size suffix placed in front of the operand string, and Ty is the
// result vector type of the matched NEONvdup.
class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3,
          (outs DPR:$dst), (ins GPR:$src), IIC_VMOVIS,
          "vdup", !strconcat(asmSize, "\t$dst, $src"),
          [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3,
          (outs QPR:$dst), (ins GPR:$src), IIC_VMOVIS,
          "vdup", !strconcat(asmSize, "\t$dst, $src"),
          [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;

def  VDUP8d   : VDUPD<0b11101100, 0b00, ".8",  v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, ".16", v4i16>;
def  VDUP32d  : VDUPD<0b11101000, 0b00, ".32", v2i32>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, ".8",  v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, ".16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, ".32", v4i32>;

// f32 duplicates from a core register: the GPR value is bitconverted to f32
// in the pattern; the opcode arguments equal those of VDUP32d / VDUP32q.
def  VDUPfd   : NVDup<0b11101000, 0b1011, 0b00,
                      (outs DPR:$dst), (ins GPR:$src), IIC_VMOVIS,
                      "vdup", ".32\t$dst, $src",
                      [(set DPR:$dst,
                            (v2f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;
def  VDUPfq   : NVDup<0b11101010, 0b1011, 0b00,
                      (outs QPR:$dst), (ins GPR:$src), IIC_VMOVIS,
                      "vdup", ".32\t$dst, $src",
                      [(set QPR:$dst,
                            (v4f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;

//   VDUP     : Vector Duplicate Lane (from scalar to all elements)

// The source is always a D register.  VDUPLNQ therefore takes separate result
// (ResTy) and operand (OpTy) vector types.
class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
        (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
        [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;

// NOTE(review): the Q-register result form also uses IIC_VMOVD -- confirm.
class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr,
              ValueType ResTy, ValueType OpTy>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
        (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
        [(set QPR:$dst,
              (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;

def VDUPLN8d  : VDUPLND<0b00, 0b01, "vdup.8",  v8i8>;
def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>;
def VDUPLN32d : VDUPLND<0b01, 0b00, "vdup.32", v2i32>;
def VDUPLNfd  : VDUPLND<0b01, 0b00, "vdup.32", v2f32>;
def VDUPLN8q  : VDUPLNQ<0b00, 0b01, "vdup.8",  v16i8, v8i8>;
def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>;
def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>;
def VDUPLNfq  : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>;

// Lane duplicate from a Q-register source: extract the D sub-register for the
// lane and rewrite the lane operand, as in the get-lane patterns.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q
                   (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                   (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q
                   (v4i16 (EXTRACT_SUBREG QPR:$src,
                            (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q
                   (v2i32 (EXTRACT_SUBREG QPR:$src,
                            (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLNfq
                   (v2f32 (EXTRACT_SUBREG QPR:$src,
                            (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

// Duplicate an f32 held in an SPR.  The assembly operand is printed with the
// "lane" modifier (${src:lane}).
def VDUPfdf   : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0,
                    (outs DPR:$dst), (ins SPR:$src), IIC_VMOVD,
                    "vdup.32\t$dst, ${src:lane}", "",
                    [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;

def VDUPfqf   : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0,
                    (outs QPR:$dst), (ins SPR:$src), IIC_VMOVD,
                    "vdup.32\t$dst, ${src:lane}", "",
                    [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// 64-bit element lane duplicate uses no instruction of its own: the selected
// D sub-register is inserted into the other D sub-register of the same Q.
def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
          (INSERT_SUBREG
            QPR:$src,
            (i64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
            (DSubReg_f64_other_reg imm:$lane))>;
def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
          (INSERT_SUBREG
            QPR:$src,
            (f64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
            (DSubReg_f64_other_reg imm:$lane))>;

//   VMOVN    : Vector Narrowing Move
defm VMOVN    : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00100, 0, 0, IIC_VMOVD,
                            "vmovn.i", int_arm_neon_vmovn>;
//   VQMOVN   : Vector Saturating Narrowing Move
defm VQMOVNs  : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 0, 0, IIC_VQUNAiD,
                            "vqmovn.s", int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00101, 1, 0, IIC_VQUNAiD,
                            "vqmovn.u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11, 0b11, 0b10, 0b00100, 1, 0, IIC_VQUNAiD,
                            "vqmovun.s", int_arm_neon_vqmovnsu>;
//   VMOVL    : Vector Lengthening Move
defm VMOVLs   : N2VLInt_QHS<0, 1, 0b1010, 0, 0, 1,
                            "vmovl.s", int_arm_neon_vmovls>;
defm VMOVLu   : N2VLInt_QHS<1, 1, 0b1010, 0, 0, 1,
                            "vmovl.u", int_arm_neon_vmovlu>;

// Vector Conversions.
//   VCVT     : Vector Convert Between Floating-Point and Integers
// D-register forms (two 32-bit elements), matched on the generic
// fp_to_sint / fp_to_uint / sint_to_fp / uint_to_fp nodes.
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
                     "vcvt.s32.f32", v2i32, v2f32, fp_to_sint>;
def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
                     "vcvt.u32.f32", v2i32, v2f32, fp_to_uint>;
def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
                     "vcvt.f32.s32", v2f32, v2i32, sint_to_fp>;
def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
                     "vcvt.f32.u32", v2f32, v2i32, uint_to_fp>;

// Q-register forms (four 32-bit elements); same arguments as above apart from
// the N2VQ base class and the vector types.
def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
                     "vcvt.s32.f32", v4i32, v4f32, fp_to_sint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
                     "vcvt.u32.f32", v4i32, v4f32, fp_to_uint>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
                     "vcvt.f32.s32", v4f32, v4i32, sint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
                     "vcvt.f32.u32", v4f32, v4i32, uint_to_fp>;

//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
// Note: Some of the opcode bits in the following VCVT instructions need to
// be encoded based on the immed values.
// Fixed-point conversions are selected from the int_arm_neon_vcvt* intrinsics.
// D-register forms:
def VCVTf2xsd : N2VCvtD<0, 1, 0b000000, 0b1111, 0, 1,
                        "vcvt.s32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b000000, 0b1111, 0, 1,
                        "vcvt.u32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b000000, 0b1110, 0, 1,
                        "vcvt.f32.s32", v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b000000, 0b1110, 0, 1,
                        "vcvt.f32.u32", v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;

// Q-register forms:
def VCVTf2xsq : N2VCvtQ<0, 1, 0b000000, 0b1111, 0, 1,
                        "vcvt.s32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b000000, 0b1111, 0, 1,
                        "vcvt.u32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1,
                        "vcvt.f32.s32", v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1,
                        "vcvt.f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;

// Vector Reverse.

//   VREV64   : Vector Reverse elements within 64-bit doublewords

// op19_18 is forwarded into the N2V encoding, OpcodeStr is the full mnemonic
// including the element size, and Ty is the vector type used for both the
// operand and the result of NEONvrev64.
class VREV64D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
// NOTE(review): the Q-register class also uses IIC_VMOVD -- confirm.
class VREV64Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;

// The f32 variant takes the same arguments as the 32-bit integer one except
// for the vector type.
def VREV64d8  : VREV64D<0b00, "vrev64.8",  v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64.16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64.32", v2i32>;
def VREV64df  : VREV64D<0b10, "vrev64.32", v2f32>;

def VREV64q8  : VREV64Q<0b00, "vrev64.8",  v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64.16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64.32", v4i32>;
def VREV64qf  : VREV64Q<0b10, "vrev64.32", v4f32>;

//   VREV32   : Vector Reverse elements within 32-bit words

// Same shape as the VREV64 classes; these match NEONvrev32.  Only 8- and
// 16-bit element sizes are instantiated below.
class VREV32D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32.8",  v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32.16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32.8",  v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>;

//   VREV16   : Vector Reverse elements within 16-bit halfwords

// These match NEONvrev16; only the byte-element form is instantiated.
class VREV16D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0,
        (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0,
        (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;

def VREV16d8  : VREV16D<0b00, "vrev16.8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16.8", v16i8>;

// Other Vector Shuffles.
//   VEXT     : Vector Extract

// VEXTd / VEXTq take two vector operands of type Ty plus an immediate index
// and match NEONvext; OpcodeStr is the mnemonic including the element size.
class VEXTd<string OpcodeStr, ValueType Ty>
  : N3V<0, 1, 0b11, 0b0000, 0, 0,
        (outs DPR:$dst), (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
        !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
        [(set DPR:$dst,
              (Ty (NEONvext (Ty DPR:$lhs), (Ty DPR:$rhs), imm:$index)))]>;

class VEXTq<string OpcodeStr, ValueType Ty>
  : N3V<0, 1, 0b11, 0b0000, 1, 0,
        (outs QPR:$dst), (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
        !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
        [(set QPR:$dst,
              (Ty (NEONvext (Ty QPR:$lhs), (Ty QPR:$rhs), imm:$index)))]>;

def VEXTd8  : VEXTd<"vext.8",  v8i8>;
def VEXTd16 : VEXTd<"vext.16", v4i16>;
def VEXTd32 : VEXTd<"vext.32", v2i32>;
def VEXTdf  : VEXTd<"vext.32", v2f32>;

def VEXTq8  : VEXTq<"vext.8",  v16i8>;
def VEXTq16 : VEXTq<"vext.16", v8i16>;
def VEXTq32 : VEXTq<"vext.32", v4i32>;
def VEXTqf  : VEXTq<"vext.32", v4f32>;

//   VTRN     : Vector Transpose

// The D-register shuffle class takes no itinerary argument; the Q-register
// class is given one explicitly.
def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn.8">;
def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn.16">;
def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn.32">;

def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn.8">;
def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn.16">;
def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn.32">;

//   VUZP     : Vector Unzip (Deinterleave)

def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp.8">;
def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp.16">;
def  VUZPd32  : N2VDShuffle<0b10, 0b00010, "vuzp.32">;

def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp.8">;
def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp.16">;
def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp.32">;

//   VZIP     : Vector Zip (Interleave)

def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip.8">;
def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip.16">;
def  VZIPd32  : N2VDShuffle<0b10, 0b00011, "vzip.32">;

def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip.8">;
def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip.16">;
def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">;

// Vector Table Lookup and Table Extension.

//   VTBL     : Vector Table Lookup
// One record per table length (1 to 4 D registers), each selected from the
// matching int_arm_neon_vtblN intrinsic.  The table registers are printed as
// a brace-enclosed list.
def  VTBL1
  : N3V<1, 1, 0b11, 0b1000, 0, 0,
        (outs DPR:$dst), (ins DPR:$tbl1, DPR:$src), IIC_VTB1,
        "vtbl.8\t$dst, \\{$tbl1\\}, $src", "",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
// The multi-register table forms set hasExtraSrcRegAllocReq.
let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1, 1, 0b11, 0b1001, 0, 0,
        (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2,
        "vtbl.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbl2 DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def  VTBL3
  : N3V<1, 1, 0b11, 0b1010, 0, 0,
        (outs DPR:$dst),
        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3,
        "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbl3 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3,
                                        DPR:$src)))]>;
def  VTBL4
  : N3V<1, 1, 0b11, 0b1011, 0, 0,
        (outs DPR:$dst),
        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4,
        "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3,
                                        DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1

//   VTBX     : Vector Table Extension
// Same layout as VTBL, with an extra $orig input that is tied to the
// destination through the "$orig = $dst" constraint.
def  VTBX1
  : N3V<1, 1, 0b11, 0b1000, 1, 0,
        (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1,
        "vtbx.8\t$dst, \\{$tbl1\\}, $src", "$orig = $dst",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbx1 DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1, 1, 0b11, 0b1001, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2,
        "vtbx.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbx2 DPR:$orig, DPR:$tbl1, DPR:$tbl2,
                                        DPR:$src)))]>;
def  VTBX3
  : N3V<1, 1, 0b11, 0b1010, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3,
        "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, DPR:$tbl2,
                                        DPR:$tbl3, DPR:$src)))]>;
def  VTBX4
  : N3V<1, 1, 0b11, 0b1011, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
        IIC_VTBX4,
        "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst",
        [(set DPR:$dst,
              (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, DPR:$tbl2,
                                        DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// These need separate instructions because they must use the DPR_VFP2
// register class, which has SPR sub-registers.

// Each "_sfp" instruction below is followed by an anonymous pattern record
// that selects it for the corresponding generic FP node.
// NOTE(review): the trailing 1/0 argument to N3VDs is presumably the
// "commutable" flag (1 for fadd/fmul, 0 for fsub) -- confirm against the
// N3VDs class definition.

// vadd.f32 -- single-precision FP add.
let neverHasSideEffects = 1 in
def VADDfd_sfp
  : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd, 1>;
def : N3VDsPat<fadd, VADDfd_sfp>;

// vsub.f32 -- single-precision FP subtract.
let neverHasSideEffects = 1 in
def VSUBfd_sfp
  : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub, 0>;
def : N3VDsPat<fsub, VSUBfd_sfp>;

// vmul.f32 -- single-precision FP multiply.
let neverHasSideEffects = 1 in
def VMULfd_sfp
  : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul, 1>;
def : N3VDsPat<fmul, VMULfd_sfp>;

// vmla.f32 -- single-precision FP multiply-accumulate (fmul feeding fadd).
let neverHasSideEffects = 1 in
def VMLAfd_sfp
  : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32",
               v2f32, fmul, fadd>;
def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;

// vmls.f32 -- single-precision FP multiply-subtract (fmul feeding fsub).
let neverHasSideEffects = 1 in
def VMLSfd_sfp
  : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32",
               v2f32, fmul, fsub>;
def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;

// vabs.f32 -- single-precision FP absolute value; selected for fabs.
let neverHasSideEffects = 1 in
def VABSfd_sfp
  : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
             IIC_VUNAD, "vabs.f32",
             v2f32, v2f32, int_arm_neon_vabs>;
def : N2VDIntsPat<fabs, VABSfd_sfp>;

// vneg.f32 -- single-precision FP negate.  The instruction itself is defined
// with an empty pattern list and DPR_VFP2 operands; selection for fneg comes
// from the separate pattern record that follows it.
let neverHasSideEffects = 1 in
def VNEGf32d_sfp
  : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src),
        IIC_VUNAD,
        "vneg.f32\t$dst, $src", "", []>;
def : N2VDIntsPat<fneg, VNEGf32d_sfp>;

// vcvt -- conversions between single-precision FP and 32-bit integers.

// FP -> signed integer.
let neverHasSideEffects = 1 in
def VCVTf2sd_sfp
  : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
          v2i32, v2f32, fp_to_sint>;
def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;

// FP -> unsigned integer.
let neverHasSideEffects = 1 in
def VCVTf2ud_sfp
  : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
          v2i32, v2f32, fp_to_uint>;
def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;

// Signed integer -> FP.
let neverHasSideEffects = 1 in
def VCVTs2fd_sfp
  : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
          v2f32, v2i32, sint_to_fp>;
def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;

// Unsigned integer -> FP.
let neverHasSideEffects = 1 in
def VCVTu2fd_sfp
  : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
          v2f32, v2i32, uint_to_fp>;
def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bitconvert: every pattern below rewrites a bitconvert to the unchanged
// source register, merely given the destination value type.  The patterns are
// grouped by destination type.

// --- DPR-held types --------------------------------------------------------

// -> v1i64
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;

// -> v2i32
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;

// -> v4i16
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;

// -> v8i8
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;

// -> f64
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;

// -> v2f32
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

// --- QPR-held types --------------------------------------------------------

// -> v2i64
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;

// -> v4i32
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;

// -> v8i16
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;

// -> v16i8
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;

// -> v4f32
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;

// -> v2f64
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;