// ARMInstrNEON.td revision e9829cafca9811fa616f8046ce384b797814fa67
//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profile shared by the vector compare nodes below: one result, which
// must be an integer type, and two operands that must have the same type as
// each other.
def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;

// Vector compare / test nodes; each maps a DAG node name onto the
// corresponding ARMISD opcode.
def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
// Shift by immediate: integer result, operand 1 has the result type,
// operand 2 (the shift amount) is an i32.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
// Long / narrow shift by immediate: result and operand 1 are both integer
// types but are not required to match; operand 2 is an i32.
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
// Shift and insert: integer result, operands 1 and 2 have the result type,
// operand 3 is an i32.
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction: i32 result, integer-typed operand 1, i32 operand 2.
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// VDUP: one operand, vector result; the operand type is left unconstrained.
def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
// Result and operand 1 are both vectors (not tied to each other); operand 2
// is an i32.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

// VEXT: vector result, two operands of the result type, plus an i32 operand.
def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Single-input shuffles: vector result with an operand of the same type.
def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-input, two-result shuffles: both results and both operands share one
// vector type.
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//

// addrmode_neonldstm := reg
//
/* TODO: Take advantage of vldm.
def addrmode_neonldstm : Operand<i32>,
                ComplexPattern<i32, 2, "SelectAddrModeNeonLdStM", []> {
  let PrintMethod = "printAddrNeonLdStMOperand";
  let MIOperandInfo = (ops GPR, i32imm);
}
*/

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

/* TODO: Take advantage of vldm.
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
def VLDMD : NI<(outs),
               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
               IIC_fpLoadm,
               "vldm${addr:submode} ${addr:base}, $dst1",
               []> {
  let Inst{27-25} = 0b110;
  let Inst{20}    = 1;
  let Inst{11-9}  = 0b101;
}

def VLDMS : NI<(outs),
               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
               IIC_fpLoadm,
               "vldm${addr:submode} ${addr:base}, $dst1",
               []> {
  let Inst{27-25} = 0b110;
  let Inst{20}    = 1;
  let Inst{11-9}  = 0b101;
}
}
*/

// Use vldmia to load a Q register as a D register pair.
// The selection pattern matches a v2f64 load through addrmode4.
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
                IIC_fpLoadm,
                "vldmia $addr, ${dst:dregpair}",
                [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
  let Inst{27-25} = 0b110;
  let Inst{24}    = 0; // P bit
  let Inst{23}    = 1; // U bit
  let Inst{20}    = 1;
  let Inst{11-9}  = 0b101;
}

// Use vstmia to store a Q register as a D register pair.
// The selection pattern matches a v2f64 store through addrmode4.  The
// encoding bits are the same as VLDRQ's except that Inst{20} is 0 here.
def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
                IIC_fpStorem,
                "vstmia $addr, ${src:dregpair}",
                [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
  let Inst{27-25} = 0b110;
  let Inst{24}    = 0; // P bit
  let Inst{23}    = 1; // U bit
  let Inst{20}    = 0;
  let Inst{11-9}  = 0b101;
}

//   VLD1     : Vector Load (multiple single elements)
// The D form prints a one-register list; the Q form prints the Q register as
// a D register pair.  Both are selected from the intrinsic passed as IntOp.
class VLD1D<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : NLdSt<(outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
          !strconcat(OpcodeStr, "\t\\{$dst\\}, $addr"), "",
          [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
class VLD1Q<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : NLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
          !strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"), "",
          [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;

def  VLD1d8   : VLD1D<"vld1.8",  v8i8,  int_arm_neon_vld1>;
def  VLD1d16  : VLD1D<"vld1.16", v4i16, int_arm_neon_vld1>;
def  VLD1d32  : VLD1D<"vld1.32", v2i32, int_arm_neon_vld1>;
def  VLD1df   : VLD1D<"vld1.32", v2f32, int_arm_neon_vld1>;
def  VLD1d64  : VLD1D<"vld1.64", v1i64, int_arm_neon_vld1>;

def  VLD1q8   : VLD1Q<"vld1.8",  v16i8, int_arm_neon_vld1>;
def  VLD1q16  : VLD1Q<"vld1.16", v8i16, int_arm_neon_vld1>;
def  VLD1q32  : VLD1Q<"vld1.32", v4i32, int_arm_neon_vld1>;
def  VLD1qf   : VLD1Q<"vld1.32", v4f32, int_arm_neon_vld1>;
def  VLD1q64  : VLD1Q<"vld1.64", v2i64, int_arm_neon_vld1>;

// The multi-register loads below carry no selection patterns (empty pattern
// lists), so mayLoad is set explicitly for them.
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {

//   VLD2     : Vector Load (multiple 2-element structures)
class VLD2D<string OpcodeStr>
  : NLdSt<(outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD2,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), "", []>;
class VLD2Q<string OpcodeStr>
  : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD2,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
          "", []>;

def  VLD2d8   : VLD2D<"vld2.8">;
def  VLD2d16  : VLD2D<"vld2.16">;
def  VLD2d32  : VLD2D<"vld2.32">;

def  VLD2q8   : VLD2Q<"vld2.8">;
def  VLD2q16  : VLD2Q<"vld2.16">;
def  VLD2q32  : VLD2Q<"vld2.32">;

//   VLD3     : Vector Load (multiple 3-element structures)
class VLD3D<string OpcodeStr>
  : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr),
          IIC_VLD3,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "", []>;

def  VLD3d8   : VLD3D<"vld3.8">;
def  VLD3d16  : VLD3D<"vld3.16">;
def  VLD3d32  : VLD3D<"vld3.32">;

//   VLD4     : Vector Load (multiple 4-element structures)
class VLD4D<string OpcodeStr>
  : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr), IIC_VLD4,
          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
          "", []>;

def  VLD4d8   : VLD4D<"vld4.8">;
def  VLD4d16  : VLD4D<"vld4.16">;
def  VLD4d32  : VLD4D<"vld4.32">;

// The lane loads take the previous register values as $srcN inputs, each
// tied to the matching $dstN by the constraint string.

//   VLD2LN   : Vector Load (single 2-element structure to one lane)
class VLD2LND<string OpcodeStr>
  : NLdSt<(outs DPR:$dst1, DPR:$dst2),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VLD2,
          !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"),
          "$src1 = $dst1, $src2 = $dst2", []>;

def VLD2LNd8  : VLD2LND<"vld2.8">;
def VLD2LNd16 : VLD2LND<"vld2.16">;
def VLD2LNd32 : VLD2LND<"vld2.32">;

//   VLD3LN   : Vector Load (single 3-element structure to one lane)
class VLD3LND<string OpcodeStr>
  : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VLD3,
          !strconcat(OpcodeStr,
          "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"),
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;

def VLD3LNd8  : VLD3LND<"vld3.8">;
def VLD3LNd16 : VLD3LND<"vld3.16">;
def VLD3LNd32 : VLD3LND<"vld3.32">;

//   VLD4LN   : Vector Load (single 4-element structure to one lane)
class VLD4LND<string OpcodeStr>
  : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
           nohash_imm:$lane), IIC_VLD4,
          !strconcat(OpcodeStr,
          "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr"),
          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;

def VLD4LNd8  : VLD4LND<"vld4.8">;
def VLD4LNd16 : VLD4LND<"vld4.16">;
def VLD4LNd32 : VLD4LND<"vld4.32">;
} // mayLoad = 1, hasExtraDefRegAllocReq = 1

//   VST1     : Vector Store (multiple single elements)
// Both forms are selected from the intrinsic passed as IntOp.
class VST1D<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src\\}, $addr"), "",
          [(IntOp addrmode6:$addr, (Ty DPR:$src))]>;
class VST1Q<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : NLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST,
          !strconcat(OpcodeStr, "\t${src:dregpair}, $addr"), "",
          [(IntOp addrmode6:$addr, (Ty QPR:$src))]>;

let hasExtraSrcRegAllocReq = 1 in {
def  VST1d8   : VST1D<"vst1.8",  v8i8,  int_arm_neon_vst1>;
def  VST1d16  : VST1D<"vst1.16", v4i16, int_arm_neon_vst1>;
def  VST1d32  : VST1D<"vst1.32", v2i32, int_arm_neon_vst1>;
def  VST1df   : VST1D<"vst1.32", v2f32, int_arm_neon_vst1>;
def  VST1d64  : VST1D<"vst1.64", v1i64, int_arm_neon_vst1>;

def  VST1q8   : VST1Q<"vst1.8",  v16i8, int_arm_neon_vst1>;
def  VST1q16  : VST1Q<"vst1.16", v8i16, int_arm_neon_vst1>;
def  VST1q32  : VST1Q<"vst1.32", v4i32, int_arm_neon_vst1>;
def  VST1qf   : VST1Q<"vst1.32", v4f32, int_arm_neon_vst1>;
def  VST1q64  : VST1Q<"vst1.64", v2i64, int_arm_neon_vst1>;
} // hasExtraSrcRegAllocReq

// The multi-register stores below carry no selection patterns (empty pattern
// lists), so mayStore is set explicitly for them.
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {

//   VST2     : Vector Store (multiple 2-element structures)
class VST2D<string OpcodeStr>
  : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2\\}, $addr"), "", []>;

def  VST2d8   : VST2D<"vst2.8">;
def  VST2d16  : VST2D<"vst2.16">;
def  VST2d32  : VST2D<"vst2.32">;

//   VST3     : Vector Store (multiple 3-element structures)
class VST3D<string OpcodeStr>
  : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), "", []>;

def  VST3d8   : VST3D<"vst3.8">;
def  VST3d16  : VST3D<"vst3.16">;
def  VST3d32  : VST3D<"vst3.32">;

//   VST4     : Vector Store (multiple 4-element structures)
class VST4D<string OpcodeStr>
  : NLdSt<(outs), (ins addrmode6:$addr,
                   DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"),
          "", []>;

def  VST4d8   : VST4D<"vst4.8">;
def  VST4d16  : VST4D<"vst4.16">;
def  VST4d32  : VST4D<"vst4.32">;

//   VST2LN   : Vector Store (single 2-element structure from one lane)
class VST2LND<string OpcodeStr>
  : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VST,
          !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr"),
          "", []>;

def VST2LNd8  : VST2LND<"vst2.8">;
def VST2LNd16 : VST2LND<"vst2.16">;
def VST2LNd32 : VST2LND<"vst2.32">;

//   VST3LN   : Vector Store (single 3-element structure from one lane)
class VST3LND<string OpcodeStr>
  : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VST,
          !strconcat(OpcodeStr,
          "\t\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr"), "", []>;

def VST3LNd8  : VST3LND<"vst3.8">;
def VST3LNd16 : VST3LND<"vst3.16">;
def VST3LNd32 : VST3LND<"vst3.32">;

//   VST4LN   : Vector Store (single 4-element structure from one lane)
class VST4LND<string OpcodeStr>
  : NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
           DPR:$src4, nohash_imm:$lane), IIC_VST,
          !strconcat(OpcodeStr,
          "\t\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr"),
          "", []>;

def VST4LNd8  : VST4LND<"vst4.8">;
def VST4LNd16 : VST4LND<"vst4.16">;
def VST4LNd32 : VST4LND<"vst4.32">;
} // mayStore = 1, hasExtraSrcRegAllocReq = 1


//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
// Each transform integer-divides the immediate by a per-type constant and
// adds 5, e.g. for i8 the values 0-7 give 5 and 8-15 give 6.
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32);
}]>;
// Yields the opposite index from DSubReg_f64_reg: 0 gives 6 and 1 gives 5.
def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.)
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(1 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// Each transform keeps only the low bits of the immediate (mask 7, 3 or 1),
// i.e. the immediate modulo 8, 4 or 2.
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations, both double- and quad-register.
// The D and Q forms differ in register class, itinerary (IIC_VUNAD vs.
// IIC_VUNAQ) and in the sixth N2V argument (0 for D, 1 for Q).
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
        (ins DPR:$src), IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
        (ins QPR:$src), IIC_VUNAQ, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;

// Basic 2-register operations, scalar single-precision.
// The instruction itself has no selection pattern (ResTy, OpTy and OpNode are
// accepted but unused here); selection is done by N2VDsPat below.
class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
            bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
        IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;

// Selects a scalar OpNode on an SPR by inserting the scalar into
// arm_ssubreg_0 of an undefined OpTy vector, applying Inst, and extracting
// arm_ssubreg_0 of the result.
class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
  : NEONFPPat<(ResTy (OpNode SPR:$a)),
       (EXTRACT_SUBREG
           (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
        arm_ssubreg_0)>;

// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
        (ins DPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
        (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Basic 2-register intrinsics, scalar single-precision
// As with N2VDs, the instruction has an empty pattern list and is selected
// through the separate pattern class that follows.
class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin,
        !strconcat(OpcodeStr, "\t$dst, $src"), "", []>;

// f32-only counterpart of N2VDsPat: the scalar is placed in arm_ssubreg_0 of
// an undefined v2f32 and the result is read back from arm_ssubreg_0.
class N2VDIntsPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
       (EXTRACT_SUBREG
           (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
        arm_ssubreg_0)>;

// Narrow 2-register intrinsics.
// Q-register source, D-register destination.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
        (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;

// Long 2-register intrinsics.  (This is currently only used for VMOVL and is
// derived from N2VImm instead of N2V because of the way the size is encoded.)
class N2VLInt<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op6, bit op4, InstrItinClass itin, string OpcodeStr,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, (outs QPR:$dst),
        (ins DPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written: each $srcN is tied to $dstN.  The D
// form always uses IIC_VPERMD; the Q form takes its itinerary as a parameter.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
        "$src1 = $dst1, $src2 = $dst2", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
        (ins QPR:$src1, QPR:$src2), itin,
        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
        "$src1 = $dst1, $src2 = $dst2", []>;

// Basic 3-register operations, both double- and quad-register.
// Naming used by the classes below, as visible in their operand lists:
//   *SL   - $src2 is a single lane of a DPR_VFP2 register, matched as a
//           NEONvduplane of that register and the $lane immediate.
//   *SL16 - same, but $src2 is in the DPR_8 register class and the itinerary
//           is fixed to IIC_VMULi16D / IIC_VMULi16Q.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                          imm:$lane)))))]> {
  // The two sources are a full vector and a lane, so they cannot be swapped.
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        IIC_VMULi16D,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_8:$src2),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
// In the Q lane forms the lane source is still a D register (of type OpTy);
// the NEONvduplane result has the Q-register type ResTy.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        IIC_VMULi16Q,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

// Basic 3-register operations, scalar single-precision
// The instruction has an empty pattern list (ResTy, OpTy and OpNode are
// unused here); selection is done by N3VDsPat below.
class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", []> {
  let isCommutable = Commutable;
}
// Selects an f32 OpNode on two SPRs: each scalar is inserted into
// arm_ssubreg_0 of an undefined v2f32, Inst is applied, and arm_ssubreg_0 of
// the result is extracted.
class N3VDsPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
       (EXTRACT_SUBREG
           (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
                 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
        arm_ssubreg_0)>;

// Basic 3-register intrinsics, both double- and quad-register.
// Naming used by the classes below, as visible in their operand lists:
//   *SL   - the last register source is a single lane of a DPR_VFP2 register,
//           matched as a NEONvduplane of that register and the $lane
//           immediate.
//   *SL16 - same, but the lane source is in the DPR_8 register class.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_8:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// Multiply-Add/Sub operations, both double- and quad-register.
// $src1 is the accumulator and is tied to $dst; the pattern is
// OpNode($src1, MulOp($src2, $src3)).
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set DPR:$dst, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
                                                     imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_8:$src3),
                                                     imm:$lane)))))))]>;

class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, ValueType Ty,
                SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set QPR:$dst, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, ValueType ResTy, ValueType OpTy,
                  SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                        imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                   (ResTy (NEONvduplane (OpTy DPR_8:$src3),
                                                        imm:$lane)))))))]>;

// Multiply-Add/Sub operations, scalar single-precision
// The instruction has an empty pattern list (Ty, MulOp and OpNode are unused
// here); selection is done by N3VDMulOpsPat below.
class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr,
                 ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", []>;

// Selects f32 OpNode($acc, MulNode($a, $b)): each scalar is inserted into
// arm_ssubreg_0 of an undefined v2f32, Inst is applied, and arm_ssubreg_0 of
// the result is extracted.
class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
      (EXTRACT_SUBREG
          (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0),
                (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a,   arm_ssubreg_0),
                (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b,   arm_ssubreg_0)),
       arm_ssubreg_0)>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
                                      (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
                                      (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// Long 3-argument intrinsic: Q-register accumulator/result (TyQ) combined
// with two D-register operands (TyD).  $src1 is tied to $dst.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
               bit op4, InstrItinClass itin, string OpcodeStr,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR:$src3),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"),
        "$src1 = $dst",
        [(set QPR:$dst,
              (TyQ (IntOp (TyQ QPR:$src1),
                          (TyD DPR:$src2),
                          (TyD DPR:$src3))))]>;

// Long 3-argument intrinsic whose last operand is one lane of $src3,
// duplicated via NEONvduplane.  $src3 comes from the DPR_VFP2 class.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin, string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"),
        "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                imm:$lane)))))]>;

// Same as N3VLInt3SL, but the lane operand $src3 uses the DPR_8 class.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"),
        "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src3),
                                                imm:$lane)))))]>;


// Narrowing 3-register intrinsics.
// Narrowing form: two Q-register sources (TyQ) produce a D-register result
// (TyD).  The itinerary is fixed to IIC_VBINi4D; isCommutable is forwarded
// from the Commutable template argument.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
              bit op4, string OpcodeStr,
              ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        IIC_VBINi4D,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set DPR:$dst,
              (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.

// Two D-register sources (TyD) produce a Q-register result (TyQ).
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
              bit op4, InstrItinClass itin, string OpcodeStr,
              ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set QPR:$dst,
              (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long intrinsic whose second operand is lane $lane of $src2, duplicated
// via NEONvduplane.  $src2 comes from the DPR_VFP2 class.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"),
        "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]>;

// Same as N3VLIntSL, but the lane operand $src2 uses the DPR_8 class.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        itin,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"),
        "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]>;

// Wide 3-register intrinsics.
// Wide form: a Q-register source (TyQ) and a D-register source (TyD) produce
// a Q-register result.  The itinerary is fixed to IIC_VSUBiD; isCommutable
// is forwarded from the Commutable template argument.
class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
              bit op4, string OpcodeStr,
              ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2),
        IIC_VSUBiD,
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set QPR:$dst,
              (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.

// Double-register form; itinerary fixed to IIC_VSHLiD.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        IIC_VSHLiD,
        !strconcat(OpcodeStr, "\t$dst, $src"),
        "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;

// Quad-register form; it uses the same IIC_VSHLiD itinerary as the
// double-register form.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        IIC_VSHLiD,
        !strconcat(OpcodeStr, "\t$dst, $src"),
        "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// Double-register pairwise-long accumulate: IntOp(ResTy $src1, OpTy $src2),
// with $src1 tied to $dst.  Itinerary fixed to IIC_VPALiD.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPALiD,
        !strconcat(OpcodeStr, "\t$dst, $src2"),
        "$src1 = $dst",
        [(set DPR:$dst,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;

// Quad-register pairwise-long accumulate; itinerary fixed to IIC_VPALiQ.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        IIC_VPALiQ,
        !strconcat(OpcodeStr, "\t$dst, $src2"),
        "$src1 = $dst",
        [(set QPR:$dst,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;

// Shift by immediate,
// both double- and quad-register.

// Double-register form: OpNode applied to $src and the i32 immediate $SIMM.
class N2VDSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
             bit op7, bit op4, InstrItinClass itin, string OpcodeStr,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           itin,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set DPR:$dst,
                 (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;

// Quad-register form of N2VDSh.
class N2VQSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
             bit op7, bit op4, InstrItinClass itin, string OpcodeStr,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           itin,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set QPR:$dst,
                 (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
// D-register source (OpTy) shifted by $SIMM into a Q-register result (ResTy).
// op6 is supplied by the user of the class; itinerary fixed to IIC_VSHLiD.
class N2VLSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
             bit op7, bit op6, bit op4, string OpcodeStr,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs QPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           IIC_VSHLiD,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set QPR:$dst,
                 (ResTy (OpNode (OpTy DPR:$src), (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.

// Q-register source (OpTy) shifted by $SIMM into a D-register result (ResTy).
class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
             bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs DPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           itin,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set DPR:$dst,
                 (ResTy (OpNode (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.

// Double-register form: $dst = add($src1, ShOp($src2, $SIMM)), with $src1
// tied to $dst.  Itinerary fixed to IIC_VPALiD.
class N2VDShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
                bit op7, bit op4, string OpcodeStr,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           IIC_VPALiD,
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"),
           "$src1 = $dst",
           [(set DPR:$dst,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;

// Quad-register form; it uses the same IIC_VPALiD itinerary as the
// double-register form.
class N2VQShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
                bit op7, bit op4, string OpcodeStr,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           IIC_VPALiD,
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"),
           "$src1 = $dst",
           [(set QPR:$dst,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
// Double-register shift-and-insert: ShOp($src1, $src2, $SIMM) with $src1
// tied to $dst.  Itinerary fixed to IIC_VSHLiD.
class N2VDShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
                bit op7, bit op4, string OpcodeStr,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           IIC_VSHLiD,
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"),
           "$src1 = $dst",
           [(set DPR:$dst,
                 (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;

// Quad-register shift-and-insert; itinerary fixed to IIC_VSHLiQ.
class N2VQShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
                bit op7, bit op4, string OpcodeStr,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           IIC_VSHLiQ,
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"),
           "$src1 = $dst",
           [(set QPR:$dst,
                 (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.

// Double-register form: IntOp(OpTy $src, i32 $SIMM) -> ResTy.
class N2VCvtD<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
              bit op7, bit op4, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           IIC_VUNAD,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set DPR:$dst,
                 (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;

// Quad-register form: IntOp(OpTy $src, i32 $SIMM) -> ResTy.
class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8,
              bit op7, bit op4, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           IIC_VUNAQ,
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set QPR:$dst,
                 (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
// (op21_20 values 0b00 / 0b01 / 0b10 pair with the "8" / "16" / "32"
// mnemonic suffixes; 8- and 16-bit defs share the *16 itineraries.)
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   !strconcat(OpcodeStr, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   !strconcat(OpcodeStr, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   !strconcat(OpcodeStr, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   !strconcat(OpcodeStr, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   !strconcat(OpcodeStr, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   !strconcat(OpcodeStr, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// By-lane ("SL") variants for 16- and 32-bit elements.  The 16-bit defs use
// the *SL16 classes, which take no itinerary argument here; the 32-bit defs
// pass IIC_VMULi32D / IIC_VMULi32Q explicitly.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8,
                       !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D,
                     !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8,
                       !strconcat(OpcodeStr, "16"), v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q,
                     !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
// (itinD / itinQ are forwarded to N3V_QHS for both the 16- and 32-bit
// itinerary slots.)
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   !strconcat(OpcodeStr, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   !strconcat(OpcodeStr, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// (each def is named after its narrowed result type, while the mnemonic
// suffix is the source element size.)
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, !strconcat(OpcodeStr, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, !strconcat(OpcodeStr, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, !strconcat(OpcodeStr, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
// (sources are v8i8 / v4i16 / v2i32; each def is named after its widened
// result type.)
multiclass N2VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                       bit op4, string OpcodeStr, Intrinsic IntOp> {
  def v8i16 : N2VLInt<op24, op23, 0b001000, op11_8, op7, op6, op4,
                      IIC_VQUNAiD, !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp>;
  def v4i32 : N2VLInt<op24, op23, 0b010000, op11_8, op7, op6, op4,
                      IIC_VQUNAiD, !strconcat(OpcodeStr, "16"),
                      v4i32, v4i16, IntOp>;
  def v2i64 : N2VLInt<op24, op23, 0b100000, op11_8, op7, op6, op4,
                      IIC_VQUNAiD, !strconcat(OpcodeStr, "32"),
                      v2i64, v2i32, IntOp>;
}


// Neon 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
// (op21_20 values 0b01 / 0b10 pair with the "16" / "32" mnemonic suffixes.)
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16,
                      !strconcat(OpcodeStr, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32,
                      !strconcat(OpcodeStr, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16,
                      !strconcat(OpcodeStr, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32,
                      !strconcat(OpcodeStr, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}

// By-lane ("SL") intrinsic variants for 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, Intrinsic IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          !strconcat(OpcodeStr, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        !strconcat(OpcodeStr, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// (the 8-bit defs reuse the 16-bit itineraries itinD16 / itinQ16.)
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16,
                      !strconcat(OpcodeStr, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16,
                      !strconcat(OpcodeStr, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}

// ....then also with element size of 64 bits:
// (the 64-bit defs reuse the 32-bit itineraries itinD32 / itinQ32.)
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32,
                      !strconcat(OpcodeStr, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32,
                      !strconcat(OpcodeStr, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}


// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// (each def is named after its narrowed result type, while the mnemonic
// suffix is the source element size.)
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      !strconcat(OpcodeStr, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      !strconcat(OpcodeStr, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      !strconcat(OpcodeStr, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
// (each def is named after its Q-register result type; the mnemonic suffix
// and op21_20 value follow the D-register source element size.)
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr,
                      Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin,
                      !strconcat(OpcodeStr, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin,
                      !strconcat(OpcodeStr, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// By-lane ("SL") long intrinsic variants.  Unlike N3VLInt_HS, these defs are
// named after the source operand type (v4i16 / v2i32).
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          !strconcat(OpcodeStr, "16"),
                          v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        !strconcat(OpcodeStr, "32"),
                        v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin,
                      !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
  def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
                      !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
  def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
                      !strconcat(OpcodeStr, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
                      !strconcat(OpcodeStr, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
// (the multiply node is always `mul`; OpNode is the operation that combines
// the product with the accumulator.)
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        !strconcat(OpcodeStr, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        !strconcat(OpcodeStr, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        !strconcat(OpcodeStr, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        !strconcat(OpcodeStr, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        !strconcat(OpcodeStr, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        !strconcat(OpcodeStr, "32"), v4i32, mul, OpNode>;
}

// By-lane ("SL") multiply-op variants for 16- and 32-bit elements.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            !strconcat(OpcodeStr, "16"),
                            v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          !strconcat(OpcodeStr, "32"),
                          v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            !strconcat(OpcodeStr, "16"),
                            v8i16, v4i16, mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          !strconcat(OpcodeStr, "32"),
                          v4i32, v2i32, mul, ShOp>;
}

// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
                       !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q,
                       !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q,
                       !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q,
                       !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
// NOTE(review): the 32-bit def below uses IIC_VMACi16D, whereas the 32-bit
// def in N3VLInt3SL_HS uses IIC_VMACi32D -- confirm which is intended.
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
}

// By-lane ("SL") long 3-argument variants; defs are named after the source
// operand type (v4i16 / v2i32).
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           !strconcat(OpcodeStr, "16"),
                           v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         !strconcat(OpcodeStr, "32"),
                         v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
// The 8-bit def must use op21_20 = 0b00, as every other 8-bit def in these
// multiclasses does (e.g. N3VLInt_QHS, N3VInt3_QHS).  The value 0b01 is
// already taken by the inherited 16-bit def v4i32 in N3VLInt3_HS, so using
// it here would give the ".8" and ".16" instructions the same size field.
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        string OpcodeStr, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
                       !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
}

// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
// (the 0b00 / 0b01 / 0b10 argument pairs with the "8" / "16" / "32"
// mnemonic suffixes; result and operand types are identical.)
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, !strconcat(OpcodeStr, "8"),
                      v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, !strconcat(OpcodeStr, "16"),
                      v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, !strconcat(OpcodeStr, "32"),
                      v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, !strconcat(OpcodeStr, "8"),
                      v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, !strconcat(OpcodeStr, "16"),
                      v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, !strconcat(OpcodeStr, "32"),
                      v4i32, v4i32, IntOp>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
// (defs are named after the source type; the result type has half as many
// elements of twice the width, e.g. v8i8 -> v4i16.)
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
// (same type pairing as N2VPLInt_QHS, built on the accumulate classes.)
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   element sizes of 8, 16, 32 and 64 bits:
// (8/16/32-bit defs pass op21_16 = 0b001000 / 0b010000 / 0b100000 with the
// op7 argument 0; the 64-bit defs pass 0b000000 with op7 = 1.)
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "8"), v8i8, OpNode>;
  def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "16"), v4i16, OpNode>;
  def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "32"), v2i32, OpNode>;
  def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4, itin,
                     !strconcat(OpcodeStr, "64"), v1i64, OpNode>;

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "8"), v16i8, OpNode>;
  def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "16"), v8i16, OpNode>;
  def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4, itin,
                     !strconcat(OpcodeStr, "32"), v4i32, OpNode>;
  def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4, itin,
                     !strconcat(OpcodeStr, "64"), v2i64, OpNode>;
}


// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
// (same op21_16 / op7 scheme as N2VSh_QHSD; no itinerary argument is taken.)
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v8i8, ShOp>;
  def v4i16 : N2VDShAdd<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
  def v2i32 : N2VDShAdd<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
  def v1i64 : N2VDShAdd<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v16i8, ShOp>;
  def v8i16 : N2VQShAdd<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v8i16, ShOp>;
  def v4i32 : N2VQShAdd<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v4i32, ShOp>;
  def v2i64 : N2VQShAdd<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
}


// Neon Shift-Insert vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
// (same op21_16 / op7 scheme as N2VSh_QHSD; no itinerary argument is taken.)
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v8i8, ShOp>;
  def v4i16 : N2VDShIns<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
  def v2i32 : N2VDShIns<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
  def v1i64 : N2VDShIns<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v16i8, ShOp>;
  def v8i16 : N2VQShIns<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v8i16, ShOp>;
  def v4i32 : N2VQShIns<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v4i32, ShOp>;
  def v2i64 : N2VQShIns<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// Each record below instantiates an instruction class (or multiclass) defined
// earlier in this file.  The leading numeric template arguments are opcode
// bit-fields; the quoted string is the assembly mnemonic.

// VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd.i", add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd.f32", v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd.f32", v4f32, v4f32, fadd, 1>;
// VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl.s", int_arm_neon_vaddls, 1>;
defm VADDLu   : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl.u", int_arm_neon_vaddlu, 1>;
// VADDW    : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>;
defm VADDWu   : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>;
// VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vhadd.s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vhadd.u", int_arm_neon_vhaddu, 1>;
// VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vrhadd.s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vrhadd.u", int_arm_neon_vrhaddu, 1>;
// VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                            IIC_VBINi4Q, "vqadd.s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                            IIC_VBINi4Q, "vqadd.u", int_arm_neon_vqaddu, 1>;
// VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>;
// VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>;

// Vector Multiply Operations.

// VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q,
                        IIC_VMULi32Q, "vmul.i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul.p8", v8i8, v8i8,
                        int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul.p8", v16i8, v16i8,
                        int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul.f32", v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul.f32", v4f32, v4f32, fmul, 1>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul.i", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul.f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul.f32", v4f32, v2f32, fmul>;

// The by-lane Q-register instructions take the lane operand from a D
// register.  These patterns match a multiply by a lane duplicated from a Q
// register and rewrite it to use the D sub-register (and lane index within
// it) selected by the DSubReg_*/SubReg_* transforms.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh.s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh.s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh.s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

// VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs   : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls, 1>;
defm VMULLu   : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu, 1>;
def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull.p8", v8i16, v8i8,
                        int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls>;
defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu>;

// VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA     : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla.f32", v4f32, fmul, fadd>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla.f32", v4f32, v2f32, fmul, fadd>;

def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1),
                              (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1),
                              (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
                       (fmul (v4f32 QPR:$src2),
                             (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>;
defm VMLALu   : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>;

defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal.s", int_arm_neon_vmlals>;
defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal.u", int_arm_neon_vmlalu>;

// VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal.s", int_arm_neon_vqdmlal>;

// VMLS     : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls.f32", v4f32, fmul, fsub>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls.f32", v4f32, v2f32, fmul, fsub>;

def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1),
                              (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1),
                              (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
                       (fmul (v4f32 QPR:$src2),
                             (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>;
defm VMLSLu   : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;

defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl.s", int_arm_neon_vmlsls>;
defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl.u", int_arm_neon_vmlslu>;

// VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
// The by-lane opcode field is spelled with all four bits (0b0111), matching
// the sibling VMLAL/VMLSL/VQDMLAL by-lane records.
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl.s", int_arm_neon_vqdmlsl>;

// Vector Subtract Operations.

// The trailing 0/1 on each record is the flag that the add records in this
// file set to 1 and the subtract records set to 0 (presumably the
// "commutable" bit of the instruction class -- confirm against the class
// definitions).  Subtraction is order-sensitive, so every record in this
// section passes 0.

// VSUB     : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, "vsub.i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub.f32", v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub.f32", v4f32, v4f32, fsub, 0>;
// VSUBL    : Vector Subtract Long (Q = D - D)
// Swapping the two D operands would negate the result, so these must not be
// flagged the way the (order-insensitive) VADDL records are.
defm VSUBLs   : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl.s", int_arm_neon_vsubls, 0>;
defm VSUBLu   : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl.u", int_arm_neon_vsublu, 0>;
// VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>;
defm VSUBWu   : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>;
// VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vhsub.s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vhsub.u", int_arm_neon_vhsubu, 0>;
// VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                            IIC_VBINi4Q, "vqsub.s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                            IIC_VBINi4Q, "vqsub.u", int_arm_neon_vqsubu, 0>;
// VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
// VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.

// Comparison records produce an integer vector result; the floating-point
// forms therefore pair an integer result type with an f32 operand type.

// VCEQ     : Vector Compare Equal
defm VCEQ   : N3V_QHS<1, 0, 0b1000, 1,
                      IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                      "vceq.i", NEONvceq, 1>;
def  VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND,
                   "vceq.f32", v2i32, v2f32, NEONvceq, 1>;
def  VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ,
                   "vceq.f32", v4i32, v4f32, NEONvceq, 1>;

// VCGE     : Vector Compare Greater Than or Equal
defm VCGEs  : N3V_QHS<0, 0, 0b0011, 1,
                      IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                      "vcge.s", NEONvcge, 0>;
defm VCGEu  : N3V_QHS<1, 0, 0b0011, 1,
                      IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                      "vcge.u", NEONvcgeu, 0>;
def  VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND,
                   "vcge.f32", v2i32, v2f32, NEONvcge, 0>;
def  VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ,
                   "vcge.f32", v4i32, v4f32, NEONvcge, 0>;

// VCGT     : Vector Compare Greater Than
defm VCGTs  : N3V_QHS<0, 0, 0b0011, 0,
                      IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                      "vcgt.s", NEONvcgt, 0>;
defm VCGTu  : N3V_QHS<1, 0, 0b0011, 0,
                      IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                      "vcgt.u", NEONvcgtu, 0>;
def  VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND,
                   "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>;
def  VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ,
                   "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>;

// VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND,
                      "vacge.f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ,
                      "vacge.f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;

// VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND,
                      "vacgt.f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ,
                      "vacgt.f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;

// VTST     : Vector Test Bits
defm VTST   : N3V_QHS<0, 0, 0b1000, 1,
                      IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                      "vtst.i", NEONvtst, 1>;

// Vector Bitwise Operations.

// The bitwise records are typed as i32 vectors (v2i32 for D registers,
// v4i32 for Q registers).

// VAND     : Vector Bitwise AND
def  VANDd  : N3VD<0, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                   "vand", v2i32, v2i32, and, 1>;
def  VANDq  : N3VQ<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                   "vand", v4i32, v4i32, and, 1>;

// VEOR     : Vector Bitwise Exclusive OR
def  VEORd  : N3VD<1, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                   "veor", v2i32, v2i32, xor, 1>;
def  VEORq  : N3VQ<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                   "veor", v4i32, v4i32, xor, 1>;

// VORR     : Vector Bitwise OR
def  VORRd  : N3VD<0, 0, 0b10, 0b0001, 1, IIC_VBINiD,
                   "vorr", v2i32, v2i32, or, 1>;
def  VORRq  : N3VQ<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ,
                   "vorr", v4i32, v4i32, or, 1>;

// VBIC     : Vector Bitwise Bit Clear (AND NOT)
// dst = src1 & ~src2
def  VBICd  : N3V<0, 0, 0b01, 0b0001, 0, 1,
                  (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                  IIC_VBINiD, "vbic\t$dst, $src1, $src2", "",
                  [(set DPR:$dst,
                        (v2i32 (and DPR:$src1, (vnot_conv DPR:$src2))))]>;
def  VBICq  : N3V<0, 0, 0b01, 0b0001, 1, 1,
                  (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                  IIC_VBINiQ, "vbic\t$dst, $src1, $src2", "",
                  [(set QPR:$dst,
                        (v4i32 (and QPR:$src1, (vnot_conv QPR:$src2))))]>;

// VORN     : Vector Bitwise OR NOT
// dst = src1 | ~src2
def  VORNd  : N3V<0, 0, 0b11, 0b0001, 0, 1,
                  (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                  IIC_VBINiD, "vorn\t$dst, $src1, $src2", "",
                  [(set DPR:$dst,
                        (v2i32 (or DPR:$src1, (vnot_conv DPR:$src2))))]>;
def  VORNq  : N3V<0, 0, 0b11, 0b0001, 1, 1,
                  (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                  IIC_VBINiQ, "vorn\t$dst, $src1, $src2", "",
                  [(set QPR:$dst,
                        (v4i32 (or QPR:$src1, (vnot_conv QPR:$src2))))]>;

// VMVN     : Vector Bitwise NOT
def  VMVNd  : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                  (outs DPR:$dst), (ins DPR:$src),
                  IIC_VSHLiD, "vmvn\t$dst, $src", "",
                  [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
def  VMVNq  : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                  (outs QPR:$dst), (ins QPR:$src),
                  IIC_VSHLiD, "vmvn\t$dst, $src", "",
                  [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
// Also select VMVN for the vnot_conv form of the NOT fragment.
def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>;

// VBSL     : Vector Bitwise Select
// dst = (src2 & src1) | (src3 & ~src1); $src1 is tied to $dst.
def  VBSLd  : N3V<1, 0, 0b01, 0b0001, 0, 1,
                  (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
                  IIC_VCNTiD, "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
                  [(set DPR:$dst,
                        (v2i32 (or (and DPR:$src2, DPR:$src1),
                                   (and DPR:$src3, (vnot_conv DPR:$src1)))))]>;
def  VBSLq  : N3V<1, 0, 0b01, 0b0001, 1, 1,
                  (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
                  IIC_VCNTiQ, "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
                  [(set QPR:$dst,
                        (v4i32 (or (and QPR:$src2, QPR:$src1),
                                   (and QPR:$src3, (vnot_conv QPR:$src1)))))]>;

// VBIF     : Vector Bitwise Insert if False
//            like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst",
// VBIT     : Vector Bitwise Insert if True
//            like VBSL but with: "vbit\t$dst, $src2, $src1", "$src3 = $dst",
// These are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD     : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vabd.s", int_arm_neon_vabds, 0>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                           IIC_VBINi4Q, "vabd.u", int_arm_neon_vabdu, 0>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, "vabd.f32", v2f32, v2f32,
                        int_arm_neon_vabds, 0>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vabd.f32", v4f32, v4f32,
                        int_arm_neon_vabds, 0>;

// VABDL    : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, "vabdl.s", int_arm_neon_vabdls, 0>;
defm VABDLu   : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, "vabdl.u", int_arm_neon_vabdlu, 0>;

// VABA     : Vector Absolute Difference and Accumulate
// VABA is encoded with bit 23 = 0, bits 11-8 = 0b0111 and bit 4 = 1 (the
// same opcode field as VABD, with bit 4 set).  It must not share the
// <1, 0b0101, 0> fields used by the long form VABAL below, otherwise the two
// instructions describe the same bit pattern.
// NOTE(review): assumes the first four template arguments of N3VInt3_QHS are
// op24, op23, op11_8, op4 like the sibling multiclasses -- confirm.
defm VABAs    : N3VInt3_QHS<0,0,0b0111,1, "vaba.s", int_arm_neon_vabas>;
defm VABAu    : N3VInt3_QHS<1,0,0b0111,1, "vaba.u", int_arm_neon_vabau>;

// VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>;
defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>;

// Vector Maximum and Minimum.

// Integer forms come from the N3VInt_QHS multiclass (signed and unsigned);
// the f32 forms are individual D- and Q-register records that reuse the
// signed intrinsic.

// VMAX     : Vector Maximum
defm VMAXs  : N3VInt_QHS<0, 0, 0b0110, 0,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vmax.s", int_arm_neon_vmaxs, 1>;
defm VMAXu  : N3VInt_QHS<1, 0, 0b0110, 0,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vmax.u", int_arm_neon_vmaxu, 1>;
def  VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND,
                      "vmax.f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ,
                      "vmax.f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMIN     : Vector Minimum
defm VMINs  : N3VInt_QHS<0, 0, 0b0110, 1,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vmin.s", int_arm_neon_vmins, 1>;
defm VMINu  : N3VInt_QHS<1, 0, 0b0110, 1,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vmin.u", int_arm_neon_vminu, 1>;
def  VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND,
                      "vmin.f32", v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ,
                      "vmin.f32", v4f32, v4f32, int_arm_neon_vmins, 1>;

// Vector Pairwise Operations.

// VPADD    : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd.i8", v8i8, v8i8,
                        int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd.i16", v4i16, v4i16,
                        int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd.i32", v2i32, v2i32,
                        int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd.f32", v2f32, v2f32,
                        int_arm_neon_vpadd, 0>;

// VPADDL   : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u",
                             int_arm_neon_vpaddlu>;

// VPADAL   : Vector Pairwise Add and Accumulate Long
// VPADAL is encoded with bits 11-7 = 0b0110x (x = 0 signed, 1 unsigned).
// VPADDL above owns 0b0010x, so the two families must use different values
// here or they describe the same bit pattern.
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u",
                              int_arm_neon_vpadalu>;

// VPMAX    : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.s8", v8i8, v8i8,
                        int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.s16", v4i16, v4i16,
                        int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.s32", v2i32, v2i32,
                        int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.u8", v8i8, v8i8,
                        int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.u16", v4i16, v4i16,
                        int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.u32", v2i32, v2i32,
                        int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax.f32", v2f32, v2f32,
                        int_arm_neon_vpmaxs, 0>;

// VPMIN    : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.s8", v8i8, v8i8,
                        int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.s16", v4i16, v4i16,
                        int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.s32", v2i32, v2i32,
                        int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.u8", v8i8, v8i8,
                        int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.u16", v4i16, v4i16,
                        int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.u32", v2i32, v2i32,
                        int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin.f32", v2f32, v2f32,
                        int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE   : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe.u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe.u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe.f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe.f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS   : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSD, "vrecps.f32", v2f32, v2f32,
                        int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSQ, "vrecps.f32", v4f32, v4f32,
                        int_arm_neon_vrecps, 1>;

// VRSQRTE  : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte.u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte.u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte.f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte.f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS  : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSD, "vrsqrts.f32", v2f32, v2f32,
                        int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSQ, "vrsqrts.f32", v4f32, v4f32,
                        int_arm_neon_vrsqrts, 1>;

// Vector Shifts.

// VSHL     : Vector Shift
defm VSHLs    : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
                            IIC_VSHLiQ, "vshl.s", int_arm_neon_vshifts, 0>;
defm VSHLu    : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
                            IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>;
// VSHL     : Vector Shift Left (Immediate)
// VSHL (immediate) is encoded with bits 11-8 = 0b0101.  0b0111 belongs to
// VQSHL (immediate), defined below as VQSHLsi with the same U/bit-23/bit-4
// values, so using it here would give both instructions one bit pattern.
defm VSHLi    : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl.i", NEONvshl>;
// VSHR     : Vector Shift Right (Immediate)
defm VSHRs    : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>;
defm VSHRu    : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>;

// VSHLL    : Vector Shift Left Long
def  VSHLLs8  : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8",
                       v8i16, v8i8, NEONvshlls>;
def  VSHLLs16 : N2VLSh<0, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.s16",
                       v4i32, v4i16, NEONvshlls>;
def  VSHLLs32 : N2VLSh<0, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.s32",
                       v2i64, v2i32, NEONvshlls>;
def  VSHLLu8  : N2VLSh<1, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.u8",
                       v8i16, v8i8, NEONvshllu>;
def  VSHLLu16 : N2VLSh<1, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.u16",
                       v4i32, v4i16, NEONvshllu>;
def  VSHLLu32 : N2VLSh<1, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.u32",
                       v2i64, v2i32, NEONvshllu>;

// VSHLL    : Vector Shift Left Long (with maximum shift count)
def  VSHLLi8  : N2VLSh<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8",
                       v8i16, v8i8, NEONvshlli>;
def  VSHLLi16 : N2VLSh<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16",
                       v4i32, v4i16, NEONvshlli>;
def  VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32",
                       v2i64, v2i32, NEONvshlli>;

// VSHRN    : Vector Shift Right and Narrow
def  VSHRN16  : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1,
                       IIC_VSHLiD, "vshrn.i16", v8i8, v8i16, NEONvshrn>;
def  VSHRN32  : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1,
                       IIC_VSHLiD, "vshrn.i32", v4i16, v4i32, NEONvshrn>;
def  VSHRN64  : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1,
                       IIC_VSHLiD, "vshrn.i64", v2i32, v2i64, NEONvshrn>;

// VRSHL    : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
                            IIC_VSHLi4Q, "vrshl.s", int_arm_neon_vrshifts, 0>;
defm VRSHLu   : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
                            IIC_VSHLi4Q, "vrshl.u", int_arm_neon_vrshiftu, 0>;
// VRSHR    : Vector Rounding Shift Right
defm VRSHRs   : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>;
defm VRSHRu   : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>;

// VRSHRN   : Vector Rounding Shift Right and Narrow
def  VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1,
                       IIC_VSHLi4D, "vrshrn.i16", v8i8, v8i16, NEONvrshrn>;
def  VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1,
                       IIC_VSHLi4D, "vrshrn.i32", v4i16, v4i32, NEONvrshrn>;
def  VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1,
                       IIC_VSHLi4D, "vrshrn.i64", v2i32, v2i64, NEONvrshrn>;

// VQSHL    : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
                            IIC_VSHLi4Q, "vqshl.s", int_arm_neon_vqshifts, 0>;
defm VQSHLu   : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
                            IIC_VSHLi4Q, "vqshl.u", int_arm_neon_vqshiftu, 0>;
// VQSHL    : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.s", NEONvqshls>;
defm VQSHLui  : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>;
// VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>;

// VQSHRN   : Vector Saturating Shift Right and Narrow
def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1,
                       IIC_VSHLi4D, "vqshrn.s16", v8i8, v8i16, NEONvqshrns>;
def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1,
                       IIC_VSHLi4D, "vqshrn.s32", v4i16, v4i32, NEONvqshrns>;
def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1,
                       IIC_VSHLi4D, "vqshrn.s64", v2i32, v2i64, NEONvqshrns>;
def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1,
                       IIC_VSHLi4D, "vqshrn.u16", v8i8, v8i16, NEONvqshrnu>;
def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1,
                       IIC_VSHLi4D, "vqshrn.u32", v4i16, v4i32, NEONvqshrnu>;
def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1,
                       IIC_VSHLi4D, "vqshrn.u64", v2i32, v2i64, NEONvqshrnu>;

// VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1,
                       IIC_VSHLi4D, "vqshrun.s16", v8i8, v8i16, NEONvqshrnsu>;
def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1,
                       IIC_VSHLi4D, "vqshrun.s32", v4i16, v4i32, NEONvqshrnsu>;
def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1,
                       IIC_VSHLi4D, "vqshrun.s64", v2i32, v2i64, NEONvqshrnsu>;

// VQRSHL   : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
                            IIC_VSHLi4Q, "vqrshl.s", int_arm_neon_vqrshifts, 0>;
defm VQRSHLu  : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
                            IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>;

// VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
// Signed forms, one record per source element width (16/32/64 bits).
def VQRSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrn.s16", v8i8,  v8i16, NEONvqrshrns>;
def VQRSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrn.s32", v4i16, v4i32, NEONvqrshrns>;
def VQRSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrn.s64", v2i32, v2i64, NEONvqrshrns>;
// Unsigned forms.
def VQRSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrn.u16", v8i8,  v8i16, NEONvqrshrnu>;
def VQRSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrn.u32", v4i16, v4i32, NEONvqrshrnu>;
def VQRSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrn.u64", v2i32, v2i64, NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
def VQRSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrun.s16", v8i8,  v8i16, NEONvqrshrnsu>;
def VQRSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrun.s32", v4i16, v4i32, NEONvqrshrnsu>;
def VQRSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, IIC_VSHLi4D,
                        "vqrshrun.s64", v2i32, v2i64, NEONvqrshrnsu>;

// VSRA     : Vector Shift Right and Accumulate
defm VSRAs  : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>;
defm VSRAu  : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra.u", NEONvshru>;

// VRSRA    : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra.s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra.u", NEONvrshru>;

// VSLI     : Vector Shift Left and Insert
// The multiclass appends the element size ("8", "16", "32", "64") to the
// mnemonic prefix given here.
defm VSLI   : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli.", NEONvsli>;

// VSRI     : Vector Shift Right and Insert
defm VSRI   : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>;

// Vector Absolute and Saturating Absolute.

// VABS     : Vector Absolute Value
defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                           IIC_VUNAiD, IIC_VUNAiQ, "vabs.s",
                           int_arm_neon_vabs>;
def  VABSfd   : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                        IIC_VUNAD, "vabs.f32",
                        v2f32, v2f32, int_arm_neon_vabs>;
def  VABSfq   : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                        IIC_VUNAQ, "vabs.f32",
                        v4f32, v4f32, int_arm_neon_vabs>;

// VQABS    : Vector Saturating Absolute Value
defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs.s",
                           int_arm_neon_vqabs>;

// Vector Negate.

// Integer negation is matched as (0 - x): a subtraction from an all-zeros
// vector.  vneg_conv uses the immAllZerosV_bc form of the zero vector.
def vneg      : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>;

// Integer VNEG on a D register; `size` selects the element width field,
// OpcodeStr is the full mnemonic and Ty the vector type matched.
class VNEGD<bits<2> size, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
        IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
// Integer VNEG on a Q register.  Uses the Q-register shift itinerary
// (IIC_VSHLiQ) rather than the D-register one, matching how the other
// Q-register records in this file pair with *Q itineraries.
class VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
        IIC_VSHLiQ, !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (vneg QPR:$src)))]>;

// VNEG     : Vector Negate
def  VNEGs8d  : VNEGD<0b00, "vneg.s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg.s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg.s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg.s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg.s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>;

// VNEG     : Vector Negate (floating-point)
def  VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
                    "vneg.f32\t$dst, $src", "",
                    [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
                    "vneg.f32\t$dst, $src", "",
                    [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;

// Select the integer VNEG records for the vneg_conv form as well.
def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vneg_conv DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vneg_conv QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG    : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg.s",
                           int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS     : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls.s",
                           int_arm_neon_vcls>;
// VCLZ     : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz.i",
                           int_arm_neon_vclz>;
// VCNT     : Vector Count One Bits
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt.8",
                        v8i8, v8i8, int_arm_neon_vcnt>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt.8",
                        v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Move Operations.

// VMOV     : Vector Move (Register)

// These records carry no selection pattern (empty pattern list).
// NOTE(review): VMOVQ reuses the D-register itinerary IIC_VMOVD -- confirm
// whether a Q-register itinerary is intended.
def  VMOVD    : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
                    IIC_VMOVD, "vmov\t$dst, $src", "", []>;
def  VMOVQ    : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
                    IIC_VMOVD, "vmov\t$dst, $src", "", []>;

// VMOV     : Vector Move (Immediate)

// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
// Each VMOV_get_imm<N> / vmovImm<N> pair calls ARM::getVMOVImm with the
// element size in bytes (1, 2, 4 or 8).  The PatLeaf predicate accepts a
// build_vector only when that call yields a non-null node; the xform then
// returns the node produced by the same call.
def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 1, *CurDAG);
}]>;
def vmovImm8 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0;
}], VMOV_get_imm8>;

// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 2, *CurDAG);
}]>;
def vmovImm16 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0;
}], VMOV_get_imm16>;

// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 4, *CurDAG);
}]>;
def vmovImm32 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0;
}], VMOV_get_imm32>;

// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 8, *CurDAG);
}]>;
def vmovImm64 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0;
}], VMOV_get_imm64>;

// Note: Some of the cmode bits in the following VMOV instructions need to
// be encoded based on the immed values.

// Immediate moves, one D-register and one Q-register def per element size.
// The pattern operand is the matching vmovImm<N> leaf.
def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
                         (ins i8imm:$SIMM), IIC_VMOVImm,
                         "vmov.i8\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
                         (ins i8imm:$SIMM), IIC_VMOVImm,
                         "vmov.i8\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst),
                         (ins i16imm:$SIMM), IIC_VMOVImm,
                         "vmov.i16\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst),
                         (ins i16imm:$SIMM), IIC_VMOVImm,
                         "vmov.i16\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;

def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst),
                         (ins i32imm:$SIMM), IIC_VMOVImm,
                         "vmov.i32\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst),
                         (ins i32imm:$SIMM), IIC_VMOVImm,
                         "vmov.i32\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
                         (ins i64imm:$SIMM), IIC_VMOVImm,
                         "vmov.i64\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
                         (ins i64imm:$SIMM), IIC_VMOVImm,
                         "vmov.i64\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;

//   VMOV     : Vector Get Lane (move scalar to ARM core register)

// The instructions themselves read a lane of a D register.  The 8- and 16-bit
// forms select NEONvgetlanes / NEONvgetlaneu; the 32-bit form selects
// extractelt.
def VGETLNs8  : NVGetLane<0b11100101, 0b1011, 0b00,
                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
                          IIC_VMOVSI, "vmov", ".s8\t$dst, $src[$lane]",
                          [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src),
                                           imm:$lane))]>;
def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01,
                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
                          IIC_VMOVSI, "vmov", ".s16\t$dst, $src[$lane]",
                          [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src),
                                           imm:$lane))]>;
def VGETLNu8  : NVGetLane<0b11101101, 0b1011, 0b00,
                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
                          IIC_VMOVSI, "vmov", ".u8\t$dst, $src[$lane]",
                          [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src),
                                           imm:$lane))]>;
def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01,
                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
                          IIC_VMOVSI, "vmov", ".u16\t$dst, $src[$lane]",
                          [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src),
                                           imm:$lane))]>;
def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00,
                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
                          IIC_VMOVSI, "vmov", ".32\t$dst, $src[$lane]",
                          [(set GPR:$dst, (extractelt (v2i32 DPR:$src),
                                           imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

// Lane reads from a Q register: extract the D subregister chosen by the
// DSubReg_* transform, then use the D-register instruction with the lane
// index rewritten by the matching SubReg_*_lane transform.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>;
// f32 lanes are read as an S subregister after copying the source into the
// corresponding *_VFP2 register class.
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


//   VMOV     : Vector Set Lane (move ARM core register to scalar)

// The destination is tied to $src1, so the instruction updates one lane of
// the incoming vector in place.
let Constraints = "$src1 = $dst" in {
def VSETLNi8  : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst),
                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
                          IIC_VMOVISL, "vmov", ".8\t$dst[$lane], $src2",
                          [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
                                           GPR:$src2, imm:$lane))]>;
def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst),
                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
                          IIC_VMOVISL, "vmov", ".16\t$dst[$lane], $src2",
                          [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
                                           GPR:$src2, imm:$lane))]>;
def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst),
                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
                          IIC_VMOVISL, "vmov", ".32\t$dst[$lane], $src2",
                          [(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
                                           GPR:$src2, imm:$lane))]>;
}
// Lane writes into a Q register: extract the containing D subregister, set
// the lane there, and insert the result back at the same subregister index.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                   (DSubReg_i8_reg imm:$lane))),
                            GPR:$src2, (SubReg_i8_lane imm:$lane)),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i16_reg imm:$lane))),
                             GPR:$src2, (SubReg_i16_lane imm:$lane)),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i32_reg imm:$lane))),
                             GPR:$src2, (SubReg_i32_lane imm:$lane)),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: floating-point scalars are inserted into subregister 0
// of an IMPLICIT_DEF vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
def : Pat<(v2f64 (scalar_to_vector DPR:$src)),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;

// scalar_to_vector: integer scalars are written to lane 0 of an IMPLICIT_DEF
// D register with the set-lane instructions.
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

// Q-register results wrap the D-register form in an INSERT_SUBREG at
// arm_dsubreg_0.
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         arm_dsubreg_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         arm_dsubreg_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         arm_dsubreg_0)>;

//   VDUP     : Vector Duplicate (from ARM core register to all elements)

// VDUPD / VDUPQ differ only in the destination register class.
class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"),
          [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
          IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"),
          [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;

def  VDUP8d   : VDUPD<0b11101100, 0b00, ".8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, ".16", v4i16>;
def  VDUP32d  : VDUPD<0b11101000, 0b00, ".32", v2i32>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, ".8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, ".16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, ".32", v4i32>;

// f32 duplicates from a core register: same NVDup arguments as the ".32"
// integer forms, matched through a bitconvert of the GPR operand.
def  VDUPfd   : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
                      IIC_VMOVIS, "vdup", ".32\t$dst, $src",
                      [(set DPR:$dst, (v2f32 (NEONvdup
                                              (f32 (bitconvert GPR:$src)))))]>;
def  VDUPfq   : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
                      IIC_VMOVIS, "vdup", ".32\t$dst, $src",
                      [(set QPR:$dst, (v4f32 (NEONvdup
                                              (f32 (bitconvert GPR:$src)))))]>;

//   VDUP     : Vector Duplicate Lane (from scalar to all elements)

// Source is always a D register; VDUPLNQ therefore takes separate result and
// operand types.
class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
        (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
        [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;

class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr,
              ValueType ResTy, ValueType OpTy>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
        (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
        [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;

def VDUPLN8d  : VDUPLND<0b00, 0b01, "vdup.8", v8i8>;
def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>;
def VDUPLN32d : VDUPLND<0b01, 0b00, "vdup.32", v2i32>;
def VDUPLNfd  : VDUPLND<0b01, 0b00, "vdup.32", v2f32>;
def VDUPLN8q  : VDUPLNQ<0b00, 0b01, "vdup.8", v16i8, v8i8>;
def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>;
def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>;
def VDUPLNfq  : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>;

// Duplicating a lane of a Q register: pick the D subregister holding the
// lane and feed it to the Q-result instruction with the adjusted lane index.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// Duplicate an f32 held in an S register; the asm string prints the operand
// with the "lane" modifier.
def  VDUPfdf  : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0,
                    (outs DPR:$dst), (ins SPR:$src),
                    IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
                    [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;

def  VDUPfqf  : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0,
                    (outs QPR:$dst), (ins SPR:$src),
                    IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
                    [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// 64-bit element duplicates are done with subregister copies: the selected
// D subregister is inserted at the index given by DSubReg_f64_other_reg.
def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
          (INSERT_SUBREG QPR:$src,
                         (i64 (EXTRACT_SUBREG QPR:$src,
                               (DSubReg_f64_reg imm:$lane))),
                         (DSubReg_f64_other_reg imm:$lane))>;
def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
          (INSERT_SUBREG QPR:$src,
                         (f64 (EXTRACT_SUBREG QPR:$src,
                               (DSubReg_f64_reg imm:$lane))),
                         (DSubReg_f64_other_reg imm:$lane))>;

//   VMOVN    : Vector Narrowing Move
defm VMOVN    : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, "vmovn.i",
                            int_arm_neon_vmovn>;
//   VQMOVN   : Vector Saturating Narrowing Move
defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, "vqmovn.s",
                            int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, "vqmovn.u",
                            int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun.s",
                            int_arm_neon_vqmovnsu>;
//   VMOVL    : Vector Lengthening Move
defm VMOVLs   : N2VLInt_QHS<0,1,0b1010,0,0,1, "vmovl.s", int_arm_neon_vmovls>;
defm VMOVLu   : N2VLInt_QHS<1,1,0b1010,0,0,1, "vmovl.u", int_arm_neon_vmovlu>;

// Vector Conversions.

//   VCVT     : Vector Convert Between Floating-Point and Integers
// D-register forms (two 32-bit elements).
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
                     v2i32, v2f32, fp_to_sint>;
def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
                     v2i32, v2f32, fp_to_uint>;
def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
                     v2f32, v2i32, sint_to_fp>;
def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
                     v2f32, v2i32, uint_to_fp>;

// Q-register forms (four 32-bit elements); same template arguments as the
// D-register defs above, applied to N2VQ.
def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
                     v4i32, v4f32, fp_to_sint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
                     v4i32, v4f32, fp_to_uint>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
                     v4f32, v4i32, sint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
                     v4f32, v4i32, uint_to_fp>;

//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
// Note: Some of the opcode bits in the following VCVT instructions need to
// be encoded based on the immed values.
// D-register fixed-point conversions, selected from the vcvtfp2fx* and
// vcvtfx*2fp intrinsics.
def VCVTf2xsd : N2VCvtD<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;

// Q-register fixed-point conversions.
def VCVTf2xsq : N2VCvtQ<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;

// Vector Reverse.

//   VREV64   : Vector Reverse elements within 64-bit doublewords

// op19_18 is forwarded to N2V; Ty is both the operand and the result type of
// the NEONvrev64 pattern.
class VREV64D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
        (ins DPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
        (ins QPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64.8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64.16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64.32", v2i32>;
def VREV64df  : VREV64D<0b10, "vrev64.32", v2f32>;

def VREV64q8  : VREV64Q<0b00, "vrev64.8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64.16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64.32", v4i32>;
def VREV64qf  : VREV64Q<0b10, "vrev64.32", v4f32>;

//   VREV32   : Vector Reverse elements within 32-bit words

// Same shape as the VREV64 classes, with a different fifth N2V argument and
// the NEONvrev32 node.
class VREV32D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
        (ins DPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
        (ins QPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32.8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32.16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32.8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>;

//   VREV16   : Vector Reverse elements within 16-bit halfwords

// Only the 8-bit element forms are instantiated below.
class VREV16D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
        (ins DPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
        (ins QPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;

def VREV16d8  : VREV16D<0b00, "vrev16.8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16.8", v16i8>;

// Other Vector Shuffles.

//   VEXT     : Vector Extract

// Two vector sources plus an immediate index, matched to NEONvext; the D and
// Q classes differ in register class, itinerary and one N3V argument.
class VEXTd<string OpcodeStr, ValueType Ty>
  : N3V<0,1,0b11,0b0000,0,0, (outs DPR:$dst),
        (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
        !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
        [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
                                      (Ty DPR:$rhs), imm:$index)))]>;

class VEXTq<string OpcodeStr, ValueType Ty>
  : N3V<0,1,0b11,0b0000,1,0, (outs QPR:$dst),
        (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
        !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
        [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
                                      (Ty QPR:$rhs), imm:$index)))]>;

def VEXTd8  : VEXTd<"vext.8",  v8i8>;
def VEXTd16 : VEXTd<"vext.16", v4i16>;
def VEXTd32 : VEXTd<"vext.32", v2i32>;
def VEXTdf  : VEXTd<"vext.32", v2f32>;

def VEXTq8  : VEXTq<"vext.8",  v16i8>;
def VEXTq16 : VEXTq<"vext.16", v8i16>;
def VEXTq32 : VEXTq<"vext.32", v4i32>;
def VEXTqf  : VEXTq<"vext.32", v4f32>;

//   VTRN     : Vector Transpose

def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn.8">;
def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn.16">;
def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn.32">;

def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn.8">;
def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn.16">;
def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn.32">;

//   VUZP     : Vector Unzip (Deinterleave)

def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp.8">;
def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp.16">;
def  VUZPd32  : N2VDShuffle<0b10, 0b00010, "vuzp.32">;

def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp.8">;
def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp.16">;
def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp.32">;

//   VZIP     : Vector Zip (Interleave)

def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip.8">;
def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip.16">;
def  VZIPd32  : N2VDShuffle<0b10, 0b00011, "vzip.32">;

def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip.8">;
def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip.16">;
def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">;

// Vector Table Lookup and Table Extension.

//   VTBL     : Vector Table Lookup
// VTBL<n> takes n table registers followed by the index vector $src.  The
// multi-register forms are wrapped in hasExtraSrcRegAllocReq.
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
        (ins DPR:$tbl1, DPR:$src), IIC_VTB1,
        "vtbl.8\t$dst, \\{$tbl1\\}, $src", "",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
        (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2,
        "vtbl.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2
                               DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3,
        "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3
                               DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4,
        "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2,
                               DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1

//   VTBX     : Vector Table Extension
// Same operand layout as VTBL with an extra leading $orig operand that is
// tied to $dst.
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1,
        "vtbx.8\t$dst, \\{$tbl1\\}, $src", "$orig = $dst",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2,
        "vtbx.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2
                               DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3,
        "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1,
                               DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
        DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4,
        "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1,
                               DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
} // hasExtraSrcRegAllocReq = 1

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// These need separate instructions because they must use the DPR_VFP2 register
// class, which has SPR sub-registers.

// Each *_sfp instruction below is marked neverHasSideEffects and is followed
// by an anonymous pattern record that maps the scalar operation onto it.

// Vector Add Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd,1>;
def : N3VDsPat<fadd, VADDfd_sfp>;

// Vector Sub Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub,0>;
def : N3VDsPat<fsub, VSUBfd_sfp>;

// Vector Multiply Operations used for single-precision FP
let neverHasSideEffects = 1 in
def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>;
def : N3VDsPat<fmul, VMULfd_sfp>;

// Vector Multiply-Accumulate/Subtract used for single-precision FP
let neverHasSideEffects = 1 in
def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32",
                            v2f32,fmul,fadd>;
def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;

let neverHasSideEffects = 1 in
def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32",
                            v2f32,fmul,fsub>;
def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;

// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                          IIC_VUNAD, "vabs.f32",
                          v2f32, v2f32, int_arm_neon_vabs>;
def : N2VDIntsPat<fabs, VABSfd_sfp>;

// Vector Negate used for single-precision FP
// Written directly against N2V with DPR_VFP2 operands and no pattern of its
// own; selection comes from the N2VDIntsPat record that follows.
let neverHasSideEffects = 1 in
def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                       (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
                       "vneg.f32\t$dst, $src", "", []>;
def : N2VDIntsPat<fneg, VNEGf32d_sfp>;

// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
                         v2i32, v2f32, fp_to_sint>;
def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;

let neverHasSideEffects = 1 in
def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
                         v2i32, v2f32, fp_to_uint>;
def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;

let neverHasSideEffects = 1 in
def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
                         v2f32, v2i32, sint_to_fp>;
def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;

let neverHasSideEffects = 1 in
def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
                         v2f32, v2i32, uint_to_fp>;
def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// A bitconvert between two types held in the same register class is matched
// to the source register itself, retyped.  First the DPR types (v1i64, v2i32,
// v4i16, v8i8, f64, v2f32), grouped by destination type.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

// Then the QPR types (v2i64, v4i32, v8i16, v16i8, v4f32, v2f64).
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;