ARMInstrNEON.td revision 88acef0b8e93d065aa4de164422ce4c546a7cd5f
//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Generic NEON modified-immediate operand (printed via the mod-imm printer).
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Splat-style modified immediates, one operand class per element size so the
// asm parser can range-check each form independently.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// VMOV-style 32-bit modified immediates (plus the negated variant used when
// the complemented encoding is what actually fits).
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate for VMOV.f32; reuses the generic FP-imm machinery.
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Lane-index operands. The ImmLeaf predicates bound the index by the number
// of lanes of that element size in a D register (8 bytes / 4 hwords / 2 words).
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
                                                 "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                           "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Lane-indexed vector lists carry (register, lane-index) as an MI operand
// pair, so they use addVecListIndexedOperands rather than addVecListOperands.

// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Pattern fragments that match loads/stores by their alignment, used to pick
// between NEON and core-register load/store sequences. Note the asymmetry:
// dword is ">= 8" while the narrower forms match an exact alignment.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Vector comparisons: the "Z" profile takes a single operand (compare against
// zero); the plain profile compares two same-typed vectors.
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;

def NEONvceq  : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge  : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt  : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst  : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                      SDTCisVT<2, i32>]>;
def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

// Plain shifts by immediate (left, signed/unsigned right).
def NEONvshl  : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
// Widening shift-left-long and narrowing shift-right.
def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn  : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

// Rounding shifts.
def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

// Saturating shifts and saturating-narrowing shifts.
def NEONvqshls  : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu  : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns  : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu  : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert (VSLI/VSRI) keep unshifted bits of the destination.
def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction to a core register, sign- or zero-extended.
def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                        SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Vector materialized from a NEON modified-immediate encoding (the i32
// operand is the encoded mod-imm, not the element value).
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

// Bitwise select: (op1 & op0) | (op2 & ~op0).
def NEONvbsl : SDNode<"ARMISD::VBSL",
                      SDTypeProfile<1, 3, [SDTCisVec<0>,
                                           SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>,
                                           SDTCisSameAs<0, 3>]>>;

def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
                          SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                               SDTCisVT<2, i32>]>>;

def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Single-operand shuffles (element reversal within 64/32/16-bit groups).
def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles: VZIP/VUZP/VTRN produce both output vectors.
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>,
                                        SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Widening multiplies (result element type differs from the sources).
def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                       SDTCisSameAs<1, 2>]>;
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>]>;
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Matches a VMOVIMM whose decoded element pattern is all-zeros / all-ones;
// the timm operand holds the encoded modified immediate.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(store (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
581class VLDQPseudo<InstrItinClass itin> 582 : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; 583class VLDQWBPseudo<InstrItinClass itin> 584 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 585 (ins addrmode6:$addr, am6offset:$offset), itin, 586 "$addr.addr = $wb">; 587class VLDQWBfixedPseudo<InstrItinClass itin> 588 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 589 (ins addrmode6:$addr), itin, 590 "$addr.addr = $wb">; 591class VLDQWBregisterPseudo<InstrItinClass itin> 592 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 593 (ins addrmode6:$addr, rGPR:$offset), itin, 594 "$addr.addr = $wb">; 595 596class VLDQQPseudo<InstrItinClass itin> 597 : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; 598class VLDQQWBPseudo<InstrItinClass itin> 599 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 600 (ins addrmode6:$addr, am6offset:$offset), itin, 601 "$addr.addr = $wb">; 602class VLDQQWBfixedPseudo<InstrItinClass itin> 603 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 604 (ins addrmode6:$addr), itin, 605 "$addr.addr = $wb">; 606class VLDQQWBregisterPseudo<InstrItinClass itin> 607 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 608 (ins addrmode6:$addr, rGPR:$offset), itin, 609 "$addr.addr = $wb">; 610 611 612class VLDQQQQPseudo<InstrItinClass itin> 613 : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, 614 "$src = $dst">; 615class VLDQQQQWBPseudo<InstrItinClass itin> 616 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 617 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, 618 "$addr.addr = $wb, $src = $dst">; 619 620let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 621 622// VLD1 : Vector Load (multiple single elements) 623class VLD1D<bits<4> op7_4, string Dt> 624 : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), 625 (ins addrmode6:$Rn), IIC_VLD1, 626 "vld1", Dt, "$Vd, $Rn", "", []> { 627 let Rm = 0b1111; 628 let Inst{4} = Rn{4}; 629 let DecoderMethod = "DecodeVLDST1Instruction"; 630} 631class VLD1Q<bits<4> op7_4, string Dt> 632 : 
NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), 633 (ins addrmode6:$Rn), IIC_VLD1x2, 634 "vld1", Dt, "$Vd, $Rn", "", []> { 635 let Rm = 0b1111; 636 let Inst{5-4} = Rn{5-4}; 637 let DecoderMethod = "DecodeVLDST1Instruction"; 638} 639 640def VLD1d8 : VLD1D<{0,0,0,?}, "8">; 641def VLD1d16 : VLD1D<{0,1,0,?}, "16">; 642def VLD1d32 : VLD1D<{1,0,0,?}, "32">; 643def VLD1d64 : VLD1D<{1,1,0,?}, "64">; 644 645def VLD1q8 : VLD1Q<{0,0,?,?}, "8">; 646def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; 647def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; 648def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; 649 650// ...with address register writeback: 651multiclass VLD1DWB<bits<4> op7_4, string Dt> { 652 def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 653 (ins addrmode6:$Rn), IIC_VLD1u, 654 "vld1", Dt, "$Vd, $Rn!", 655 "$Rn.addr = $wb", []> { 656 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 657 let Inst{4} = Rn{4}; 658 let DecoderMethod = "DecodeVLDST1Instruction"; 659 } 660 def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), 661 (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, 662 "vld1", Dt, "$Vd, $Rn, $Rm", 663 "$Rn.addr = $wb", []> { 664 let Inst{4} = Rn{4}; 665 let DecoderMethod = "DecodeVLDST1Instruction"; 666 } 667} 668multiclass VLD1QWB<bits<4> op7_4, string Dt> { 669 def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 670 (ins addrmode6:$Rn), IIC_VLD1x2u, 671 "vld1", Dt, "$Vd, $Rn!", 672 "$Rn.addr = $wb", []> { 673 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
674 let Inst{5-4} = Rn{5-4}; 675 let DecoderMethod = "DecodeVLDST1Instruction"; 676 } 677 def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), 678 (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, 679 "vld1", Dt, "$Vd, $Rn, $Rm", 680 "$Rn.addr = $wb", []> { 681 let Inst{5-4} = Rn{5-4}; 682 let DecoderMethod = "DecodeVLDST1Instruction"; 683 } 684} 685 686defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">; 687defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">; 688defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">; 689defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">; 690defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">; 691defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; 692defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; 693defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; 694 695// ...with 3 registers 696class VLD1D3<bits<4> op7_4, string Dt> 697 : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), 698 (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, 699 "$Vd, $Rn", "", []> { 700 let Rm = 0b1111; 701 let Inst{4} = Rn{4}; 702 let DecoderMethod = "DecodeVLDST1Instruction"; 703} 704multiclass VLD1D3WB<bits<4> op7_4, string Dt> { 705 def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 706 (ins addrmode6:$Rn), IIC_VLD1x2u, 707 "vld1", Dt, "$Vd, $Rn!", 708 "$Rn.addr = $wb", []> { 709 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
710 let Inst{4} = Rn{4}; 711 let DecoderMethod = "DecodeVLDST1Instruction"; 712 } 713 def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), 714 (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, 715 "vld1", Dt, "$Vd, $Rn, $Rm", 716 "$Rn.addr = $wb", []> { 717 let Inst{4} = Rn{4}; 718 let DecoderMethod = "DecodeVLDST1Instruction"; 719 } 720} 721 722def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; 723def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; 724def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; 725def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; 726 727defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">; 728defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">; 729defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">; 730defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">; 731 732def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; 733 734// ...with 4 registers 735class VLD1D4<bits<4> op7_4, string Dt> 736 : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), 737 (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, 738 "$Vd, $Rn", "", []> { 739 let Rm = 0b1111; 740 let Inst{5-4} = Rn{5-4}; 741 let DecoderMethod = "DecodeVLDST1Instruction"; 742} 743multiclass VLD1D4WB<bits<4> op7_4, string Dt> { 744 def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), 745 (ins addrmode6:$Rn), IIC_VLD1x2u, 746 "vld1", Dt, "$Vd, $Rn!", 747 "$Rn.addr = $wb", []> { 748 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
    // Tail of the VLD1D4WB multiclass (head is above this chunk):
    // fixed-increment form ends, then the register-increment form.
    let Inst{5-4} = Rn{5-4};         // addrmode6 alignment bits
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                       (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                       "vld1", Dt, "$Vd, $Rn, $Rm",
                       "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};         // addrmode6 alignment bits
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

// VLD1 with four D registers, one def per element size.
def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8">;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8">;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;

def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;

// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins addrmode6:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  // Rm == 0b1111 selects the no-writeback addressing form; the writeback
  // variants below are defined via VLD2WB instead.
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VLD2d8  : VLD2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VLD2>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;

def VLD2q8  : VLD2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VLD2x2>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;

def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;

// ...with double-spaced registers
def  VLD2b8    : VLD2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VLD2>;
def  VLD2b16   : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
def  VLD2b32   : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;

// VLD3 : Vector Load (multiple 3-element structures)
class
VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
  let Rm = 0b1111;            // no-writeback form
  let Inst{4} = Rn{4};        // addrmode6 alignment bit
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;

// ...with double-spaced registers:
def VLD3q8      : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16     : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32     : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  let Rm = 0b1111;            // no-writeback form
  let Inst{5-4} = Rn{5-4};    // addrmode6 alignment bits
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;

// ...with double-spaced registers:
def VLD4q8      : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16     : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32     : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;

// VLD1LN : Vector Load (single element to one lane)
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// Same as VLD1LN but the 32-bit element form uses the one-lane-of-32
// addressing-mode operand (addrmode6oneL32).
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6oneL32:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

// Float lane inserts reuse the integer lane-load instructions.
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
(VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1038 1039let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 1040 1041// ...with address register writeback: 1042class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1043 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), 1044 (ins addrmode6:$Rn, am6offset:$Rm, 1045 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, 1046 "\\{$Vd[$lane]\\}, $Rn$Rm", 1047 "$src = $Vd, $Rn.addr = $wb", []> { 1048 let DecoderMethod = "DecodeVLD1LN"; 1049} 1050 1051def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { 1052 let Inst{7-5} = lane{2-0}; 1053} 1054def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { 1055 let Inst{7-6} = lane{1-0}; 1056 let Inst{4} = Rn{4}; 1057} 1058def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { 1059 let Inst{7} = lane{0}; 1060 let Inst{5} = Rn{4}; 1061 let Inst{4} = Rn{4}; 1062} 1063 1064def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 1065def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 1066def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 1067 1068// VLD2LN : Vector Load (single 2-element structure to one lane) 1069class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1070 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), 1071 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), 1072 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", 1073 "$src1 = $Vd, $src2 = $dst2", []> { 1074 let Rm = 0b1111; 1075 let Inst{4} = Rn{4}; 1076 let DecoderMethod = "DecodeVLD2LN"; 1077} 1078 1079def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { 1080 let Inst{7-5} = lane{2-0}; 1081} 1082def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { 1083 let Inst{7-6} = lane{1-0}; 1084} 1085def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { 1086 let Inst{7} = lane{0}; 1087} 1088 1089def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 1090def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 1091def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 1092 
// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
  let Rm = 0b1111;          // no-writeback form
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
  let Rm = 0b1111;          // no-writeback form
  let Inst{4} = Rn{4};      // addrmode6 alignment bit
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins addrmode6dup:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111;          // no-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;

def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;

// Same as VLD1DUP but loading into a D-register pair (all lanes).
class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins addrmode6dup:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;

def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8">;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8">;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins addrmode6dup:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;          // no-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;

// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb  : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes>;
defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let Rm = 0b1111;          // no-writeback form
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;          // no-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;

class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;          // no-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VST1d8  : VST1D<{0,0,0,?}, "8">;
def VST1d16 : VST1D<{0,1,0,?}, "16">;
def VST1d32 : VST1D<{1,0,0,?}, "32">;
def VST1d64 : VST1D<{1,1,0,?}, "64">;

def VST1q8  : VST1Q<{0,0,?,?}, "8">;
def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;

// ...with address register writeback:
multiclass VST1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VLD1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8">;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8">;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins addrmode6:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;          // no-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8">;
def VST1d16T : VST1D3<{0,1,0,?}, "16">;
def VST1d32T : VST1D3<{1,0,0,?}, "32">;
def VST1d64T : VST1D3<{1,1,0,?}, "64">;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8">;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;

def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins addrmode6:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []> {
  let Rm = 0b1111;          // no-writeback form
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8">;
def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
def VST1d64Q : VST1D4<{1,1,?,?}, "64">;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8">;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;

def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x4u>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;

// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;          // no-writeback form
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VST2d8  : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;

def VST2q8  : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;

// ...with address register writeback:
// VST2 with address register writeback. Each multiclass produces two forms:
//   _fixed    - post-increment by the access size, written "$Rn!" in asm
//   _register - post-increment by a general-purpose register $Rm
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    // NOTE(review): Inst{5-4} spliced from the addrmode6 operand encoding —
    // presumably the alignment bits; confirm against the NLdSt base format.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
// Q-register (4 x D) VST2 with writeback; op11_8 is fixed at 0b0011 here.
multiclass VST2QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;

defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8">;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;

// Pseudos expanded after register allocation (see the VST*Pseudo class
// definitions earlier in this file).
def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;

// ...with double-spaced registers
def VST2b8    : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2>;
def VST2b16   : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>;
def VST2b32   : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>;
defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>;

// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let Rm = 0b1111; // no-writeback form (Rm sentinel).
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;

// ...with double-spaced registers:
def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111; // no-writeback form (Rm sentinel).
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;

// ...with double-spaced registers:
def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
// Note: unlike the lane stores below, these carry selection patterns.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
  let Rm = 0b1111; // no-writeback form (Rm sentinel).
  let DecoderMethod = "DecodeVST1LN";
}
// Q-register lane store pseudo; inherits operands from VSTQLNPseudo and
// only supplies the selection pattern.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                      NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0}; // 8 byte lanes -> 3 lane-index bits.
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0}; // 4 halfword lanes -> 2 lane-index bits.
  let Inst{4}   = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7}   = lane{0}; // 2 word lanes -> 1 lane-index bit.
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Float lane stores reuse the i32 lane-store instructions.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins AdrMode:$Rn, am6offset:$Rm,
             DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                    AdrMode:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                            NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16, NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

// Re-open the store-flags region for the remaining (pattern-less) lane stores.
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111; // no-writeback form (Rm sentinel).
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{4} = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  let Rm = 0b1111; // no-writeback form (Rm sentinel).
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VST3lnu, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;

// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111; // no-writeback form (Rm sentinel).
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VST4lnu, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

// Use vld1/vst1 for unaligned f64 load / store
// (sub-word-aligned forms are little-endian only; see Requires below).
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
          (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
          (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
          (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;

// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
// load / store if it's legal.
// Map v2f64 loads/stores onto VLD1/VST1 of the matching element size.
// The dword/word-aligned forms are endian-neutral; the hword/byte-aligned
// forms are restricted to little-endian targets.
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
          (VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q64 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
          (VLD1q32 addrmode6:$addr)>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q32 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;

//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// Each transform converts a Q-register lane index into the index of the D
// sub-register containing that lane, dividing by the number of lanes per
// D register (8 x i8, 4 x i16, 2 x i32, 1 x f64).
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs: mask the Q lane
// index down to the lane's position within its D sub-register.
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// N2VD/N2VQ: 2-register operation selected via an SDNode (OpNode);
// ResTy/OpTy fix the result and operand vector types in the pattern.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Same shape as above but selected via an intrinsic (IntOp) and with a
// caller-supplied itinerary.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as above, but not predicated.
// Non-predicated 2-register intrinsics (N2Vnp base class).
class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt, ResTy, OpTy,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt, ResTy, OpTy,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Narrow 2-register operations: Q-register source, D-register result.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
// Long 2-register operation: D-register source, Q-register result.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both operands are read-modify-write: the sources are tied to the two
// result registers via the "$src1 = $Vd, $src2 = $Vm" constraint.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
// Basic 3-register operation, double-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Double-register operation with a scalar (by-lane) second operand,
// 32-bit elements.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// 16-bit-element variant of N3VDSL (narrower DPR_8 scalar register class).
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register operation, quad-register.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VQ but no data type.
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Quad-register operation with a scalar (by-lane) second operand,
// 32-bit elements.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// 16-bit-element variant of N3VQSL.
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Double-register 3-register intrinsic on the N3Vnp base.
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
          ResTy, OpTy, IntOp, Commutable,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// Double-register intrinsic with a by-lane scalar operand, 32-bit elements.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// 16-bit-element variant of N3VDIntSL.
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty,
                  SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// Like N3VDInt but with the source operands swapped ($Vm before $Vn) in
// both the ins list and the asm string; used where the encoding reverses
// the operand roles.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Quad-register counterpart of N3VDInt.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Quad-register 3-register intrinsic on the N3Vnp base.
class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          ResTy, OpTy, IntOp, Commutable,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Quad-register intrinsic with a by-lane scalar operand, 32-bit elements.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
// 16-bit-element variant of N3VQIntSL.
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
// Quad-register counterpart of N3VDIntSh (swapped source operands).
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// $src1 is the accumulator and is tied to the destination.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

// By-lane multiply-accumulate, double-register, 32-bit elements.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                           imm:$lane)))))))]>;
// 16-bit-element variant of N3VDMulOpSL.
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                           imm:$lane)))))))]>;

// Quad-register counterpart of N3VDMulOp.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
// By-lane multiply-accumulate, quad-register, 32-bit elements.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                                    imm:$lane)))))))]>;
// 16-bit-element variant of N3VQMulOpSL.
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                                    imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Intrinsic followed by a plain SDNode combine (e.g. VABA = VABD then add);
// $src1 is the accumulator, tied to the destination. Double-register.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
// Quad-register counterpart of N3VDIntOp.
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// 3-argument intrinsic, double-register; the intrinsic itself consumes the
// tied accumulator $src1.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
// Quad-register counterpart of N3VDInt3.
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// Double-register sources multiplied into a quad-register accumulator.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
// By-lane variant of N3VLMulOp, 32-bit elements.
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                           imm:$lane))))))]>;
// 16-bit-element by-lane variant of N3VLMulOp.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                           imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Pattern: accumulate(ExtOp(IntOp(Vn, Vm))) into the tied quad-register.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp,
                   SDNode ExtOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
// Long 3-argument intrinsic: quad-register accumulator tied to the
// destination, double-register sources.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// By-lane variant of N3VLInt3, 32-bit elements.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
// 16-bit-element by-lane variant of N3VLInt3.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Narrowing 3-register intrinsic: quad-register sources, double-register
// result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Double-register sources, quad-register result.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane variant of N3VL, 32-bit elements.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// 16-bit-element by-lane variant of N3VL.
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Long 3-register operation with both operands passed through an explicit
// extend node (ExtOp) before the combining OpNode.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// Here the extend is applied to the intrinsic's result instead.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp,
                 SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Long 3-register intrinsic: double-register sources, quad-register result.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane variant of N3VLInt, 32-bit elements.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
// 16-bit-element by-lane variant of N3VLInt.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Wide 3-register operations.
// Quad-register first source, extended double-register second source.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// Pairwise long accumulate, double-register; $src1 is tied to $Vd.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Quad-register counterpart of N2VDPLInt2.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// Long shift by immediate: double-register source, quad-register result.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                   (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                   (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
// source operand element sizes of 8, 16 and 32 bits:
// The size field (bits 19-18 here, third template arg) selects the element
// width: 0b00 = 8-bit, 0b01 = 16-bit, 0b10 = 32-bit.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  // f32 reuses the 0b10 size encoding; the F bit (Inst{10}) marks the
  // floating-point form. The result is an integer mask vector (v2i32).
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}


// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}


// Neon Narrowing 2-register vector operations,
// source operand element sizes of 16, 32 and 64 bits:
// The Dt suffix names the *source* element width; each result element is
// half that width (e.g. v8i16 -> v8i8 for the "16" suffix).
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently
// specific to VMOVL).
// source operand element sizes of 16, 32 and 64 bits:
// Lengthening: each result element is twice the width of its source
// element (v8i8 -> v8i16, v4i16 -> v4i32, v2i32 -> v2i64).
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// Scalar ("lane") variants: second operand is a single lane of a
// D register, 16- and 32-bit elements only.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}

// Same as N3VInt_HS but using the N3VDIntSh/N3VQIntSh shift form
// (note: no Commutable parameter for these).
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// Scalar ("lane") intrinsic variants, 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}

multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}

multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

// Long operations built from an ordinary OpNode applied to ExtOp-extended
// (widened) operands, rather than a dedicated long intrinsic.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
// Long intrinsic with explicit ExtOp widening of the intrinsic result.
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// OpNode combines the accumulator with the "mul" product (e.g. add for
// multiply-accumulate, sub for multiply-subtract).
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// Like N3VMulOp_QHS but the inner operation is the intrinsic IntOp
// instead of "mul".
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// Scalar ("lane") variants of the long 3-argument intrinsics.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
// Long intrinsic-op with explicit extend then OpNode accumulate (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
// Pairwise-long halves the lane count and doubles the lane width
// (e.g. v8i8 source -> v4i16 result).
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
// The element size is encoded in the high bits of the imm6 field
// (Inst{21-16}); the leading-one position selects the width, per the
// fixed patterns noted on each def below.
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Right shifts use the range-checked shr_imm* operands (shift amount
// 1..width), unlike the left-shift forms above which use plain i32imm.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
// Left-insert (VSLI) uses NEONvsli with plain i32imm shift amounts.
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                             // imm6 = xxxxxx
}

// Right-insert (VSRI) uses NEONvsri with range-checked shr_imm* operands.
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                  OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
3906 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> { 3907 let Inst{21} = 0b1; // imm6 = 1xxxxx 3908 } 3909} 3910 3911// Neon Shift Narrow operations, 3912// element sizes of 16, 32, 64 bits: 3913multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, 3914 bit op4, InstrItinClass itin, string OpcodeStr, string Dt, 3915 SDNode OpNode> { 3916 def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 3917 OpcodeStr, !strconcat(Dt, "16"), 3918 v8i8, v8i16, shr_imm8, OpNode> { 3919 let Inst{21-19} = 0b001; // imm6 = 001xxx 3920 } 3921 def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 3922 OpcodeStr, !strconcat(Dt, "32"), 3923 v4i16, v4i32, shr_imm16, OpNode> { 3924 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3925 } 3926 def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 3927 OpcodeStr, !strconcat(Dt, "64"), 3928 v2i32, v2i64, shr_imm32, OpNode> { 3929 let Inst{21} = 0b1; // imm6 = 1xxxxx 3930 } 3931} 3932 3933//===----------------------------------------------------------------------===// 3934// Instruction Definitions. 3935//===----------------------------------------------------------------------===// 3936 3937// Vector Add Operations. 
3938 3939// VADD : Vector Add (integer and floating-point) 3940defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i", 3941 add, 1>; 3942def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", 3943 v2f32, v2f32, fadd, 1>; 3944def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", 3945 v4f32, v4f32, fadd, 1>; 3946// VADDL : Vector Add Long (Q = D + D) 3947defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, 3948 "vaddl", "s", add, sext, 1>; 3949defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, 3950 "vaddl", "u", add, zext, 1>; 3951// VADDW : Vector Add Wide (Q = Q + D) 3952defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; 3953defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; 3954// VHADD : Vector Halving Add 3955defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, 3956 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3957 "vhadd", "s", int_arm_neon_vhadds, 1>; 3958defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm, 3959 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3960 "vhadd", "u", int_arm_neon_vhaddu, 1>; 3961// VRHADD : Vector Rounding Halving Add 3962defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm, 3963 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3964 "vrhadd", "s", int_arm_neon_vrhadds, 1>; 3965defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm, 3966 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3967 "vrhadd", "u", int_arm_neon_vrhaddu, 1>; 3968// VQADD : Vector Saturating Add 3969defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm, 3970 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3971 "vqadd", "s", int_arm_neon_vqadds, 1>; 3972defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, 3973 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, 3974 "vqadd", "u", int_arm_neon_vqaddu, 1>; 3975// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) 3976defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; 3977// 
VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) 3978defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", 3979 int_arm_neon_vraddhn, 1>; 3980 3981def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), 3982 (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>; 3983def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), 3984 (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>; 3985def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), 3986 (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>; 3987 3988// Vector Multiply Operations. 3989 3990// VMUL : Vector Multiply (integer, polynomial and floating-point) 3991defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, 3992 IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; 3993def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul", 3994 "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; 3995def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul", 3996 "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; 3997def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", 3998 v2f32, v2f32, fmul, 1>; 3999def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", 4000 v4f32, v4f32, fmul, 1>; 4001defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; 4002def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; 4003def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, 4004 v2f32, fmul>; 4005 4006def : Pat<(v8i16 (mul (v8i16 QPR:$src1), 4007 (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), 4008 (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), 4009 (v4i16 (EXTRACT_SUBREG QPR:$src2, 4010 (DSubReg_i16_reg imm:$lane))), 4011 (SubReg_i16_lane imm:$lane)))>; 4012def : Pat<(v4i32 (mul (v4i32 QPR:$src1), 4013 (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), 4014 (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), 4015 (v2i32 (EXTRACT_SUBREG QPR:$src2, 4016 (DSubReg_i32_reg imm:$lane))), 4017 (SubReg_i32_lane imm:$lane)))>; 4018def : Pat<(v4f32 (fmul (v4f32 
QPR:$src1), 4019 (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))), 4020 (v4f32 (VMULslfq (v4f32 QPR:$src1), 4021 (v2f32 (EXTRACT_SUBREG QPR:$src2, 4022 (DSubReg_i32_reg imm:$lane))), 4023 (SubReg_i32_lane imm:$lane)))>; 4024 4025 4026def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), 4027 (VMULslfd DPR:$Rn, 4028 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), 4029 (i32 0))>; 4030def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), 4031 (VMULslfq QPR:$Rn, 4032 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), 4033 (i32 0))>; 4034 4035 4036// VQDMULH : Vector Saturating Doubling Multiply Returning High Half 4037defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, 4038 IIC_VMULi16Q, IIC_VMULi32Q, 4039 "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; 4040defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, 4041 IIC_VMULi16Q, IIC_VMULi32Q, 4042 "vqdmulh", "s", int_arm_neon_vqdmulh>; 4043def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), 4044 (v8i16 (NEONvduplane (v8i16 QPR:$src2), 4045 imm:$lane)))), 4046 (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), 4047 (v4i16 (EXTRACT_SUBREG QPR:$src2, 4048 (DSubReg_i16_reg imm:$lane))), 4049 (SubReg_i16_lane imm:$lane)))>; 4050def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), 4051 (v4i32 (NEONvduplane (v4i32 QPR:$src2), 4052 imm:$lane)))), 4053 (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), 4054 (v2i32 (EXTRACT_SUBREG QPR:$src2, 4055 (DSubReg_i32_reg imm:$lane))), 4056 (SubReg_i32_lane imm:$lane)))>; 4057 4058// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half 4059defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm, 4060 IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q, 4061 "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; 4062defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, 4063 IIC_VMULi16Q, IIC_VMULi32Q, 4064 "vqrdmulh", "s", int_arm_neon_vqrdmulh>; 4065def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), 4066 
(v8i16 (NEONvduplane (v8i16 QPR:$src2), 4067 imm:$lane)))), 4068 (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), 4069 (v4i16 (EXTRACT_SUBREG QPR:$src2, 4070 (DSubReg_i16_reg imm:$lane))), 4071 (SubReg_i16_lane imm:$lane)))>; 4072def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), 4073 (v4i32 (NEONvduplane (v4i32 QPR:$src2), 4074 imm:$lane)))), 4075 (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), 4076 (v2i32 (EXTRACT_SUBREG QPR:$src2, 4077 (DSubReg_i32_reg imm:$lane))), 4078 (SubReg_i32_lane imm:$lane)))>; 4079 4080// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) 4081defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, 4082 "vmull", "s", NEONvmulls, 1>; 4083defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, 4084 "vmull", "u", NEONvmullu, 1>; 4085def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", 4086 v8i16, v8i8, int_arm_neon_vmullp, 1>; 4087defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; 4088defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; 4089 4090// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) 4091defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, 4092 "vqdmull", "s", int_arm_neon_vqdmull, 1>; 4093defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, 4094 "vqdmull", "s", int_arm_neon_vqdmull>; 4095 4096// Vector Multiply-Accumulate and Multiply-Subtract Operations. 
4097 4098// VMLA : Vector Multiply Accumulate (integer and floating-point) 4099defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4100 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4101def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", 4102 v2f32, fmul_su, fadd_mlx>, 4103 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4104def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", 4105 v4f32, fmul_su, fadd_mlx>, 4106 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4107defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, 4108 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; 4109def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", 4110 v2f32, fmul_su, fadd_mlx>, 4111 Requires<[HasNEON, UseFPVMLx]>; 4112def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", 4113 v4f32, v2f32, fmul_su, fadd_mlx>, 4114 Requires<[HasNEON, UseFPVMLx]>; 4115 4116def : Pat<(v8i16 (add (v8i16 QPR:$src1), 4117 (mul (v8i16 QPR:$src2), 4118 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), 4119 (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4120 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4121 (DSubReg_i16_reg imm:$lane))), 4122 (SubReg_i16_lane imm:$lane)))>; 4123 4124def : Pat<(v4i32 (add (v4i32 QPR:$src1), 4125 (mul (v4i32 QPR:$src2), 4126 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), 4127 (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4128 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4129 (DSubReg_i32_reg imm:$lane))), 4130 (SubReg_i32_lane imm:$lane)))>; 4131 4132def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1), 4133 (fmul_su (v4f32 QPR:$src2), 4134 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), 4135 (v4f32 (VMLAslfq (v4f32 QPR:$src1), 4136 (v4f32 QPR:$src2), 4137 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4138 (DSubReg_i32_reg imm:$lane))), 4139 (SubReg_i32_lane imm:$lane)))>, 4140 Requires<[HasNEON, UseFPVMLx]>; 4141 4142// VMLAL : Vector Multiply Accumulate Long (Q += D * D) 4143defm 
VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4144 "vmlal", "s", NEONvmulls, add>; 4145defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, 4146 "vmlal", "u", NEONvmullu, add>; 4147 4148defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; 4149defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; 4150 4151// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) 4152defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4153 "vqdmlal", "s", null_frag>; 4154defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; 4155 4156def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), 4157 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4158 (v4i16 DPR:$Vm))))), 4159 (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4160def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), 4161 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4162 (v2i32 DPR:$Vm))))), 4163 (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4164def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), 4165 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4166 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), 4167 imm:$lane)))))), 4168 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4169def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), 4170 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4171 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), 4172 imm:$lane)))))), 4173 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4174 4175// VMLS : Vector Multiply Subtract (integer and floating-point) 4176defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4177 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4178def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", 4179 v2f32, fmul_su, fsub_mlx>, 4180 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4181def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", 4182 v4f32, fmul_su, fsub_mlx>, 4183 
Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4184defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, 4185 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4186def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", 4187 v2f32, fmul_su, fsub_mlx>, 4188 Requires<[HasNEON, UseFPVMLx]>; 4189def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", 4190 v4f32, v2f32, fmul_su, fsub_mlx>, 4191 Requires<[HasNEON, UseFPVMLx]>; 4192 4193def : Pat<(v8i16 (sub (v8i16 QPR:$src1), 4194 (mul (v8i16 QPR:$src2), 4195 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), 4196 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4197 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4198 (DSubReg_i16_reg imm:$lane))), 4199 (SubReg_i16_lane imm:$lane)))>; 4200 4201def : Pat<(v4i32 (sub (v4i32 QPR:$src1), 4202 (mul (v4i32 QPR:$src2), 4203 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), 4204 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4205 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4206 (DSubReg_i32_reg imm:$lane))), 4207 (SubReg_i32_lane imm:$lane)))>; 4208 4209def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1), 4210 (fmul_su (v4f32 QPR:$src2), 4211 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), 4212 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), 4213 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4214 (DSubReg_i32_reg imm:$lane))), 4215 (SubReg_i32_lane imm:$lane)))>, 4216 Requires<[HasNEON, UseFPVMLx]>; 4217 4218// VMLSL : Vector Multiply Subtract Long (Q -= D * D) 4219defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4220 "vmlsl", "s", NEONvmulls, sub>; 4221defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4222 "vmlsl", "u", NEONvmullu, sub>; 4223 4224defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>; 4225defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; 4226 4227// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) 4228defm VQDMLSL : 
N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, 4229 "vqdmlsl", "s", null_frag>; 4230defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>; 4231 4232def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), 4233 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4234 (v4i16 DPR:$Vm))))), 4235 (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4236def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), 4237 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4238 (v2i32 DPR:$Vm))))), 4239 (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4240def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), 4241 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4242 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), 4243 imm:$lane)))))), 4244 (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4245def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), 4246 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4247 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), 4248 imm:$lane)))))), 4249 (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4250 4251// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 
4252def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", 4253 v2f32, fmul_su, fadd_mlx>, 4254 Requires<[HasVFP4,UseFusedMAC]>; 4255 4256def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", 4257 v4f32, fmul_su, fadd_mlx>, 4258 Requires<[HasVFP4,UseFusedMAC]>; 4259 4260// Fused Vector Multiply Subtract (floating-point) 4261def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", 4262 v2f32, fmul_su, fsub_mlx>, 4263 Requires<[HasVFP4,UseFusedMAC]>; 4264def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", 4265 v4f32, fmul_su, fsub_mlx>, 4266 Requires<[HasVFP4,UseFusedMAC]>; 4267 4268// Match @llvm.fma.* intrinsics 4269def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), 4270 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4271 Requires<[HasVFP4]>; 4272def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), 4273 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4274 Requires<[HasVFP4]>; 4275def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), 4276 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4277 Requires<[HasVFP4]>; 4278def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), 4279 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4280 Requires<[HasVFP4]>; 4281 4282// Vector Subtract Operations. 
4283 4284// VSUB : Vector Subtract (integer and floating-point) 4285defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, 4286 "vsub", "i", sub, 0>; 4287def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", 4288 v2f32, v2f32, fsub, 0>; 4289def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", 4290 v4f32, v4f32, fsub, 0>; 4291// VSUBL : Vector Subtract Long (Q = D - D) 4292defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, 4293 "vsubl", "s", sub, sext, 0>; 4294defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, 4295 "vsubl", "u", sub, zext, 0>; 4296// VSUBW : Vector Subtract Wide (Q = Q - D) 4297defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; 4298defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>; 4299// VHSUB : Vector Halving Subtract 4300defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, 4301 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 4302 "vhsub", "s", int_arm_neon_vhsubs, 0>; 4303defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, 4304 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 4305 "vhsub", "u", int_arm_neon_vhsubu, 0>; 4306// VQSUB : Vector Saturing Subtract 4307defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, 4308 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 4309 "vqsub", "s", int_arm_neon_vqsubs, 0>; 4310defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, 4311 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, 4312 "vqsub", "u", int_arm_neon_vqsubu, 0>; 4313// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) 4314defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; 4315// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) 4316defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", 4317 int_arm_neon_vrsubhn, 0>; 4318 4319def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), 4320 (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; 4321def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 
QPR:$Vn), QPR:$Vm), 16))), 4322 (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; 4323def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), 4324 (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; 4325 4326// Vector Comparisons. 4327 4328// VCEQ : Vector Compare Equal 4329defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 4330 IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; 4331def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, 4332 NEONvceq, 1>; 4333def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, 4334 NEONvceq, 1>; 4335 4336let TwoOperandAliasConstraint = "$Vm = $Vd" in 4337defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", 4338 "$Vd, $Vm, #0", NEONvceqz>; 4339 4340// VCGE : Vector Compare Greater Than or Equal 4341defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 4342 IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; 4343defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 4344 IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; 4345def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, 4346 NEONvcge, 0>; 4347def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, 4348 NEONvcge, 0>; 4349 4350let TwoOperandAliasConstraint = "$Vm = $Vd" in { 4351defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", 4352 "$Vd, $Vm, #0", NEONvcgez>; 4353defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", 4354 "$Vd, $Vm, #0", NEONvclez>; 4355} 4356 4357// VCGT : Vector Compare Greater Than 4358defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 4359 IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; 4360defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 4361 IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; 4362def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, 4363 NEONvcgt, 0>; 4364def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, 
v4f32, 4365 NEONvcgt, 0>; 4366 4367let TwoOperandAliasConstraint = "$Vm = $Vd" in { 4368defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", 4369 "$Vd, $Vm, #0", NEONvcgtz>; 4370defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", 4371 "$Vd, $Vm, #0", NEONvcltz>; 4372} 4373 4374// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) 4375def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", 4376 "f32", v2i32, v2f32, int_arm_neon_vacged, 0>; 4377def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", 4378 "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>; 4379// VACGT : Vector Absolute Compare Greater Than (aka VCAGT) 4380def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", 4381 "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>; 4382def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", 4383 "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>; 4384// VTST : Vector Test Bits 4385defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, 4386 IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; 4387 4388def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", 4389 (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 4390def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", 4391 (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 4392def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", 4393 (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; 4394def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", 4395 (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; 4396 4397def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", 4398 (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 4399def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", 4400 (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 4401def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", 4402 (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; 4403def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", 4404 (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; 4405 4406// Vector Bitwise Operations. 

// Bitwise NOT expressed as XOR with an all-ones vector, for use in the
// VBIC/VORN/VMVN/VBSL patterns below.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8  NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND : Vector Bitwise AND
def  VANDd   : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                     v2i32, v2i32, and, 1>;
def  VANDq   : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                     v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def  VEORd   : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                     v2i32, v2i32, xor, 1>;
def  VEORq   : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                     v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def  VORRd   : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                     v2i32, v2i32, or, 1>;
def  VORRq   : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                     v4i32, v4i32, or, 1>;

// VORR (immediate): OR a splatted modified-immediate into the destination.
// The '?' cmode bits are filled in from the immediate encoding (SIMM).
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
def  VBICd   : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                    (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                    "vbic", "$Vd, $Vn, $Vm", "",
                    [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                                (vnotd DPR:$Vm))))]>;
def  VBICq   : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                    (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                    "vbic", "$Vd, $Vn, $Vm", "",
                    [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                                (vnotq QPR:$Vm))))]>;

// VBIC (immediate): clear bits given by a splatted modified-immediate.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def  VORNd   : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                    (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                    "vorn", "$Vd, $Vn, $Vm", "",
                    [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                               (vnotd DPR:$Vm))))]>;
def  VORNq   : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                    (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                    "vorn", "$Vd, $Vn, $Vm", "",
                    [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                               (vnotq QPR:$Vm))))]>;

//   VMVN     : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN     : Vector Bitwise NOT
def  VMVNd   : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                    "vmvn", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq   : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                    "vmvn", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

// VBSL : Vector Bitwise Select
// Selects bits from $Vn where the corresponding bit of $src1 is set, and
// from $Vm where it is clear; $src1 is tied to $Vd because the hardware
// reads and overwrites the destination register.
def  VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                  (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                  N3RegFrm, IIC_VCNTiD,
                  "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  [(set DPR:$Vd,
                        (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
// Map the vbsl intrinsic at every 64-bit element type onto the single
// (type-agnostic, bitwise) VBSLd instruction.
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;

// Recognize the open-coded select idiom (Vn & Vd) | (Vm & ~Vd) as VBSL.
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;

def  VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                  (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                  N3RegFrm, IIC_VCNTiQ,
                  "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  [(set QPR:$Vd,
                        (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

// Same intrinsic mapping for the 128-bit element types.
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                  (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                  N3RegFrm, IIC_VBINiD,
                  "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  []>;
def  VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                  (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                  N3RegFrm, IIC_VBINiQ,
                  "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                  (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                  N3RegFrm, IIC_VBINiD,
                  "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  []>;
def  VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                  (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                  N3RegFrm, IIC_VBINiQ,
                  "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                      "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                      "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                               "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                               "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "u", int_arm_neon_vmaxu, 1>;
def  VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmax", "f32",
                      v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmax", "f32",
                      v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMAXNM
// ARMv8-only IEEE-754 maxNum variant; uses the v8 decoder namespace and a
// post-encoder hook for the Thumb2 encoding.
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                           N3RegFrm, NoItinerary, "vmaxnm", "f32",
                           v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
                 Requires<[HasV8, HasNEON]>;
  def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                           N3RegFrm, NoItinerary, "vmaxnm", "f32",
                           v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
                 Requires<[HasV8, HasNEON]>;
}

// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "u", int_arm_neon_vminu, 1>;
def  VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmin", "f32",
                      v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmin", "f32",
                      v4f32, v4f32, int_arm_neon_vmins, 1>;

// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                           N3RegFrm, NoItinerary, "vminnm", "f32",
                           v2f32, v2f32, int_arm_neon_vminnm, 1>,
                 Requires<[HasV8, HasNEON]>;
  def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                           N3RegFrm, NoItinerary, "vminnm", "f32",
                           v4f32, v4f32, int_arm_neon_vminnm, 1>,
                 Requires<[HasV8, HasNEON]>;
}

// Vector Pairwise Operations.
// Pairwise ops only have 64-bit (D-register) forms.

// VPADD : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                            int_arm_neon_vpaddls>;
defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                            int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                             int_arm_neon_vpadals>;
defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                             int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
4830 4831// VRECPE : Vector Reciprocal Estimate 4832def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 4833 IIC_VUNAD, "vrecpe", "u32", 4834 v2i32, v2i32, int_arm_neon_vrecpe>; 4835def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 4836 IIC_VUNAQ, "vrecpe", "u32", 4837 v4i32, v4i32, int_arm_neon_vrecpe>; 4838def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 4839 IIC_VUNAD, "vrecpe", "f32", 4840 v2f32, v2f32, int_arm_neon_vrecpe>; 4841def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 4842 IIC_VUNAQ, "vrecpe", "f32", 4843 v4f32, v4f32, int_arm_neon_vrecpe>; 4844 4845// VRECPS : Vector Reciprocal Step 4846def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 4847 IIC_VRECSD, "vrecps", "f32", 4848 v2f32, v2f32, int_arm_neon_vrecps, 1>; 4849def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 4850 IIC_VRECSQ, "vrecps", "f32", 4851 v4f32, v4f32, int_arm_neon_vrecps, 1>; 4852 4853// VRSQRTE : Vector Reciprocal Square Root Estimate 4854def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 4855 IIC_VUNAD, "vrsqrte", "u32", 4856 v2i32, v2i32, int_arm_neon_vrsqrte>; 4857def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 4858 IIC_VUNAQ, "vrsqrte", "u32", 4859 v4i32, v4i32, int_arm_neon_vrsqrte>; 4860def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 4861 IIC_VUNAD, "vrsqrte", "f32", 4862 v2f32, v2f32, int_arm_neon_vrsqrte>; 4863def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 4864 IIC_VUNAQ, "vrsqrte", "f32", 4865 v4f32, v4f32, int_arm_neon_vrsqrte>; 4866 4867// VRSQRTS : Vector Reciprocal Square Root Step 4868def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 4869 IIC_VRECSD, "vrsqrts", "f32", 4870 v2f32, v2f32, int_arm_neon_vrsqrts, 1>; 4871def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 4872 IIC_VRECSQ, "vrsqrts", "f32", 4873 v4f32, v4f32, int_arm_neon_vrsqrts, 1>; 4874 4875// Vector Shifts. 
// VSHL : Vector Shift
defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                         NEONvshrs>;
defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                         NEONvshru>;

// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// The maximum-shift forms use a distinct encoding (fixed bits 21-16) and
// need a custom decoder to disambiguate them.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, OpNode> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8, NEONvshlli>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16, NEONvshlli>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32, NEONvshlli>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                        NEONvshrn>;

// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                          NEONvrshrs>;
defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                          NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                         NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                          NEONvqshrns>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                          NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                          NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.
4991 4992// VABS : Vector Absolute Value 4993defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, 4994 IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", 4995 int_arm_neon_vabs>; 4996def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 4997 "vabs", "f32", 4998 v2f32, v2f32, fabs>; 4999def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 5000 "vabs", "f32", 5001 v4f32, v4f32, fabs>; 5002 5003def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))), 5004 (v2i32 (bitconvert (v8i8 (add DPR:$src, 5005 (NEONvshrs DPR:$src, (i32 7))))))), 5006 (VABSv8i8 DPR:$src)>; 5007def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))), 5008 (v2i32 (bitconvert (v4i16 (add DPR:$src, 5009 (NEONvshrs DPR:$src, (i32 15))))))), 5010 (VABSv4i16 DPR:$src)>; 5011def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))), 5012 (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))), 5013 (VABSv2i32 DPR:$src)>; 5014def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))), 5015 (v4i32 (bitconvert (v16i8 (add QPR:$src, 5016 (NEONvshrs QPR:$src, (i32 7))))))), 5017 (VABSv16i8 QPR:$src)>; 5018def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))), 5019 (v4i32 (bitconvert (v8i16 (add QPR:$src, 5020 (NEONvshrs QPR:$src, (i32 15))))))), 5021 (VABSv8i16 QPR:$src)>; 5022def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), 5023 (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), 5024 (VABSv4i32 QPR:$src)>; 5025 5026def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>; 5027def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>; 5028 5029// VQABS : Vector Saturating Absolute Value 5030defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, 5031 IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", 5032 int_arm_neon_vqabs>; 5033 5034// Vector Negate. 
// Negation expressed as (0 - x); the zero vector is NEONimmAllZerosV
// bitconverted to the pattern's element type.
def vnegd : PatFrag<(ops node:$in),
                    (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq : PatFrag<(ops node:$in),
                    (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def  VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                  (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                  "vneg", "f32", "$Vd, $Vm", "",
                  [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

def : Pat<(v8i8  (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                        int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                       int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                       ctlz>;
// VCNT : Vector Count One Bits
def  VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                     IIC_VCNTiD, "vcnt", "8",
                     v8i8, v8i8, ctpop>;
def  VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                     IIC_VCNTiQ, "vcnt", "8",
                     v16i8, v16i8, ctpop>;

// Vector Swap
// Both operands are read and written, hence the tied ins/outs.
def  VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                  (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                  NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                  []>;
def  VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                  (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                  NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                  []>;

// Vector Move Operations.
// VMOV : Vector Move (Register)
// Register-to-register vmov is just VORR with both sources the same.
def : InstAlias<"vmov${p} $Vd, $Vm",
                (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : InstAlias<"vmov${p} $Vd, $Vm",
                (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                        (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                        "vmov", "i8", "$Vd, $SIMM", "",
                        [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable

// VMOV : Vector Get Lane (move scalar to ARM core register)

def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                         [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
                                                      imm:$lane))]> {
  // The lane index is split across non-contiguous encoding bits.
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                         [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
                                                      imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                                    imm:$lane))]>,
                Requires<[HasNEON, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register lane extracts: pick the containing D subregister, then extract
// from it with the D-register instruction.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
// On uarchs where VMOV-from-lane is slow, go through an S-register copy.
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                         (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                         IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                         [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                                      GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                                       GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                                   GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{0};
}
}
// Q-register lane inserts: update the containing D subregister in place.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                                        (DSubReg_i8_reg imm:$lane))),
                                  GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                                           (DSubReg_i16_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                                           (DSubReg_i32_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

def  VDUP8d  : VDUPD<0b11101100, 0b00, "8", v8i8>;
def  VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def  VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
               Requires<[HasNEON, HasFastVDUP32]>;
def  VDUP8q  : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def  VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def  VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// NEONvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;

// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
// On subtargets where VDUP.32 from a core register is slow, materialize the
// two-lane splat with VMOVDRR instead: writing the same GPR into both halves
// of the D register yields the identical result.
def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

// VDUPLND: duplicate one lane of a D register into every lane of a D result.
// op19_16 carries the element-size encoding; the concrete defs below fill in
// the lane-index bits of Inst{19-16}.
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;

// VDUPLNQ: same, but the result is a full Q register (OpTy is the D-sized
// source type, ResTy the Q-sized result type).
// NOTE(review): the selection pattern uses VectorIndex32:$lane regardless of
// IdxTy, while the asm operand is IdxTy -- presumably intentional since only
// the operand class differs per element size; confirm against NVDupLane.
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
                                      VectorIndex32:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.
// Element-size-dependent lane encodings: the lane index occupies the bits of
// Inst{19-16} above the low bits that identify the element size.
def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

// f32 duplicates reuse the 32-bit integer VDUPLN instructions.
def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// vduplane with a Q-register source: extract the D half containing the lane
// (DSubReg_*_reg), renumber the lane within that half (SubReg_*_lane), then
// duplicate from the D register.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

// Pseudos for duplicating an f32 held in an S register; expanded after
// register allocation once the lane of the enclosing D register is known.
def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                          [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                          [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// VMOVN : Vector Narrowing Move
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                      "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                           "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                           "vqmovn", "u", int_arm_neon_vqmovnu>;
// vqmovun: signed input, unsigned saturated output.
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// anyext is satisfied by the (cheaper to reason about) zero-extending form.
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;

// Vector Conversions.
// VCVT : Vector Convert Between Floating-Point and Integers
// D-register (64-bit) forms.
def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                    v2i32, v2f32, fp_to_sint>;
def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                    v2i32, v2f32, fp_to_uint>;
def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                    v2f32, v2i32, sint_to_fp>;
def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                    v2f32, v2i32, uint_to_fp>;

// Q-register (128-bit) forms.
def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                    v4i32, v4f32, fp_to_sint>;
def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                    v4i32, v4f32, fp_to_uint>;
def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                    v4f32, v4i32, sint_to_fp>;
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                    v4f32, v4i32, uint_to_fp>;

// VCVT{A, N, P, M}
// ARMv8 directed-rounding conversions; each instantiation produces signed and
// unsigned D- and Q-register variants (suffixes SD/SQ/UD/UQ).
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                    SDPatternOperator IntU> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
    def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
    def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
    def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}

// Accept a "#0" fixed-point fraction and map it to the plain FP<->int form.
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;


// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                      IIC_VUNAQ, "vcvt", "f16.f32",
                      v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
              Requires<[HasNEON, HasFP16]>;
def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                      IIC_VUNAQ, "vcvt", "f32.f16",
                      v4f32, v4i16, int_arm_neon_vcvthf2fp>,
              Requires<[HasNEON, HasFP16]>;

// Vector Reverse.

// VREV64 : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
// f32 reversal reuses the 32-bit integer instruction.
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;

// Only 8- and 16-bit element sizes are meaningful within a 32-bit word.
def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

// Only byte elements are meaningful within a 16-bit halfword.
def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// Aligned extractions: really just dropping registers

class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;


// VEXT : Vector Extract


// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
// VEXTd: extract a D-sized result from a pair of D registers starting at the
// byte index $index.  Concrete defs below constrain Inst{10-8} per element
// size.
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

// VEXTq: same operation on a pair of Q registers; index uses Inst{11-8}.
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

// Per-element-size defs: wider elements force the low index bits to zero so
// the encoded byte offset stays element-aligned.
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8}    = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10}  = index{0};
  let Inst{9-8} = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                           (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11} = index{0};
  let Inst{10-8} = 0b000;
}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                           (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;

// VTRN : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.
// VTBL : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
// VTBL2-4 use consecutive-register table operands, hence the extra
// register-allocation requirement; they have no ISel patterns here.
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

def VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
// Like VTBL, but out-of-range indices leave the corresponding $orig byte
// unchanged, hence the "$orig = $Vd" tie.
def VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// VRINT : Vector Rounding
// ARMv8 round-to-integral in the given mode; op9_7 selects the rounding mode.
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
  }

  // Accept the redundant two-type spelling "vrintX.f32.f32".
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// These pattern classes implement scalar f32 operations on the NEON unit by
// inserting each S-register operand into an undefined D register (restricted
// to DPR_VFP2 so the S subregister exists), running the D-register NEON
// instruction, and extracting lane 0 of the result.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$a, ssub_0),
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$acc, ssub_0),
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$a, ssub_0),
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
def : N2VSPat<arm_ftosi, VCVTf2sd>;
def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;

// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
      Requires<[HasNEON, DontUseVMOVSR]>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// All 64-bit vector types share a D register, so a bitconvert between them is
// a register-class reinterpretation with no instruction emitted.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

// Likewise for the 128-bit types sharing a Q register.
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;

// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load.  Example:
// Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr))
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  let AddedComplexity = 10 in {
  // extload is lowered as a zero-extending load (VMOVLu); only sextload
  // needs the signed VMOVLs form.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available.  Example:
// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                 (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0))>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
           dsub_0))>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
           dsub_0))>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
           dsub_0))>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Example:
//   Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//   Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
//       (EXTRACT_SUBREG (VMOVLuv4i32
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                       (f64 (IMPLICIT_DEF)), (i32 0))),
//          dsub_0)),
//        dsub_0)>;
// The initial load here is VLD1LNd16 (a 16-bit lane) since only two source
// elements are needed, and the result of each widening step is narrowed back
// to a D register with dsub_0.
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                               string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                               string Insn2Ty> {
  // Anyext load: widen twice with the unsigned VMOVL.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                     (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
                    dsub_0)),
                  dsub_0)>;
  // Zext load: widen twice, zero-extending.
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                   (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
                  dsub_0)),
                dsub_0)>;
  // Sext load: widen twice, sign-extending.
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                   (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
                  dsub_0)),
                dsub_0)>;
}

// Single lengthening: one VMOVL step suffices.
defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

// Double lengthening - v4i8 -> v4i16 -> v4i32
defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
// v2i8 -> v2i16 -> v2i32
defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
// v2i16 -> v2i32 -> v2i64
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
// No multiclass for the three-step case; spell out the ext/zext/sext
// patterns directly. As above, anyext uses the unsigned (VMOVLu) chain.
def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
      (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;

//===----------------------------------------------------------------------===//
// Assembler aliases
//

// Legacy pre-UAL mnemonics for moving a GPR into the high/low half of a
// D register: fmdhr -> VMOV.32 Dd[1], Rn and fmdlr -> VMOV.32 Dd[0], Rn.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
// Bitwise ops are type-agnostic, so accept any data-type suffix (or none)
// for both the D- and Q-register forms.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases: "op Vdn, Vm" == "op Vdn, Vdn, Vm".
// NOTE(review): no two-operand alias is defined for vbic here — looks like
// an intentional omission (vbic is not commutative in its operand roles),
// but worth confirming against the assembler tests.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain (no-writeback) forms, one per element size.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Post-indexed writeback with the fixed (element-size) increment: "addr!".
def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-indexed writeback with a register increment: "addr, Rm".
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain (no-writeback) forms, one per element size.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Post-indexed writeback, fixed increment: "addr!".
def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-indexed writeback, register increment: "addr, Rm".
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// "d" forms use adjacent D registers; "q" forms use every-other D register
// (the Q-spaced lists), which only exist for 16- and 32-bit elements.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Post-indexed writeback, fixed increment: "addr!".
def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-indexed writeback, register increment: "addr, Rm".
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Same d/q structure as the VLD2 lane pseudos above.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Post-indexed writeback, fixed increment: "addr!".
def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-indexed writeback, register increment: "addr, Rm".
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// vld3 to all lanes (VLD3DUP): d-spaced and q-spaced register lists,
// all three element sizes.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;

// Post-indexed writeback, fixed increment: "addr!".
def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Post-indexed writeback, register increment: "addr, Rm".
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// vld3 to one lane: q-spaced lists only exist for 16/32-bit elements.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Post-indexed writeback, fixed increment: "addr!".
def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-indexed writeback, register increment: "addr, Rm".
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;

// Post-indexed writeback, fixed increment: "addr!".
def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Post-indexed writeback, register increment: "addr, Rm".
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// vst3 from one lane: q-spaced lists only exist for 16/32-bit elements.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Post-indexed writeback, fixed increment: "addr!".
def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-indexed writeback, register increment: "addr, Rm".
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;

// Post-indexed writeback, fixed increment: "addr!".
def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Post-indexed writeback, register increment: "addr, Rm".
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// vld4 to all lanes (VLD4DUP): d-spaced and q-spaced register lists,
// all three element sizes.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;

// Post-indexed writeback, fixed increment: "addr!".
def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Post-indexed writeback, register increment: "addr, Rm".
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Note: there is no byte-sized Q-register variant; only the .16 and .32
// lane forms exist for the Q-register lists below.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Writeback, fixed post-increment form ("$addr!").
def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback, register post-increment form ("$addr, $Rm").
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;

// Writeback, fixed post-increment form ("$addr!").
def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6715def VLD4dWB_fixed_Asm_32 : 6716 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6717 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6718def VLD4qWB_fixed_Asm_8 : 6719 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6720 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6721def VLD4qWB_fixed_Asm_16 : 6722 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6723 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6724def VLD4qWB_fixed_Asm_32 : 6725 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6726 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6727def VLD4dWB_register_Asm_8 : 6728 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 6729 (ins VecListFourD:$list, addrmode6:$addr, 6730 rGPR:$Rm, pred:$p)>; 6731def VLD4dWB_register_Asm_16 : 6732 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6733 (ins VecListFourD:$list, addrmode6:$addr, 6734 rGPR:$Rm, pred:$p)>; 6735def VLD4dWB_register_Asm_32 : 6736 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6737 (ins VecListFourD:$list, addrmode6:$addr, 6738 rGPR:$Rm, pred:$p)>; 6739def VLD4qWB_register_Asm_8 : 6740 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 6741 (ins VecListFourQ:$list, addrmode6:$addr, 6742 rGPR:$Rm, pred:$p)>; 6743def VLD4qWB_register_Asm_16 : 6744 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6745 (ins VecListFourQ:$list, addrmode6:$addr, 6746 rGPR:$Rm, pred:$p)>; 6747def VLD4qWB_register_Asm_32 : 6748 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6749 (ins VecListFourQ:$list, addrmode6:$addr, 6750 rGPR:$Rm, pred:$p)>; 6751 6752// VST4 single-lane pseudo-instructions. These need special handling for 6753// the lane index that an InstAlias can't handle, so we use these instead. 
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Writeback, fixed post-increment form ("$addr!").
def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback, register post-increment form ("$addr, $Rm").
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;

// Writeback, fixed post-increment form ("$addr!").
def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Writeback, register post-increment form ("$addr, $Rm").
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VMOV/VMVN take an optional datatype suffix.
// "vmov Vd, Vm" is encoded as VORR with both source operands the same.
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                    (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                    (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
6906def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", 6907 (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6908def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm", 6909 (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6910def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm", 6911 (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6912def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm", 6913 (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6914def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm", 6915 (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6916def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm", 6917 (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6918def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm", 6919 (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6920// Q-register versions. 6921def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", 6922 (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6923def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm", 6924 (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6925def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm", 6926 (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6927def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm", 6928 (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6929def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm", 6930 (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6931def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", 6932 (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6933def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", 6934 (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6935 6936// VSWP allows, but does not require, a type suffix. 6937defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", 6938 (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; 6939defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", 6940 (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; 6941 6942// VBIF, VBIT, and VBSL allow, but do not require, a type suffix. 
6943defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", 6944 (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 6945defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", 6946 (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 6947defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", 6948 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 6949defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", 6950 (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 6951defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", 6952 (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 6953defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", 6954 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 6955 6956// "vmov Rd, #-imm" can be handled via "vmvn". 6957def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", 6958 (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 6959def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", 6960 (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 6961def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", 6962 (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 6963def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", 6964 (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 6965 6966// 'gas' compatibility aliases for quad-word instructions. Strictly speaking, 6967// these should restrict to just the Q register variants, but the register 6968// classes are enough to match correctly regardless, so we keep it simple 6969// and just use MnemonicAlias. 6970def : NEONMnemonicAlias<"vbicq", "vbic">; 6971def : NEONMnemonicAlias<"vandq", "vand">; 6972def : NEONMnemonicAlias<"veorq", "veor">; 6973def : NEONMnemonicAlias<"vorrq", "vorr">; 6974 6975def : NEONMnemonicAlias<"vmovq", "vmov">; 6976def : NEONMnemonicAlias<"vmvnq", "vmvn">; 6977// Explicit versions for floating point so that the FPImm variants get 6978// handled early. The parser gets confused otherwise. 
6979def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; 6980def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; 6981 6982def : NEONMnemonicAlias<"vaddq", "vadd">; 6983def : NEONMnemonicAlias<"vsubq", "vsub">; 6984 6985def : NEONMnemonicAlias<"vminq", "vmin">; 6986def : NEONMnemonicAlias<"vmaxq", "vmax">; 6987 6988def : NEONMnemonicAlias<"vmulq", "vmul">; 6989 6990def : NEONMnemonicAlias<"vabsq", "vabs">; 6991 6992def : NEONMnemonicAlias<"vshlq", "vshl">; 6993def : NEONMnemonicAlias<"vshrq", "vshr">; 6994 6995def : NEONMnemonicAlias<"vcvtq", "vcvt">; 6996 6997def : NEONMnemonicAlias<"vcleq", "vcle">; 6998def : NEONMnemonicAlias<"vceqq", "vceq">; 6999 7000def : NEONMnemonicAlias<"vzipq", "vzip">; 7001def : NEONMnemonicAlias<"vswpq", "vswp">; 7002 7003def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">; 7004def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">; 7005 7006 7007// Alias for loading floating point immediates that aren't representable 7008// using the vmov.f32 encoding but the bitpattern is representable using 7009// the .i32 encoding. 7010def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", 7011 (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; 7012def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", 7013 (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; 7014