//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// NEON "modified immediate": an i32 holding the encoded immediate form
// (printed via printNEONModImmOperand).  No parser class; pattern use only.
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Splattable immediates, one AsmOperandClass per element size so the
// assembler can range-check the constant for each data type.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// "Not" variants (no custom printer).  NOTE(review): presumably these match
// the bitwise complement of a splattable constant, per the class names --
// confirm against ARMAsmParser's NEONi16splatNot/NEONi32splatNot handling.
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}

// Byte-replicated immediates: the predicate methods accept i16/i32 constants
// whose bytes are all equal; the render methods rewrite them to the VMOV/VMVN
// byte-replicate operand form.
def nImmVMOVI16AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi16vmovByteReplicate";
  let PredicateMethod = "isNEONi16ByteReplicate";
  let RenderMethod = "addNEONvmovByteReplicateOperands";
}
def nImmVMOVI32AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi32vmovByteReplicate";
  let PredicateMethod = "isNEONi32ByteReplicate";
  let RenderMethod = "addNEONvmovByteReplicateOperands";
}
def nImmVMVNI16AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi16invByteReplicate";
  let PredicateMethod = "isNEONi16ByteReplicate";
  let RenderMethod = "addNEONinvByteReplicateOperands";
}
def nImmVMVNI32AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi32invByteReplicate";
  let PredicateMethod = "isNEONi32ByteReplicate";
  let RenderMethod = "addNEONinvByteReplicateOperands";
}

def nImmVMOVI16ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI16AsmOperandByteReplicate;
}
def nImmVMOVI32ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperandByteReplicate;
}
def nImmVMVNI16ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMVNI16AsmOperandByteReplicate;
}
def nImmVMVNI32ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMVNI32AsmOperandByteReplicate;
}

def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate (printed as an FP constant rather than the
// encoded modified-immediate form).
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Lane-index operands.  The ImmLeaf bounds correspond to the lane count of
// each element size in a 64-bit D register: 8 x i8, 4 x i16, 2 x i32.
def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
// (Comment previously said "three" -- copy-paste error; this is the
// four-register "Q" list, cf. Name and printVectorListFourSpaced.)
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
// "All lanes" lists are the VLD*DUP-style operands written as {d0[]}, etc.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
                                          "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                        "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                         "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Lane-indexed vector lists.  Unlike the plain lists above, these are plain
// i32 Operands whose MI form is a (base D register, lane index) pair.
// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
// One-D-register list with half-word lane subscripting (continues the
// lane-indexed list family; MI form is (DPR register, lane index)).
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
// Two-D-register list with word lane subscripting (MI form: register + lane).
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
// Three-D-register list with half-word lane subscripting
// (MI form: register + lane).
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
// Four-D-register lane-indexed lists (MI form: register + lane).
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
// Four-Q-register list with word lane subscripting (MI form: register + lane).
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Load/store pattern fragments predicated on the access's known alignment.
// Note the asymmetry: dword is ">= 8" and non_word is "< 4", while the word,
// hword and byte forms require an exact alignment match.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Vector comparisons.  SDTARMVCMP: integer-vector result, two matching
// operands.  SDTARMVCMPZ (compare-against-zero "Z" forms): one operand,
// no type constraints.
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;

def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
// Shift-by-immediate profiles: last operand is always the i32 shift amount.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                      SDTCisVT<2, i32>]>;
def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

// Rounding shifts.
def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

// Saturating shifts (and saturating rounding narrows below).
def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert (VSLI/VSRI): result also depends on the destination input.
def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction to i32 from (vector, i32 lane index).  The "u"/"s"
// suffixes presumably select zero- vs. sign-extension of sub-word lanes --
// confirm against the ARMISD node definitions.
def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                        SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Vector materialization from an encoded modified-immediate (i32 operand).
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

// Bitwise select: result and all three inputs share one vector type.
def NEONvbsl : SDNode<"ARMISD::VBSL",
                      SDTypeProfile<1, 3, [SDTCisVec<0>,
                                           SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>,
                                           SDTCisSameAs<0, 3>]>>;

def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
                          SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                               SDTCisVT<2, i32>]>>;

// Vector extract (VEXT): two matching vector inputs plus an i32 amount.
def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Single-operand shuffles (VREV*): result type matches the input.
def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles (VZIP/VUZP/VTRN): both results and both inputs share
// one vector type.
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>,
                                        SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Long multiplies: integer result, two matching integer inputs (the result
// is intentionally not constrained to the input type).
def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                       SDTCisSameAs<1, 2>]>;
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

// Scalar f32 max/min.
def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>]>;
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Matches a VMOVIMM whose decoded value is a 32-bit-element all-zeros splat.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Matches a VMOVIMM whose decoded value is an 8-bit-element all-ones splat.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(store (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Q-register (2 x D) load pseudos; the WB forms also produce the written-back
// base address in $wb, tied to $addr.addr.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

// QQ-register (4 x D) load pseudos.
class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


// QQQQ-register (8 x D) load pseudos; these take the previous register
// contents as $src, tied to the result.
class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

// Everything below this point is a load with no other side effects.
let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;          // Rm == 15: no writeback.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>;
def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
// NOTE(review): the writeback forms below use the IIC_VLD1x2u itinerary even
// though they load 3 (and, further down, 4) registers -- possibly deliberate
// itinerary reuse; confirm against the schedule models.
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;

// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
                  addrmode6align64or128>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>;

def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
                  addrmode6align64or128or256>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>;

def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
                       addrmode6align64or128>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>;

defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
                       addrmode6align64or128or256>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>;

def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register :
VLDQQWBregisterPseudo<IIC_VLD2x2u>; 892def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; 893def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; 894 895// ...with double-spaced registers 896def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, 897 addrmode6align64or128>; 898def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, 899 addrmode6align64or128>; 900def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, 901 addrmode6align64or128>; 902defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, 903 addrmode6align64or128>; 904defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, 905 addrmode6align64or128>; 906defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, 907 addrmode6align64or128>; 908 909// VLD3 : Vector Load (multiple 3-element structures) 910class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> 911 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 912 (ins addrmode6:$Rn), IIC_VLD3, 913 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { 914 let Rm = 0b1111; 915 let Inst{4} = Rn{4}; 916 let DecoderMethod = "DecodeVLDST3Instruction"; 917} 918 919def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; 920def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; 921def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; 922 923def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>; 924def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>; 925def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>; 926 927// ...with address register writeback: 928class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 929 : NLdSt<0, 0b10, op11_8, op7_4, 930 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 931 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, 932 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", 933 "$Rn.addr = $wb", []> { 934 let Inst{4} = Rn{4}; 935 let DecoderMethod = "DecodeVLDST3Instruction"; 936} 937 938def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; 
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;

// ...with double-spaced registers:
def VLD3q8  : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// VLD4 : Vector Load (multiple 4-element structures)
// Base class: loads into four D registers from one addrmode6 address.
// Rm = 0b1111 selects the fixed-address (no-writeback) form; the writeback
// variant below takes an am6offset:$Rm operand instead.
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  let Rm = 0b1111;
  // Bits 5-4 of the addrmode6 operand are copied into the encoding
  // (NOTE(review): presumably the alignment field, matching the parallel
  // VLD1/VLD2 definitions above -- confirm against the ARM ARM).
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;

// ...with double-spaced registers:
def VLD4q8  : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Pseudo single-lane loads into Q / QQ / QQQQ registers; expanded to the
// real D-register VLD*LN instructions after register allocation.  The
// "$src = $dst" constraint ties the input vector to the output so the
// unloaded lanes are preserved.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;

// VLD1LN : Vector Load (single element to one lane)
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
          (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
          "$src = $Vd",
          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                        (i32 (LoadOp addrmode6:$Rn)),
                                        imm:$lane))]> {
  let Rm = 0b1111; // fixed-address form (no writeback)
  let DecoderMethod = "DecodeVLD1LN";
}
// Same as VLD1LN but uses the addrmode6oneL32 addressing-mode operand;
// only the 32-bit element size below instantiates it.
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
          (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
          "$src = $Vd",
          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                        (i32 (LoadOp addrmode6oneL32:$Rn)),
                                        imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// Q-register lane load selected directly by the isel pattern.
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

// Per-size defs: the lane number occupies the high bits of Inst{7-5};
// the wider the element, the fewer lane bits are needed.
def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

// f32 lane inserts reuse the 32-bit integer lane-load instructions.
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
          "\\{$Vd[$lane]\\}, $Rn$Rm",
          "$src = $Vd, $Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  // NOTE(review): both encoding bits are driven from Rn{4} here, unlike the
  // non-writeback VLD1LNd32 which uses Inst{5-4} = Rn{5-4}; this matches the
  // definition as written -- verify against the ARM ARM before changing.
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;

// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
          "$src1 = $Vd, $src2 = $dst2", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
          "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
          "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VLD3lnu, "vld3", Dt,
          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
          []> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
           nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VLD4lnu, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
          []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
              Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins AddrMode:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
                         addrmode6dupalignNone>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
                         addrmode6dupalign16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                         addrmode6dupalign32>;

// f32 splat reuses the 32-bit integer dup-load.
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;

class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
               Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
                          addrmode6dupalignNone>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
                          addrmode6dupalign16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
                          addrmode6dupalign32>;

def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes,
                         addrmode6dupalign16>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
                         addrmode6dupalign32>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
                         addrmode6dupalign64>;

// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes,
                           addrmode6dupalign16>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                           addrmode6dupalign32>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                           addrmode6dupalign64>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb  : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes,
                              addrmode6dupalign16>;
defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
                              addrmode6dupalign32>;
defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
                              addrmode6dupalign64>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                                addrmode6dupalign64>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  // Bit 4 is hard-wired to 0 here, unlike VLD2DUP/VLD4DUP which forward
  // Rn{4} (NOTE(review): matches the definition as written; presumably
  // vld3-dup has no alignment option -- confirm against the ARM ARM).
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1110, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

// The 32-bit forms additionally route Rn{5} into Inst{6}.
def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;

class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
  // Rm == 0b1111 selects the no-writeback encoding of the instruction.
  let Rm = 0b1111;
  // Alignment bit(s) come from the address-mode operand encoding.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VST1d8  : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;

def VST1q8  : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
// _fixed increments by the access size ("$Rn!"); _register adds $Rm.
multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VLD1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins AddrMode:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins AddrMode:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;

// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VST2d8  : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
                   addrmode6align64or128>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
                   addrmode6align64or128>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
                   addrmode6align64or128>;

def VST2q8  : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;

// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
                         addrmode6align64or128>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
                         addrmode6align64or128>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
                         addrmode6align64or128>;

defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;

// ...with double-spaced registers
def VST2b8  : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
                         addrmode6align64or128>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                         addrmode6align64or128>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                         addrmode6align64or128>;

//
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  // Rm == 0b1111 selects the no-writeback encoding of the instruction.
  let Rm = 0b1111;
  // Alignment bit comes from the address-mode operand encoding.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

// Pseudos carrying the 3 D regs as one QQ tuple; expanded after regalloc.
def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;

// ...with double-spaced registers:
def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;

// ...with double-spaced registers:
def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
  // Rm == 0b1111 selects the no-writeback encoding of the instruction.
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

// The lane index occupies a size-dependent slice of Inst{7-5}.
def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                      NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Float lane stores reuse the 32-bit integer lane-store instructions.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins AdrMode:$Rn, am6offset:$Rm,
             DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                    AdrMode:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                            NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{4} = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  // Rm == 0b1111 selects the no-writeback encoding of the instruction.
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST3LN";
}

// The lane index occupies a size-dependent slice of Inst{7-5}.
def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VST3lnu, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;

// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "", []> {
  // Rm == 0b1111 selects the no-writeback encoding of the instruction.
  let Rm = 0b1111;
  // Alignment bit comes from the address-mode operand encoding.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VST4lnu, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Use vld1/vst1 for unaligned f64 load / store
// On little-endian, a less-than-word-aligned f64 access can be done with a
// narrower-element vld1/vst1; on big-endian only the 64-bit form is safe.
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
          (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
          (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
          (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;

// Use vld1/vst1 for Q and QQ.  Also use them for unaligned v2f64
// load / store if it's legal.
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
          (VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q64 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
          (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;

//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D
// sub-registers of Q registers.
// Each XForm maps an element index to the D subreg index holding it,
// dividing by the number of elements of that size per D register.
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// Masks the index down to the lane's position within a single D register.
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// Double-register form (bit 6 = 0): matched from the SDNode OpNode.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
// Quad-register form (bit 6 = 1).
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Same shape as N2VD/N2VQ but matched from an intrinsic (SDPatternOperator)
// and with a caller-supplied itinerary.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as above, but not predicated.
// Non-predicated 2-register intrinsic, double-register form (uses the
// N2Vnp base, which exposes fewer encoding fields than N2V).
class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<0b10, op17_16, op10_8, op7, 0,  (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

// Non-predicated 2-register intrinsic, quad-register form.
class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<0b10, op17_16, op10_8, op7,  1,  (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as N2VQIntXnp but with Vd as a src register.
// The tied $src operand makes the intrinsic a read-modify-write of Vd.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                  bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

// Narrow 2-register operations.
// Narrowing: Q-register source, D-register result.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
// Widening: D-register source, Q-register result.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written: the outs list repeats $Vd/$Vm and
// ties them to the ins via the constraint string. No selection pattern;
// shuffles are matched elsewhere.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Scalar (by-lane) form: $Vm is a D register restricted to the range the
// encoding can address (DPR_VFP2 for 32-bit lanes, DPR_8 for 16-bit lanes),
// and the lane value is broadcast via NEONvduplane before ShOp.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Non-predicated variant (N3Vnp base).
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// By-lane (scalar) intrinsic forms: second operand is a broadcast lane.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// Like N3VDInt but with the register operands in $Vm, $Vn order (note the
// reversed asm string and pattern relative to N3VDInt).
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Same as N3VQIntnp but with Vd as a src register.
// Tied $src makes this a read-modify-write of Vd (used by crypto/3-arg ops).
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
// Quad version of N3VDIntSh: operands in $Vm, $Vn order.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// $src1 is tied to $Vd: Vd = OpNode(Vd, MulOp(Vn, Vm)).
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                     imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                     imm:$lane)))))))]>;

class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                         (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                              imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                         (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                              imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Vd = OpNode(Vd, IntOp(Vn, Vm)); $src1 is tied to $Vd.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// Q-register accumulator tied to $Vd, D-register multiplicands:
// Vd = OpNode(Vd, MulOp(Vn, Vm)).
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$Vn),
                              (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                 imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$Vn),
                              (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                 imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Vd = OpNode(Vd, ExtOp(IntOp(Vn, Vm))): the D-sized intrinsic result is
// widened to Q size by ExtOp before accumulation.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// By-lane variants of the long 3-argument intrinsic.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Q-register sources, D-register (narrowed) result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// D-register sources, Q-register (widened) result.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
          (TyQ (OpNode (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
          (TyQ (OpNode (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Both D-register operands are widened with ExtOp before OpNode:
// Vd = OpNode(ExtOp(Vn), ExtOp(Vm)).
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// Here the D-sized intrinsic result is widened: Vd = ExtOp(IntOp(Vn, Vm)).
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// By-lane variants of the long 3-register intrinsic.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;

// Wide 3-register operations.
// Q-register first operand, D-register second operand widened with ExtOp:
// Vd = OpNode(Vn, ExtOp(Vm)).
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
3103class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 3104 bits<2> op17_16, bits<5> op11_7, bit op4, 3105 string OpcodeStr, string Dt, 3106 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3107 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, 3108 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD, 3109 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd", 3110 [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>; 3111class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 3112 bits<2> op17_16, bits<5> op11_7, bit op4, 3113 string OpcodeStr, string Dt, 3114 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 3115 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, 3116 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ, 3117 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd", 3118 [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>; 3119 3120// Shift by immediate, 3121// both double- and quad-register. 3122let TwoOperandAliasConstraint = "$Vm = $Vd" in { 3123class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3124 Format f, InstrItinClass itin, Operand ImmTy, 3125 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> 3126 : N2VImm<op24, op23, op11_8, op7, 0, op4, 3127 (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin, 3128 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3129 [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>; 3130class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3131 Format f, InstrItinClass itin, Operand ImmTy, 3132 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> 3133 : N2VImm<op24, op23, op11_8, op7, 1, op4, 3134 (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin, 3135 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3136 [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>; 3137} 3138 3139// Long shift by immediate. 
3140class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, 3141 string OpcodeStr, string Dt, 3142 ValueType ResTy, ValueType OpTy, Operand ImmTy, 3143 SDPatternOperator OpNode> 3144 : N2VImm<op24, op23, op11_8, op7, op6, op4, 3145 (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm, 3146 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3147 [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>; 3148 3149// Narrow shift by immediate. 3150class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, 3151 InstrItinClass itin, string OpcodeStr, string Dt, 3152 ValueType ResTy, ValueType OpTy, Operand ImmTy, 3153 SDPatternOperator OpNode> 3154 : N2VImm<op24, op23, op11_8, op7, op6, op4, 3155 (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin, 3156 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", 3157 [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm), 3158 (i32 ImmTy:$SIMM))))]>; 3159 3160// Shift right by immediate and accumulate, 3161// both double- and quad-register. 
3162let TwoOperandAliasConstraint = "$Vm = $Vd" in { 3163class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3164 Operand ImmTy, string OpcodeStr, string Dt, 3165 ValueType Ty, SDNode ShOp> 3166 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd), 3167 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD, 3168 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", 3169 [(set DPR:$Vd, (Ty (add DPR:$src1, 3170 (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>; 3171class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3172 Operand ImmTy, string OpcodeStr, string Dt, 3173 ValueType Ty, SDNode ShOp> 3174 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd), 3175 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD, 3176 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", 3177 [(set QPR:$Vd, (Ty (add QPR:$src1, 3178 (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>; 3179} 3180 3181// Shift by immediate and insert, 3182// both double- and quad-register. 3183let TwoOperandAliasConstraint = "$Vm = $Vd" in { 3184class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3185 Operand ImmTy, Format f, string OpcodeStr, string Dt, 3186 ValueType Ty,SDNode ShOp> 3187 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd), 3188 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD, 3189 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", 3190 [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>; 3191class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, 3192 Operand ImmTy, Format f, string OpcodeStr, string Dt, 3193 ValueType Ty,SDNode ShOp> 3194 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd), 3195 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ, 3196 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", 3197 [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>; 3198} 3199 3200// Convert, with fractional bits immediate, 3201// both double- and quad-register. 
// Fixed-point conversion: Vd = IntOp(Vm, #fbits), with the number of
// fractional bits given by the neon_vcvt_imm32 immediate.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
// source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  // Float comparisons produce an integer (all-ones/all-zeros) result vector.
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}


// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}


// Neon Narrowing 2-register vector operations,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
// source operand element sizes of 8, 16 and 32 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// 3-register shifts with a scalar (lane) operand, 16- and 32-bit elements.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}

// Same, but using the "Sh" (shift) variants of the base classes.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// 3-register intrinsics with a scalar (lane) operand, 16/32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}

multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}

multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

// Long 3-register operations with a scalar (lane) operand.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

// Long 3-register operations with explicitly extended operands.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// Long 3-register intrinsics with a scalar (lane) operand.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector operations (SDNode OpNode with an explicit
// ExtOp widening the narrow operand),
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Multiply-Op with a scalar (lane) multiplicand, 16/32-bit elements.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
// element sizes of 16 and 32 bits:
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}

// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
  :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
               itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
}

// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

// Long Multiply-Op with a scalar (lane) multiplicand.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// Long 3-argument intrinsics with a scalar (lane) operand.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
// Neon Long 3-argument intrinsics with explicit extend (used by VABAL):
// the narrow operands are combined with IntOp, widened with ExtOp, and
// folded into the wide accumulator with OpNode.
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp,
                            SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
// Pairwise-long ops produce half as many result elements at twice the
// width, e.g. v8i8 source -> v4i16 result.
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// The high bits of the imm6 field (set via Inst{21-19} below) select the
// element size; the remaining low bits carry the shift amount.
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
// Right-shift variant: uses the shr_imm* immediate operands (which restrict
// the shift amount range) instead of a plain i32imm.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// The left-insert form (VSLI, NEONvsli) takes a plain i32imm; the
// right-insert form below (VSRI, NEONvsri) uses the range-restricted
// shr_imm* operands.
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                             // imm6 = xxxxxx
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits:
// The shift amount operand (imm1_7/imm1_15/imm1_31) is bounded by the
// source element width.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
               OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
               OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits:
// Note the data-type suffix names the wide *source* element size while the
// result elements are half that width (e.g. ".16" narrows v8i16 -> v8i8).
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                    OpcodeStr, !strconcat(Dt, "16"),
                    v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.
// VADD : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                         add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
// No intrinsic attached (null_frag): the Pat definitions below select
// VADDHN from the equivalent trunc-of-shifted-add DAG instead.
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

// Select VADDHN for the "add, shift down by the element width, truncate"
// idiom.
def : Pat<(v8i8  (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;

// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;

// Q-register by-lane multiplies: extract the D subregister containing the
// requested lane and use the by-scalar instruction on it.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;


// Multiplication by a scalar duplicated from an S register: insert the
// scalar into lane 0 of an undefined D register and multiply by that lane.
def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
           (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
           (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
           (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
           (i32 0))>;


// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "s", NEONvmulls, 1>;
  defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "u", NEONvmullu, 1>;
  def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                        v8i16, v8i8, int_arm_neon_vmullp, 1>;
  def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                          "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                    Requires<[HasV8, HasCrypto]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.
// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// Q-register by-lane accumulate: extract the D subregister holding the lane
// and use the by-scalar instruction.
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", NEONvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;

let Predicates = [HasNEON, HasV8_1a] in {
  // v8.1a Neon Rounding Double Multiply-Op vector operations,
  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long
  //            (Q += D * D)
  // The instructions carry no ISel pattern (null_frag); the Pat definitions
  // below match the equivalent vqadds-of-vqrdmulh intrinsic combinations.
  defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                             null_frag>;
  def : Pat<(v4i16 (int_arm_neon_vqadds
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                   (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (int_arm_neon_vqadds
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
                                                   (v2i32 DPR:$Vm))))),
            (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (int_arm_neon_vqadds
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
                                                   (v8i16 QPR:$Vm))))),
            (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (int_arm_neon_vqadds
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
                                                   (v4i32 QPR:$Vm))))),
            (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                                  null_frag>;
  def : Pat<(v4i16 (int_arm_neon_vqadds
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh
                              (v4i16 DPR:$Vn),
                              (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                   imm:$lane)))))),
            (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
                                    imm:$lane))>;
  def : Pat<(v2i32 (int_arm_neon_vqadds
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh
                              (v2i32 DPR:$Vn),
                              (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                   imm:$lane)))))),
            (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  def : Pat<(v8i16 (int_arm_neon_vqadds
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh
                              (v8i16 QPR:$src2),
                              (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                   imm:$lane)))))),
            (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (int_arm_neon_vqadds
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh
                              (v4i32 QPR:$src2),
                              (v4i32 (NEONvduplane (v4i32 QPR:$src3),
                                                   imm:$lane)))))),
            (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;

  // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long
  //            (Q -= D * D)
  // Same scheme as VQRDMLAH, but matching vqsubs instead of vqadds.
  defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                             null_frag>;
  def : Pat<(v4i16 (int_arm_neon_vqsubs
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                   (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (int_arm_neon_vqsubs
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
                                                   (v2i32 DPR:$Vm))))),
            (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (int_arm_neon_vqsubs
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
                                                   (v8i16 QPR:$Vm))))),
            (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (int_arm_neon_vqsubs
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
                                                   (v4i32 QPR:$Vm))))),
            (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                                  null_frag>;
  def : Pat<(v4i16 (int_arm_neon_vqsubs
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh
                              (v4i16 DPR:$Vn),
                              (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                   imm:$lane)))))),
            (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
  def : Pat<(v2i32 (int_arm_neon_vqsubs
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh
                              (v2i32 DPR:$Vn),
                              (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                   imm:$lane)))))),
            (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  def : Pat<(v8i16 (int_arm_neon_vqsubs
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh
                              (v8i16 QPR:$src2),
                              (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                   imm:$lane)))))),
            (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (int_arm_neon_vqsubs
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh
                              (v4i32 QPR:$src2),
                              (v4i32 (NEONvduplane (v4i32 QPR:$src3),
                                                   imm:$lane)))))),
            (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;
}
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
// null_frag: selected via the vqadds-of-vqdmull Pat definitions that follow.
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", null_frag>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;

def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
                   (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                (v4i16 DPR:$Vm))))),
          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
// VQDMLAL patterns, continued: fold vqadds-of-vqdmull (optionally with a
// duplicated-lane operand) into the fused accumulate instructions.
def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
                   (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                (v2i32 DPR:$Vm))))),
          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
                   (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                            (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                 imm:$lane)))))),
          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
                   (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                            (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                 imm:$lane)))))),
          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;

// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// Q-register by-lane multiply-subtract: extract the D subregister holding
// the lane and use the by-scalar instruction.
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
// null_frag: selected via the vqsubs-of-vqdmull Pat definitions that follow.
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", null_frag>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;

def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
                   (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                (v4i16 DPR:$Vm))))),
          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
                   (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                (v2i32 DPR:$Vm))))),
          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
                   (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                            (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                 imm:$lane)))))),
          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
                   (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                            (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                 imm:$lane)))))),
          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;

// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                       v2f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                       v4f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                       v2f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                       v4f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;
// fma with a negated multiplicand maps onto fused multiply-subtract.
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;

// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                     "vsub", "i", sub, 0>;
def  VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                   v2f32, v2f32, fsub, 0>;
def  VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                   v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vsubl", "s", sub, sext, 0>;
defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                           int_arm_neon_vrsubhn, 0>;

// Select VSUBHN for "take the high half of a widened subtract" idioms.
def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;

// Vector Comparisons.

// VCEQ : Vector Compare Equal
defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                    IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def  VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                   NEONvceq, 1>;
def  VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                   NEONvceq, 1>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                          "$Vd, $Vm, #0", NEONvceqz>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                   NEONvcge, 0>;
def  VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                   NEONvcge, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                          "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                          "$Vd, $Vm, #0", NEONvclez>;
}

// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                   NEONvcgt, 0>;
def  VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                   NEONvcgt, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                          "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz  : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                          "$Vd, $Vm, #0", NEONvcltz>;
}

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                      "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
def  VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                      "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                      "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
def  VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                      "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
// VTST : Vector Test Bits
defm VTST  : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                     IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// VACLT/VACLE are assembler-level aliases of VACGT/VACGE with the source
// operands swapped.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;

// Two-operand forms: the destination doubles as the first source.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;

// Vector Bitwise Operations.
// Bitwise-NOT helpers: xor with an all-ones vector, used by VBIC/VORN/VMVN.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8  NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND : Vector Bitwise AND
def  VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                   v2i32, v2i32, and, 1>;
def  VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                   v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def  VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                   v2i32, v2i32, xor, 1>;
def  VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                   v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def  VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                   v2i32, v2i32, or, 1>;
def  VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                   v4i32, v4i32, or, 1>;

// VORR (immediate) : OR a splatted modified-immediate into the destination.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def  VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                  (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                  "vbic", "$Vd, $Vn, $Vm", "",
                  [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                             (vnotd DPR:$Vm))))]>;
def  VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                  (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                  "vbic", "$Vd, $Vn, $Vm", "",
                  [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                             (vnotq QPR:$Vm))))]>;
}

// VBIC (immediate) : clear splatted modified-immediate bits in the destination.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def  VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                  (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                  "vorn", "$Vd, $Vn, $Vm", "",
                  [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                            (vnotd DPR:$Vm))))]>;
def  VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                  (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                  "vorn", "$Vd, $Vn, $Vm", "",
                  [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                            (vnotq QPR:$Vm))))]>;

// VMVN : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
def  VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                  (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                  "vmvn", "$Vd, $Vm", "",
                  [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                  (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                  "vmvn", "$Vd, $Vm", "",
                  [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

// VBSL : Vector Bitwise Select
def  VBSLd  : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                   (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VCNTiD,
                   "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   [(set DPR:$Vd,
                         (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;

// Map every 64-bit element type of the vbsl intrinsic onto VBSLd.
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;

// Recognize the open-coded select (n & d) | (m & ~d) as VBSL.
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;

def  VBSLq  : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                   (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VCNTiQ,
                   "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   [(set QPR:$Vd,
                         (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

// Map every 128-bit element type of the vbsl intrinsic onto VBSLq.
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                  (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                  N3RegFrm, IIC_VBINiD,
                  "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  []>;
def  VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                  (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                  N3RegFrm, IIC_VBINiQ,
                  "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                  (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                  N3RegFrm, IIC_VBINiD,
                  "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  []>;
def  VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                  (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                  N3RegFrm, IIC_VBINiQ,
                  "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                  []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                      "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                      "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
// NOTE: zext is correct for both signedness variants because the absolute
// difference itself is always non-negative.
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                               "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                               "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "u", int_arm_neon_vmaxu, 1>;
def  VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmax", "f32",
                      v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmax", "f32",
                      v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMAXNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                           N3RegFrm, NoItinerary, "vmaxnm", "f32",
                           v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
                 Requires<[HasV8, HasNEON]>;
  def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                           N3RegFrm, NoItinerary, "vmaxnm", "f32",
                           v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
                 Requires<[HasV8, HasNEON]>;
}

// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "u", int_arm_neon_vminu, 1>;
def  VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmin", "f32",
                      v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmin", "f32",
                      v4f32, v4f32, int_arm_neon_vmins, 1>;

// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                           N3RegFrm, NoItinerary, "vminnm", "f32",
                           v2f32, v2f32, int_arm_neon_vminnm, 1>,
                 Requires<[HasV8, HasNEON]>;
  def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                           N3RegFrm, NoItinerary, "vminnm", "f32",
                           v4f32, v4f32, int_arm_neon_vminnm, 1>,
                 Requires<[HasV8, HasNEON]>;
}

// Vector Pairwise Operations.

// VPADD : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;

// Vector Shifts.
// VSHL : Vector Shift
defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                         NEONvshrs>;
defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                         NEONvshru>;

// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32>;

// A shift by the full source element width must use the max-shift encoding;
// it is the same result whether the input was sign- or zero-extended.
def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                        PatFrag<(ops node:$Rn, node:$amt),
                                (trunc (NEONvshrs node:$Rn, node:$amt))>>;

// The narrowing discards the high bits, so an unsigned shift selects the
// same instruction.
def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;

// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                          NEONvrshrs>;
defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                          NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                         NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                          NEONvqshrns>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                          NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                          NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                       IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
                       int_arm_neon_vabs>;
def  VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                   "vabs", "f32",
                   v2f32, v2f32, fabs>;
def  VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                   "vabs", "f32",
                   v4f32, v4f32, fabs>;

// Recognize the classic branch-free abs idiom (x + (x >> N)) ^ (x >> N),
// where N is the sign-bit shift for the element width, as VABS.
def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
               (v2i32 (bitconvert (v8i8 (add DPR:$src,
                                             (NEONvshrs DPR:$src, (i32 7))))))),
          (VABSv8i8 DPR:$src)>;
def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
               (v2i32 (bitconvert (v4i16 (add DPR:$src,
                                              (NEONvshrs DPR:$src, (i32 15))))))),
          (VABSv4i16 DPR:$src)>;
def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
               (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
          (VABSv2i32 DPR:$src)>;
def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
               (v4i32 (bitconvert (v16i8 (add QPR:$src,
                                              (NEONvshrs QPR:$src, (i32 7))))))),
          (VABSv16i8 QPR:$src)>;
def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
               (v4i32 (bitconvert (v8i16 (add QPR:$src,
                                              (NEONvshrs QPR:$src, (i32 15))))))),
          (VABSv8i16 QPR:$src)>;
def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
               (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
          (VABSv4i32 QPR:$src)>;

// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                        int_arm_neon_vqabs>;

// Vector Negate.

// Negation expressed as a subtract from the all-zeros vector.
def vnegd : PatFrag<(ops node:$in),
                    (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq : PatFrag<(ops node:$in),
                    (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

def : Pat<(v8i8  (vnegd DPR:$src)), (VNEGs8d  DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q  QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                        int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                       int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                       ctlz>;
// VCNT : Vector Count One Bits
def  VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                     IIC_VCNTiD, "vcnt", "8",
                     v8i8, v8i8, ctpop>;
def  VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                     IIC_VCNTiQ, "vcnt", "8",
                     v16i8, v16i8, ctpop>;

// Vector Swap
def  VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                  (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                  NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                  []>;
def  VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                  (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                  NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                  []>;

// Vector Move Operations.
5438 5439// VMOV : Vector Move (Register) 5440def : NEONInstAlias<"vmov${p} $Vd, $Vm", 5441 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; 5442def : NEONInstAlias<"vmov${p} $Vd, $Vm", 5443 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; 5444 5445// VMOV : Vector Move (Immediate) 5446 5447let isReMaterializable = 1 in { 5448def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), 5449 (ins nImmSplatI8:$SIMM), IIC_VMOVImm, 5450 "vmov", "i8", "$Vd, $SIMM", "", 5451 [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>; 5452def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd), 5453 (ins nImmSplatI8:$SIMM), IIC_VMOVImm, 5454 "vmov", "i8", "$Vd, $SIMM", "", 5455 [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>; 5456 5457def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd), 5458 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 5459 "vmov", "i16", "$Vd, $SIMM", "", 5460 [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> { 5461 let Inst{9} = SIMM{9}; 5462} 5463 5464def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd), 5465 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 5466 "vmov", "i16", "$Vd, $SIMM", "", 5467 [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> { 5468 let Inst{9} = SIMM{9}; 5469} 5470 5471def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd), 5472 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 5473 "vmov", "i32", "$Vd, $SIMM", "", 5474 [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> { 5475 let Inst{11-8} = SIMM{11-8}; 5476} 5477 5478def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd), 5479 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 5480 "vmov", "i32", "$Vd, $SIMM", "", 5481 [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> { 5482 let Inst{11-8} = SIMM{11-8}; 5483} 5484 5485def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd), 5486 (ins nImmSplatI64:$SIMM), IIC_VMOVImm, 5487 "vmov", "i64", "$Vd, $SIMM", "", 5488 [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>; 
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable

// Add support for bytes replication feature, so it could be GAS compatible.
// E.g. instructions below:
// "vmov.i32 d0, 0xffffffff"
// "vmov.i32 d0, 0xabababab"
// "vmov.i16 d0, 0xabab"
// are incorrect, but we could deal with such cases.
// For last two instructions, for example, it should emit:
// "vmov.i8 d0, 0xab"
def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;

// Also add same support for VMVN instructions. So instruction:
// "vmvn.i32 d0, 0xabababab"
// actually means:
// "vmov.i8 d0, 0x54"
def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;

// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute so they should be used wherever possible for
// setting a register to zero.

// Even without these pseudo-insts we would probably end up with the correct
// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
// since they are sometimes rather expensive (in general).

let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
}

// VMOV : Vector Get Lane (move scalar to ARM core register)

def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                         [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
                                                      imm:$lane))]> {
  // 3-bit byte lane index is split across opc1{0} and opc2.
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                         [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
                                                      imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                                    imm:$lane))]>,
                Requires<[HasVFP2, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register lane reads: extract the containing D sub-register, then use the
// D-register form with the lane number rebased into that sub-register.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
// On subtargets with slow VGETLN.32, go through an S sub-register copy instead.
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                         (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                         IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                         [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                                      GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                                       GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                                   GPR:$R, imm:$lane))]>,
                Requires<[HasVFP2]> {
  let Inst{21} = lane{0};
  // This instruction is equivalent to
  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
  let isInsertSubreg = 1;
}
}
// Q-register lane writes: insert via the containing D sub-register.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                         (DSubReg_i8_reg imm:$lane))),
                                  GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i16_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                            (DSubReg_i32_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
              Requires<[HasNEON, HasFastVDUP32]>;
def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// NEONvdup patterns for uarchs with fast VDUP.32.
5746def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, 5747 Requires<[HasNEON,HasFastVDUP32]>; 5748def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; 5749 5750// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. 5751def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, 5752 Requires<[HasNEON,HasSlowVDUP32]>; 5753def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, 5754 Requires<[HasNEON,HasSlowVDUP32]>; 5755 5756// VDUP : Vector Duplicate Lane (from scalar to all elements) 5757 5758class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, 5759 ValueType Ty, Operand IdxTy> 5760 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 5761 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane", 5762 [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>; 5763 5764class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt, 5765 ValueType ResTy, ValueType OpTy, Operand IdxTy> 5766 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 5767 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane", 5768 [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm), 5769 VectorIndex32:$lane)))]>; 5770 5771// Inst{19-16} is partially specified depending on the element size. 
// Each element size leaves a different number of Inst{19-16} bits free for
// the lane index; the defs below splice the index into those bits.
def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Duplicating a lane of a Q register: extract the containing D sub-register
// and rebase the lane index into it.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

// Splat of an f32 already in an S register: insert into lane 0, dup lane 0.
def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;

// VMOVN : Vector Narrowing Move
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                      "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                           "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                           "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// anyext can use the (cheaper-to-select) unsigned lengthening form.
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;

// Vector Conversions.
// VCVT : Vector Convert Between Floating-Point and Integers
def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                    v2i32, v2f32, fp_to_sint>;
def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                    v2i32, v2f32, fp_to_uint>;
def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                    v2f32, v2i32, sint_to_fp>;
def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                    v2f32, v2i32, uint_to_fp>;

def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                    v4i32, v4f32, fp_to_sint>;
def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                    v4i32, v4f32, fp_to_uint>;
def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                    v4f32, v4i32, sint_to_fp>;
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                    v4f32, v4i32, uint_to_fp>;

// VCVT{A, N, P, M}
// ARMv8-only directed-rounding conversions; expands to signed/unsigned
// D- and Q-register variants for each rounding mode.
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                    SDPatternOperator IntU> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
    def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
    def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
    def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}

// A fixed-point conversion with #0 fractional bits is the plain conversion;
// accept that spelling and emit the non-fixed-point instruction.
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;


// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                      IIC_VUNAQ, "vcvt", "f16.f32",
                      v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
              Requires<[HasNEON, HasFP16]>;
def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                      IIC_VUNAQ, "vcvt", "f32.f16",
                      v4f32, v4i16, int_arm_neon_vcvthf2fp>,
              Requires<[HasNEON, HasFP16]>;

// Vector Reverse.

// VREV64 : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// Aligned extractions: really just dropping registers

class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;


// VEXT : Vector Extract


// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        // Use the immTy parameter (as VEXTd does) rather than hard-coding
        // imm0_15, so each element size gets the correct assembler range
        // check on the index operand (imm0_7 for .16, imm0_3 for .32, ...).
        (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

// The byte-granularity immediate field Inst{11-8} holds index * eltsize/8;
// each def splices its (narrower) index into the high bits and zeroes the rest.
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8}    = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10}  = index{0};
  let Inst{9-8} = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                           (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11}   = index{0};
  let Inst{10-8} = 0b000;
}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                           (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;

// VTRN : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

// VTBL : Vector Table Lookup
// The table is given as a list of 1-4 D registers ($Vn), indexed byte-wise
// by $Vm. All forms share the custom decoder below.
let DecoderMethod = "DecodeTBLInstruction" in {
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
// Multi-register table forms; only the one-register form has an ISel
// pattern here — the wider ones are selected via the pseudos below.
let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudos carrying the table as a single QQ register tuple until
// register allocation.
def  VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def  VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
// Like VTBL but reads/writes the destination: $orig is tied to $Vd.
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def  VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// VRINT : Vector Rounding
// op9_7 is the rounding-mode selector placed in Inst{9-7}; note the
// instantiations below skip 0b100 and 0b110.
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
                    !strconcat("vrint", op), "f32",
                    v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
    let Inst{9-7} = op9_7;
  }
  def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
                    !strconcat("vrint", op), "f32",
                    v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
    let Inst{9-7} = op9_7;
  }
  }

  // Accept the redundant two-type spelling "vrintX.f32.f32".
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  // "2Op" variants take two inputs (destructive source + operand).
  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                  !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                 SDPatternOperator Int>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                  !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
      Requires<[HasV8, HasCrypto]>;
}

def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;

// SHA1H/SHA1C/SHA1M/SHA1P use null_frag here because their intrinsics
// take/return an i32 hash element; they are selected by the explicit
// Pat<> definitions below, which shuffle that i32 through an SPR.
def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;

// sha1h: move the i32 operand into lane 0 of a Q register, run SHA1H,
// then extract lane 0 back out to a GPR.
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
              (SHA1H (SUBREG_TO_REG (i64 0),
                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
                                    ssub_0)),
              ssub_0)), GPR)>;

def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1C v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1M v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1P v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// These pattern classes run scalar f32 ops on NEON by inserting the
// scalar into lane 0 of an (undefined) v2f32 D register, executing the
// vector instruction, and extracting lane 0. The DPR_VFP2 copies keep
// the D register within the VFP2-accessible range (see the
// UseNEONForFP-predicated instantiations below).
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$a, ssub_0),
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$acc, ssub_0),
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$a, ssub_0),
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class NVCVTIFPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode GPR:$a)),
              (f32 (EXTRACT_SUBREG
                     (v2f32 (Inst
                       (INSERT_SUBREG
                         (v2f32 (IMPLICIT_DEF)),
                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
                     ssub_0))>;
class NVCVTFIPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(i32 (OpNode SPR:$a)),
              (i32 (EXTRACT_SUBREG
                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                                 SPR:$a, ssub_0))),
                     ssub_0))>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
// Multiply-accumulate: the VMLA/VMLS forms require non-fused MAC, the
// VFMA/VFMS forms require fused MAC — mutually exclusive predicates.
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;

// NEON doesn't have any f64 conversions, so provide patterns to make
// sure the VFP conversions match when extracting from a vector.
// int-element-of-vector -> f64: extract the 32-bit lane as an S
// sub-register and use the VFP VSITOD/VUITOD conversions directly.
def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;


// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
        Requires<[HasNEON, DontUseVMOVSR]>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// On little-endian targets a bitconvert between vector types of the same
// register class is a pure re-interpretation (no code). The conversions
// left outside the IsLE blocks preserve 32-bit lane boundaries and are
// free on either endianness; the IsBE equivalents for the rest are the
// VREV patterns further down.
let Predicates = [IsLE] in {
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
}
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
}
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
}
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
}
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;
}

let Predicates = [IsLE] in {
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
}
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
}
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
}
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
}
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
}

// On big-endian targets the in-register lane order differs between
// element widths, so a bitconvert that changes element width needs a
// VREV whose size suffix is the *larger* of the two element widths.
let Predicates = [IsBE] in {
  // 64 bit conversions
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (VREV32d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (VREV16d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (VREV32d8  DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;

  // 128 bit conversions
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
}

// Fold extracting an element out of a v2i32 into a vfp register.
// A 32-bit lane of a D register is exactly an S sub-register, so the
// extract+bitconvert needs no instruction at all.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load. Example:
// Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr))
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
//                              (f64 (IMPLICIT_DEF)), (i32 0)))>;
// _Any/_Z use the unsigned VMOVLu; _S uses the sign-extending VMOVLs.
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  let AddedComplexity = 10 in {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                 (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
// The 32-bit payload is loaded into lane 0 with VLD1LN, widened as a
// full vector, then the low D half of the result is taken.
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfSingle definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy, string RevLanes> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
}

// The following class definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty, string RevLanes> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
               (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
               (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
               (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v2i32 (extloadvi8 addrmode6:$addr))
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                                   (f64 (IMPLICIT_DEF)), (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
// Note the narrower VLD1LNd16 here: only 16 bits of payload are needed.
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV16d8")
               (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
           dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV16d8")
               (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
           dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV16d8")
               (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
           dsub_0)>;
}

defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

let Predicates = [IsLE] in {
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

let Predicates = [IsBE] in {
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
}

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
let Predicates = [IsLE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (VLD1LNd16 addrmode6:$addr,
                   (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (VLD1LNd16 addrmode6:$addr,
                   (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
        (VLD1LNd16 addrmode6:$addr,
                   (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
6772let Predicates = [IsBE] in { 6773 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), 6774 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 6775 (!cast<Instruction>("VREV16d8") 6776 (VLD1LNd16 addrmode6:$addr, 6777 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; 6778 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), 6779 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 6780 (!cast<Instruction>("VREV16d8") 6781 (VLD1LNd16 addrmode6:$addr, 6782 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; 6783 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), 6784 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 6785 (!cast<Instruction>("VREV16d8") 6786 (VLD1LNd16 addrmode6:$addr, 6787 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; 6788} 6789 6790//===----------------------------------------------------------------------===// 6791// Assembler aliases 6792// 6793 6794def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", 6795 (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>; 6796def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", 6797 (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; 6798 6799// VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ... immediates
// VAND with an immediate is encoded as VBIC of the inverted immediate.
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;


// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                   (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                        pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                         pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;

def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
                   (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                        pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
                    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                         pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
                    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
                    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                         rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
                    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                         rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                   (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                        pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                         pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;

def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
                   (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                        pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
                    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                         pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
                    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
                    (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                         rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
                    (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                         rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
                   (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                        pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;

def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
                   (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                        pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                         rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                         rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                         rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                         rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
                   (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                        pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;

def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
                   (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                        pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                         rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                         rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                         rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                         rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;

def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;

def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeDHWordIndexed:$list,
                     addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQHWordIndexed:$list,
                     addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                     rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;

def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeDHWordIndexed:$list,
                     addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQHWordIndexed:$list,
                     addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                     rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;

def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list,
                    addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list,
                    addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
// Note: no ".8" Q-register variant is defined for the lane-indexed form.
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

// Writeback with the fixed, instruction-implied increment: "$addr!".
def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

// Writeback with a register increment: "$addr, $Rm".
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;


// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// All VLD4 multiple-structure forms accept the same 64/128/256-bit address
// alignment regardless of the element size.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Writeback with the fixed, instruction-implied increment: "$addr!".
def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Writeback with a register increment: "$addr, $Rm".
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
// Note: no ".8" Q-register variant is defined for the lane-indexed form.
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

// Writeback with the fixed, instruction-implied increment: "$addr!".
def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

// Writeback with a register increment: "$addr, $Rm".
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// All VST4 multiple-structure forms accept the same 64/128/256-bit address
// alignment regardless of the element size.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Writeback with the fixed, instruction-implied increment: "$addr!".
def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Writeback with a register increment: "$addr, $Rm".
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;

// VMOV/VMVN takes an optional datatype suffix
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// (The original comment said VCLT/VCGT, but the aliases that follow map
// "vcle" onto the VCGE* instructions; the VCLT group has its own header.)
// D-register versions.
7688def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", 7689 (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7690def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", 7691 (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7692def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", 7693 (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7694def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", 7695 (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7696def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", 7697 (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7698def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", 7699 (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7700def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", 7701 (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7702// Q-register versions. 7703def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", 7704 (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7705def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", 7706 (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7707def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", 7708 (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7709def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", 7710 (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7711def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", 7712 (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7713def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", 7714 (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7715def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", 7716 (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7717 7718// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. 7719// D-register versions. 
7720def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", 7721 (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7722def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm", 7723 (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7724def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm", 7725 (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7726def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm", 7727 (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7728def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm", 7729 (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7730def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm", 7731 (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7732def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm", 7733 (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7734// Q-register versions. 7735def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", 7736 (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7737def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm", 7738 (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7739def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm", 7740 (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7741def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm", 7742 (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7743def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm", 7744 (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7745def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", 7746 (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7747def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", 7748 (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7749 7750// VSWP allows, but does not require, a type suffix. 7751defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", 7752 (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; 7753defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", 7754 (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; 7755 7756// VBIF, VBIT, and VBSL allow, but do not require, a type suffix. 
7757defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", 7758 (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7759defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", 7760 (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7761defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", 7762 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7763defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", 7764 (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7765defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", 7766 (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7767defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", 7768 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7769 7770// "vmov Rd, #-imm" can be handled via "vmvn". 7771def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", 7772 (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 7773def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", 7774 (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 7775def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", 7776 (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 7777def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", 7778 (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 7779 7780// 'gas' compatibility aliases for quad-word instructions. Strictly speaking, 7781// these should restrict to just the Q register variants, but the register 7782// classes are enough to match correctly regardless, so we keep it simple 7783// and just use MnemonicAlias. 7784def : NEONMnemonicAlias<"vbicq", "vbic">; 7785def : NEONMnemonicAlias<"vandq", "vand">; 7786def : NEONMnemonicAlias<"veorq", "veor">; 7787def : NEONMnemonicAlias<"vorrq", "vorr">; 7788 7789def : NEONMnemonicAlias<"vmovq", "vmov">; 7790def : NEONMnemonicAlias<"vmvnq", "vmvn">; 7791// Explicit versions for floating point so that the FPImm variants get 7792// handled early. The parser gets confused otherwise. 
7793def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; 7794def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; 7795 7796def : NEONMnemonicAlias<"vaddq", "vadd">; 7797def : NEONMnemonicAlias<"vsubq", "vsub">; 7798 7799def : NEONMnemonicAlias<"vminq", "vmin">; 7800def : NEONMnemonicAlias<"vmaxq", "vmax">; 7801 7802def : NEONMnemonicAlias<"vmulq", "vmul">; 7803 7804def : NEONMnemonicAlias<"vabsq", "vabs">; 7805 7806def : NEONMnemonicAlias<"vshlq", "vshl">; 7807def : NEONMnemonicAlias<"vshrq", "vshr">; 7808 7809def : NEONMnemonicAlias<"vcvtq", "vcvt">; 7810 7811def : NEONMnemonicAlias<"vcleq", "vcle">; 7812def : NEONMnemonicAlias<"vceqq", "vceq">; 7813 7814def : NEONMnemonicAlias<"vzipq", "vzip">; 7815def : NEONMnemonicAlias<"vswpq", "vswp">; 7816 7817def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">; 7818def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">; 7819 7820 7821// Alias for loading floating point immediates that aren't representable 7822// using the vmov.f32 encoding but the bitpattern is representable using 7823// the .i32 encoding. 7824def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", 7825 (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; 7826def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", 7827 (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; 7828