// AArch64InstrInfo.td revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
// Subtarget-feature predicates; the AssemblerPredicate string pairs are the
// feature bit plus the human-readable name used in asm-parser diagnostics.
def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                       AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON          : Predicate<"Subtarget->hasNEON()">,
                       AssemblerPredicate<"FeatureNEON", "neon">;
def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
                       AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                       AssemblerPredicate<"FeatureCRC", "crc">;
def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;

//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                               [SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>,
                                                SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                              [SDTCisSameAs<0, 1>,
                                               SDTCisSameAs<0, 2>,
                                               SDTCisInt<0>,
                                               SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                                 [SDTCisSameAs<0, 2>,
                                                  SDTCisSameAs<0, 3>,
                                                  SDTCisInt<0>,
                                                  SDTCisVT<1, i32>,
                                                  SDTCisVT<4, i32>]>;

def SDT_AArch64Brcond : SDTypeProfile<0, 3,
                                      [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                       SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                          SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel : SDTypeProfile<1, 4,
                                    [SDTCisSameAs<0, 1>,
                                     SDTCisSameAs<0, 2>,
                                     SDTCisInt<3>,
                                     SDTCisVT<4, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
                                    [SDTCisFP<0>,
                                     SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                            SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>,
                                             SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                   SDTCisPtrTy<1>]>;
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                            [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                             SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                             SDTCisSameAs<1, 4>]>;


// Node definitions.
def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                  SDCallSeqStart<[ SDTCisVT<0, i32> ]>,
                                  [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
                                  SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                                 SDTCisVT<1, i32> ]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call          : SDNode<"AArch64ISD::CALL",
                                  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                   SDNPVariadic]>;
def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                                  [SDNPHasChain]>;
def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                                  [SDNPHasChain]>;
def AArch64cbnz          : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                                  [SDNPHasChain]>;
def AArch64tbz           : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                                  [SDNPHasChain]>;
def AArch64tbnz          : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                                  [SDNPHasChain]>;


def AArch64csel          : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv         : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retflag       : SDNode<"AArch64ISD::RET_FLAG", SDTNone,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc           : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn>;
def AArch64sbc           : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
// ADDS/ANDS are commutative; SUBS is not.
def AArch64add_flag      : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
                                  [SDNPCommutative]>;
def AArch64sub_flag      : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
def AArch64and_flag      : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
                                  [SDNPCommutative]>;
def AArch64adc_flag      : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag      : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

def AArch64fcmp          : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;

def AArch64fmax          : SDNode<"AArch64ISD::FMAX", SDTFPBinOp>;
def AArch64fmin          : SDNode<"AArch64ISD::FMIN", SDTFPBinOp>;

def AArch64dup           : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8      : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16     : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32     : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64     : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;

def AArch64zip1          : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2          : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1          : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2          : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1          : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2          : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit     : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift    : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl      : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift    : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl      : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi          : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov          : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16         : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32         : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64         : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext           : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr         : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr         : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl          : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli        : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli        : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui       : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri        : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri        : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;

def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
// CMTST has no dedicated ISD node; it is expressed as NOT(CMEQz(AND(l, r))).
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                           (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>;

def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>;

def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                         [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                             [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
                                 SDT_AArch64TLSDescCall,
                                 [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                  SDNPVariadic]>;

def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

// AArch64 Instruction Predicate Definitions.
//
def HasZCZ         : Predicate<"Subtarget->hasZeroCycleZeroing()">;
def NoZCZ          : Predicate<"!Subtarget->hasZeroCycleZeroing()">;
def IsDarwin       : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin    : Predicate<"!Subtarget->isTargetDarwin()">;
def ForCodeSize    : Predicate<"ForCodeSize">;
def NotForCodeSize : Predicate<"!ForCodeSize">;

include "AArch64InstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//

let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
                              [(AArch64callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1

let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
                     [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                              tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                              tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                              tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                              tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                              tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                              texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;

} // isReMaterializable, isCodeGenOnly

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;

//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;

// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

def DMB : CRmSystemI<barrier_op, 0b101, "dmb">;
def DSB : CRmSystemI<barrier_op, 0b100, "dsb">;
def ISB : CRmSystemI<barrier_op, 0b110, "isb">;
// Bare "clrex"/"isb" default their CRm operand to 0b1111 (full system).
def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;

def MRS      : MRSI;
def MSR      : MSRI;
def MSRpstate: MSRpstateI;

// The thread pointer (on Linux, at least, where this has been implemented) is
// TPIDR_EL0.
def : Pat<(AArch64threadpointer), (MRS 0xde82)>;

// Generic system instructions
def SYSxt  : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;

def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
                 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;

//===----------------------------------------------------------------------===//
// Move immediate instructions.
//===----------------------------------------------------------------------===//

defm MOVK : InsertImmediate<0b11, "movk">;
defm MOVN : MoveImmediate<0b00, "movn">;

let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;

// First group of aliases covers an implicit "lsl #0".
def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;

// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>;

def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>;
def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>;

// Final group of aliases covers true "mov $Rd, $imm" cases.
multiclass movw_mov_alias<string basename, Instruction INST, RegisterClass GPR,
                          int width, int shift> {
  def _asmoperand : AsmOperandClass {
    let Name = basename # width # "_lsl" # shift # "MovAlias";
    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
                          # shift # ">";
    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
  }

  def _movimm : Operand<i32> {
    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
  }

  def : InstAlias<"mov $Rd, $imm",
                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
}

defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;

defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;

let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
    isAsCheapAsAMove = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, we can select
// directly to the real instructions and get rid of these pseudos.

def MOVi32imm
    : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
             [(set GPR32:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
def MOVi64imm
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
             [(set GPR64:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
} // isReMaterializable, isCodeGenOnly

// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
// eventual expansion code fewer bits to worry about getting right. Marshalling
// the types is a little tricky though:
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def trunc_imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i32);
}]>;

def : Pat<(i64 i64imm_32bit:$src),
          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;

// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
// sequences.
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
                               tglobaladdr:$g1, tglobaladdr:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48),
                                  tglobaladdr:$g2, 32),
                          tglobaladdr:$g1, 16),
                  tglobaladdr:$g0, 0)>;

def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
                               tblockaddress:$g1, tblockaddress:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48),
                                  tblockaddress:$g2, 32),
                          tblockaddress:$g1, 16),
                  tblockaddress:$g0, 0)>;

def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
                               tconstpool:$g1, tconstpool:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48),
                                  tconstpool:$g2, 32),
                          tconstpool:$g1, 16),
                  tconstpool:$g0, 0)>;

def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
                               tjumptable:$g1, tjumptable:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g3, 48),
                                  tjumptable:$g2, 32),
                          tjumptable:$g1, 16),
                  tjumptable:$g0, 0)>;

//===----------------------------------------------------------------------===//
// Arithmetic instructions.
//===----------------------------------------------------------------------===//

// Add/subtract with carry.
defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;

// NGC/NGCS are SBC/SBCS with the zero register as the first source.
def : InstAlias<"ngc $dst, $src",  (SBCWr  GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngc $dst, $src",  (SBCXr  GPR64:$dst, XZR, GPR64:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;

// Add/subtract
defm ADD : AddSub<0, "add", add>;
defm SUB : AddSub<1, "sub">;

// "mov" to/from SP is an ADD #0; one operand must be SP for these forms.
def : InstAlias<"mov $dst, $src",
                (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;

defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn">;
defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp">;

// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
          (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
          (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
          (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
          (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
          (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
          (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
          (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;

// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation.
let AddedComplexity = 1 in {
def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}

// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation.
let AddedComplexity = 1 in {
def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}

// NEG/NEGS are SUB/SUBS with the zero register as the first source.
def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;

def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;


// Unsigned/Signed divide
defm UDIV : Div<0, "udiv", udiv>;
defm SDIV : Div<1, "sdiv", sdiv>;
let isCodeGenOnly = 1 in {
defm UDIV_Int : Div<0, "udiv", int_aarch64_udiv>;
defm SDIV_Int : Div<1, "sdiv", int_aarch64_sdiv>;
}

// Variable shift
defm ASRV : Shift<0b10, "asr", sra>;
defm LSLV : Shift<0b00, "lsl", shl>;
defm LSRV : Shift<0b01, "lsr", srl>;
defm RORV : Shift<0b11, "ror", rotr>;

def : ShiftAlias<"asrv", ASRVWr, GPR32>;
def : ShiftAlias<"asrv", ASRVXr, GPR64>;
def : ShiftAlias<"lslv", LSLVWr, GPR32>;
def : ShiftAlias<"lslv", LSLVXr, GPR64>;
def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;

// Multiply-add
let AddedComplexity = 7 in {
defm MADD : MulAccum<0, "madd", add>;
defm MSUB : MulAccum<1, "msub", sub>;

// Plain multiply is MADD with a zero accumulator.
def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;

def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
} // AddedComplexity = 7

let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;

def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
} // AddedComplexity = 5

def : MulAccumWAlias<"mul", MADDWrrr>;
def : MulAccumXAlias<"mul", MADDXrrr>;
def : MulAccumWAlias<"mneg", MSUBWrrr>;
def : MulAccumXAlias<"mneg", MSUBXrrr>;
def : WideMulAccumAlias<"smull", SMADDLrrr>;
def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
def : WideMulAccumAlias<"umull", UMADDLrrr>;
def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;

// Multiply-high
def SMULHrr : MulHi<0b010, "smulh", mulhs>;
def UMULHrr : MulHi<0b110, "umulh", mulhu>;

// CRC32
def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;

def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;


//===----------------------------------------------------------------------===//
// Logical instructions.
//===----------------------------------------------------------------------===//

// (immediate)
defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag>;
defm AND  : LogicalImm<0b00, "and", and>;
defm EOR  : LogicalImm<0b10, "eor", xor>;
defm ORR  : LogicalImm<0b01, "orr", or>;

// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
// here is a bit dodgy on the AsmParser side too.
def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
                                          logical_imm32:$imm), 0>;
def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
                                          logical_imm64:$imm), 0>;


// (register)
defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
defm BICS : LogicalRegS<0b11, 1, "bics",
                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
defm AND  : LogicalReg<0b00, 0, "and", and>;
defm BIC  : LogicalReg<0b00, 1, "bic",
                       BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
defm EON  : LogicalReg<0b10, 1, "eon",
                       BinOpFrag<(xor node:$LHS, (not node:$RHS))>>;
defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
defm ORN  : LogicalReg<0b01, 1, "orn",
                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
defm ORR  : LogicalReg<0b01, 0, "orr", or>;

// Register "mov" is ORR with the zero register; "mvn" is ORN likewise.
def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;

def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;

def : InstAlias<"mvn $Wd, $Wm$sh",
                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
def : InstAlias<"mvn $Xd, $Xm$sh",
                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;

// "tst" is ANDS discarding the result into the zero register.
def : InstAlias<"tst $src1, $src2",
                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
def : InstAlias<"tst $src1, $src2",
                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;

def : InstAlias<"tst $src1, $src2",
                (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
def : InstAlias<"tst $src1, $src2",
                (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;

def : InstAlias<"tst $src1, $src2$sh",
                (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
def : InstAlias<"tst $src1, $src2$sh",
                (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;


def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;


//===----------------------------------------------------------------------===//
// One operand data processing instructions.
//===----------------------------------------------------------------------===//

defm CLS    : OneOperandData<0b101, "cls">;
defm CLZ    : OneOperandData<0b100, "clz", ctlz>;
defm RBIT   : OneOperandData<0b000, "rbit">;
def  REV16Wr : OneWRegData<0b001, "rev16",
                           UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def  REV16Xr : OneXRegData<0b001, "rev16", null_frag>;

// Count-trailing-zeros is bit-reverse followed by count-leading-zeros.
def : Pat<(cttz GPR32:$Rn),
          (CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
          (CLZXr (RBITXr GPR64:$Rn))>;
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
                    (i32 1))),
          (CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
                    (i64 1))),
          (CLSXr GPR64:$Rn)>;

// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit, but actually use different
// opcode bits for the different sizes.
def REVWr   : OneWRegData<0b010, "rev", bswap>;
def REVXr   : OneXRegData<0b011, "rev", bswap>;
def REV32Xr : OneXRegData<0b010, "rev32",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;

// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;

//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in
defm EXTR : ExtractImm<"extr">;
// "ror Rd, Rs, #sh" is EXTR with the same register as both sources.
def : InstAlias<"ror $dst, $src, $shift",
            (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
            (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;

def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;

//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in {
defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
}

// SDNodeXForms that convert a shift amount into the (immr, imms) encoding
// used by the UBFM/SBFM forms of shift. For a left shift by s on a 32-bit
// value, immr = (32 - s) mod 32 and imms = 31 - s.
def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

// 64-bit counterparts: immr = (64 - s) mod 64, imms = 63 - s.
def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 31 ? 31 : enc;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

// shl selects to UBFM with the transformed immediates above.
def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                              (i64 (i32shift_b imm0_31:$imm)))>;
def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                              (i64 (i64shift_b imm0_63:$imm)))>;

let AddedComplexity = 10 in {
def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
}

// asr/sxt* are SBFM with fixed imms; lsr/uxt* below are the UBFM duals.
def : InstAlias<"asr $dst, $src, $shift",
                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"asr $dst, $src, $shift",
                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;

def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

//===----------------------------------------------------------------------===//
// Conditionally set flags instructions.
//===----------------------------------------------------------------------===//
defm CCMN : CondSetFlagsImm<0, "ccmn">;
defm CCMP : CondSetFlagsImm<1, "ccmp">;

defm CCMN : CondSetFlagsReg<0, "ccmn">;
defm CCMP : CondSetFlagsReg<1, "ccmp">;

//===----------------------------------------------------------------------===//
// Conditional select instructions.
//===----------------------------------------------------------------------===//
defm CSEL  : CondSelect<0, 0b00, "csel">;

def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;

// Direct selections of the target-specific conditional-select nodes.
def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;

// select(0, 1) and select(0, -1) have dedicated zero-register encodings.
def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr XZR, XZR, (i32 imm:$cc))>;

// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
def : InstAlias<"cset $dst, $cc",
                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"csetm $dst, $cc",
                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
// ADR/ADRP materialize PC-relative addresses; both are rematerializable so
// the register allocator can recompute them instead of spilling.
let isReMaterializable = 1 in {
let neverHasSideEffects = 1, mayStore = 0, mayLoad = 0 in {
def ADR  : ADRI<0, "adr", adrlabel, []>;
} // neverHasSideEffects = 1

def ADRP : ADRI<1, "adrp", adrplabel,
                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1

// page address of a constant pool entry, block address
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;

//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET  : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>;
} // isCall

let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR  : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}

// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
  let AsmString = ".tlsdesccall $sym";
}

// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
// gets expanded to two MCInsts during lowering.
let isCall = 1, Defs = [LR] in
def TLSDESC_BLR
    : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
             [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;

def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym),
          (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond;

//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;

//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;

//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B  : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
// Calls to external symbols use the same BL encoding.
def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;

//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//
def BRK   : ExceptionGeneration<0b001, 0b00, "brk">;
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">;
def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC   : ExceptionGeneration<0b000, 0b11, "smc">;
def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;

// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>;

//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// LDP variants over GPR32/GPR64 and the FP register files; the immediate is
// a 7-bit signed offset scaled by the access size (simm7s4/s8/s16).
defm LDPW : LoadPairOffset<0b00, 0, GPR32, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64, simm7s8, "ldp">;
defm LDPS : LoadPairOffset<0b00, 1, FPR32, simm7s4, "ldp">;
defm LDPD : LoadPairOffset<0b01, 1, FPR64, simm7s8, "ldp">;
defm LDPQ : LoadPairOffset<0b10, 1, FPR128, simm7s16, "ldp">;

defm LDPSW : LoadPairOffset<0b01, 0, GPR64, simm7s4, "ldpsw">;

// Pair (pre-indexed)
def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, simm7s4, "ldp">;
def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, simm7s8, "ldp">;
def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, simm7s4, "ldp">;
def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, simm7s16, "ldp">;

def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, simm7s4, "ldpsw">;

// Pair (post-indexed)
def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">;
def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64, simm7s8, "ldp">;
def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32, simm7s4, "ldp">;
def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128, simm7s16, "ldp">;

def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">;


// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64, simm7s8, "ldnp">;
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128, simm7s16, "ldnp">;

//---
// (register offset)
//---

// Integer
defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
defm LDRB : Load8RO<0b00,  1, 0b01, FPR8,   "ldr", untyped, load>;
defm LDRH : Load16RO<0b01, 1, 0b01, FPR16,  "ldr", f16, load>;
defm LDRS : Load32RO<0b10, 1, 0b01, FPR32,  "ldr", f32, load>;
defm LDRD : Load64RO<0b11, 1, 0b01, FPR64,  "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128, "ldr", f128, load>;

// Load sign-extended half-word
// The W form sign-extends into a 32-bit result, the X form into 64 bits;
// the two differ only in the opc field (0b11 vs 0b10).
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;

// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;

// Load sign-extended word
defm LDRSW  : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;

// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;

// For regular load, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
// Selects a scalar load feeding scalar_to_vector into a direct FP-register
// load of the element, inserted into an IMPLICIT_DEF vector at subregister
// `sub`. One pattern per register-offset flavor (W and X index registers).
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
                              ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;

  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}

let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;

defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>;

defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>;


// v1i64 fills the whole D register, so no INSERT_SUBREG is needed.
def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                           ro_Wextend64:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                           ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}

// Match all load 64 bits width whose type is compatible with FPR64
// Selects a whole-vector load through the register-offset addressing modes;
// instantiated once per vector type that fits the D or Q register.
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
                        Instruction LOADW, Instruction LOADX> {

  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
  defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v8i8,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
}

// Single-element vectors have no lane-order issue, so no endian predicate.
defm : VecROLoadPat<ro64, v1i64,  LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64,  LDRDroW, LDRDroX>;

// Match all load 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10

// zextload -> i64
// The 32-bit load instruction implicitly zeroes the upper half of the X
// register, so a SUBREG_TO_REG of the W-form result is sufficient.
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}

let AddedComplexity = 10 in {
  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  zextloadi1,  LDRBBroW, LDRBBroX>;

  // extload -> zextload
  defm : ExtLoadTo64ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // extloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  extloadi1,   LDRBBroW, LDRBBroX>;
}


// zextload -> i64
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;

}

let AddedComplexity = 10 in {
  // extload -> zextload
  defm : ExtLoadTo32ROPat<ro8,  extloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo32ROPat<ro8,  zextloadi1, LDRBBroW, LDRBBroX>;
}

//---
// (unsigned immediate)
//---
// uimm12sN offsets are 12-bit unsigned immediates scaled by the access size.
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64, uimm12s8, "ldr",
                   [(set GPR64:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32, uimm12s4, "ldr",
                   [(set GPR32:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8, uimm12s1, "ldr",
                   [(set FPR8:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16, uimm12s2, "ldr",
                   [(set (f16 FPR16:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32, uimm12s4, "ldr",
                   [(set (f32 FPR32:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64, uimm12s8, "ldr",
                   [(set (f64 FPR64:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128, uimm12s16, "ldr",
                   [(set (f128 FPR128:$Rt),
                         (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;

// For regular load, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
// scalar_to_vector of a loaded element: load straight into the FP register
// file and insert into an IMPLICIT_DEF vector at the element's subregister.
def : Pat <(v8i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
// v1i64 fills the whole D register, so no insert is needed.
def : Pat <(v1i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;

// Match all load 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
// Single-element vectors have no lane-order issue, so no endian predicate.
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;

// Match all load 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128  (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;

defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                    [(set GPR32:$Rt,
                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                     uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                    [(set GPR32:$Rt,
                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                   uimm12s1:$offset)))]>;
// zextload -> i64
// The W-register load already zeroes bits [63:32], so SUBREG_TO_REG suffices.
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
                     [(set GPR32:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
                     [(set GPR64:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;

// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
                     [(set GPR32:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
                     [(set GPR64:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;

// load sign-extended word
defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
                     [(set GPR64:$Rt,
                           (sextloadi32 (am_indexed32 GPR64sp:$Rn,
                                                      uimm12s4:$offset)))]>;

// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;

// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                        [(AArch64Prefetch imm:$Rt,
                                          (am_indexed64 GPR64sp:$Rn,
                                                        uimm12s8:$offset))]>;

def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;

//---
// (literal)
def LDRWl : LoadLiteral<0b00, 0, GPR32, "ldr">;
def LDRXl : LoadLiteral<0b01, 0, GPR64, "ldr">;
def LDRSl : LoadLiteral<0b00, 1, FPR32, "ldr">;
def LDRDl : LoadLiteral<0b01, 1, FPR64, "ldr">;
def LDRQl : LoadLiteral<0b10, 1, FPR128, "ldr">;

// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">;

// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;

//---
// (unscaled immediate)
// LDUR* take a raw 9-bit signed byte offset (simm9), unscaled.
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64, "ldur",
                    [(set GPR64:$Rt,
                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32, "ldur",
                    [(set GPR32:$Rt,
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8, "ldur",
                    [(set FPR8:$Rt,
                          (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16, "ldur",
                    [(set FPR16:$Rt,
                          (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32, "ldur",
                    [(set (f32 FPR32:$Rt),
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64, "ldur",
                    [(set (f64 FPR64:$Rt),
                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128, "ldur",
                    [(set (f128 FPR128:$Rt),
                          (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;

defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
             [(set GPR32:$Rt,
                    (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
// NOTE: the byte load must use the 8-bit unscaled address mode; this
// previously (incorrectly) used am_unscaled16, copy-pasted from LDURHH.
// Every other byte-sized pattern in this file (LDURBBi below, LDURB above)
// uses am_unscaled8.
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
             [(set GPR32:$Rt,
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// Match all load 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;

// Match all load 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}

// anyext -> zext
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
      (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1
(am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1544 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 1545def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), 1546 (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1547def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 1548 (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1549def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1550 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1551def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1552 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1553 1554 1555//--- 1556// LDR mnemonics fall back to LDUR for negative or unaligned offsets. 1557 1558// Define new assembler match classes as we want to only match these when 1559// the don't otherwise match the scaled addressing mode for LDR/STR. Don't 1560// associate a DiagnosticType either, as we want the diagnostic for the 1561// canonical form (the scaled operand) to take precedence. 
// Assembler operand classes for the LDR->LDUR fallback: they accept a simm9
// offset only when it does not also fit the scaled uimm12 form of the given
// access Width (see isSImm9OffsetFB in the asm parser).
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

// Immediate operands wired to the fallback match classes above, one per
// access width.
def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}

// "ldr" with an offset that only fits the unscaled form assembles to LDUR.
// The trailing 0 marks these aliases as assembly-only (never used for
// printing).
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

// zextload -> i64: insert the zero-extended 32-bit load into a zeroed
// 64-bit register.
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
  (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
  (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;

// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
             [(set GPR32:$Rt,
                   (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
             [(set GPR64:$Rt,
                   (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
              [(set GPR32:$Rt,
                    (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
              [(set GPR64:$Rt,
                    (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
              [(set GPR64:$Rt,
                    (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// zero and sign extending aliases from generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                  [(AArch64Prefetch imm:$Rt,
                                    (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
// No ISel patterns: these are only reachable from assembly / intrinsics.
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW  : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;

//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64, "ldr">;
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8,  "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128, "ldr">;

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64, "ldrsb">;

// load zero-extended byte / half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">;

//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64, "ldr">;
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8,  "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128, "ldr">;

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64, "ldrsb">;

// load zero-extended byte / half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">;

//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64, simm7s8, "stp">;
defm STPS : StorePairOffset<0b00, 1, FPR32, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128, simm7s16, "stp">;

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64, simm7s8, "stp">;
def STPSpre : StorePairPreIdx<0b00, 1, FPR32, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128, simm7s16, "stp">;

// Pair (post-indexed)
// FIX: this comment previously said "pre-indexed" but the records below are
// the post-indexed forms.
def STPWpost : StorePairPostIdx<0b00, 0, GPR32, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128, simm7s16, "stp">;

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64, simm7s8, "stnp">;
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128, simm7s16, "stnp">;

//---
// (Register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;


// Floating-point
defm STRB : Store8RO< 0b00, 1, 0b00, FPR8,   "str", untyped, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16,  "str", f16, store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32,  "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64,  "str", f64, store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128, "str", f128, store>;

// Truncating store of a GPR64 value: extract the low 32 bits and use the
// 32-bit-source store instruction, for both W- and X-register extends.
multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}

// Vector store through an FP register, for both W- and X-register extends.
multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all store 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8,  FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all store 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10

//---
// (unsigned immediate)
defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str",
                    [(store GPR64:$Rt,
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUI<0b10, 0, 0b00, GPR32, uimm12s4, "str",
                    [(store GPR32:$Rt,
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRB : StoreUI<0b00, 1, 0b00, FPR8, uimm12s1, "str",
                    [(store FPR8:$Rt,
                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16, uimm12s2, "str",
                    [(store (f16 FPR16:$Rt),
                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32, uimm12s4, "str",
                    [(store (f32 FPR32:$Rt),
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64, uimm12s8, "str",
                    [(store (f64 FPR64:$Rt),
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
// f128 store is matched by the explicit Pat below (inside the
// AddedComplexity block), not here.
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128, uimm12s16, "str", []>;

defm STRHH : StoreUI<0b01, 0, 0b00, GPR32, uimm12s2, "strh",
                     [(truncstorei16 GPR32:$Rt,
                                     (am_indexed16 GPR64sp:$Rn,
                                                   uimm12s2:$offset))]>;
defm STRBB : StoreUI<0b00, 0, 0b00, GPR32, uimm12s1, "strb",
                     [(truncstorei8 GPR32:$Rt,
                                    (am_indexed8 GPR64sp:$Rn,
                                                 uimm12s1:$offset))]>;

// Match all store 64 bits width whose type is compatible with FPR64
let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(store (v1f64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

// Match all store 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(store (f128  FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
  (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
  (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
  (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10

//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64, "stur",
                         [(store GPR64:$Rt,
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32, "stur",
                         [(store GPR32:$Rt,
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8, "stur",
                         [(store FPR8:$Rt,
                                 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16, "stur",
                         [(store (f16 FPR16:$Rt),
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32, "stur",
                         [(store (f32 FPR32:$Rt),
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64, "stur",
                         [(store (f64 FPR64:$Rt),
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128, "stur",
                         [(store (f128 FPR128:$Rt),
                                 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32, "sturh",
                         [(truncstorei16 GPR32:$Rt,
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32, "sturb",
                         [(truncstorei8 GPR32:$Rt,
                                  (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;

// Match all store 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;

// Match all store 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  // FIX: the v2f64 pattern appeared twice in this block; the redundant
  // duplicate has been removed.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
  (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
  (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
  (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;

//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.
// "str"/"strb"/"strh" with offsets that only fit the unscaled form assemble
// to STUR*; trailing 0 marks these as assembly-only aliases.
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

def : InstAlias<"strb $Rt, [$Rn, $offset]",
                (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"strh $Rt, [$Rn, $offset]",
                (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;

//---
// (unscaled immediate, unprivileged)
// No ISel patterns: these are only reachable from assembly.
defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;

defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;

//---
// (immediate pre-indexed)
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32, "str", pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64, "str", pre_store, i64>;
def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8,  "str", pre_store, untyped>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16, "str", pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32, "str", pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64, "str", pre_store, f64>;
def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128, "str", pre_store, f128>;

def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb", pre_truncsti8,  i32>;
def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh", pre_truncsti16, i32>;

// truncstore i64: store the low 32 bits of the GPR64 with the
// 32-bit-source pre-indexed instruction.
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                   simm9:$off)>;
def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                    simm9:$off)>;
def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                    simm9:$off)>;

// 64-bit vector pre-indexed stores through the D register.
def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

// 128-bit vector pre-indexed stores through the Q register.
def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//---
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32, "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64, "str", post_store, i64>;
def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8,  "str", post_store, untyped>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16, "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32, "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64, "str", post_store, f64>;
def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128, "str", post_store, f128>;

def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb", post_truncsti8, i32>;
def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh", post_truncsti16, i32>;

// truncstore i64: same low-32-bit extraction as the pre-indexed forms.
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                    simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                     simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
                     simm9:$off)>;

// 64-bit vector post-indexed stores through the D register.
def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

// 128-bit vector post-indexed stores through the Q register.
def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
//===----------------------------------------------------------------------===//

// Load-acquire (one-copy-atomic with acquire ordering; no exclusive monitor).
def LDARW  : LoadAcquire   <0b10, 1, 1, 0, 1, GPR32, "ldar">;
def LDARX  : LoadAcquire   <0b11, 1, 1, 0, 1, GPR64, "ldar">;
def LDARB  : LoadAcquire   <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
def LDARH  : LoadAcquire   <0b01, 1, 1, 0, 1, GPR32, "ldarh">;

// Load-exclusive with acquire ordering.
def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;

// Load-exclusive without ordering semantics.
def LDXRW  : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
def LDXRX  : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
def LDXRB  : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
def LDXRH  : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;

// Store-release.
def STLRW  : StoreRelease  <0b10, 1, 0, 0, 1, GPR32, "stlr">;
def STLRX  : StoreRelease  <0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB  : StoreRelease  <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH  : StoreRelease  <0b01, 1, 0, 0, 1, GPR32, "stlrh">;

// Store-exclusive, with (stlxr) and without (stxr) release ordering.
def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;

def STXRW  : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
def STXRX  : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
def STXRB  : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
def STXRH  : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;

// Exclusive load/store pair forms.
def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;

def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;

def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;

def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;

//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//

// FP -> integer conversions with explicit rounding modes; most map directly
// onto target intrinsics. FCVTZS/FCVTZU (round toward zero) also implement
// the generic fp_to_sint/fp_to_uint nodes.
defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
// Intrinsic-selected duplicates of the fcvtzs/fcvtzu encodings; hidden from
// the assembler (isCodeGenOnly) so the mnemonics are not defined twice.
let isCodeGenOnly = 1 in {
defm FCVTZS_Int : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>;
defm FCVTZU_Int : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>;
defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>;
defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>;
}

//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//

defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;

//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FMOV : UnscaledConversion<"fmov">;

// Materialize FP +0.0 by moving the integer zero register across register
// banks; guarded by NoZCZ so subtargets with zero-cycle zeroing use their
// preferred form instead.
def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>;
def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;

//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FCVT : FPConversion<"fcvt">;

// f32 -> f16: convert into an FP register, then move the half result into a
// GPR (f32_to_f16 produces an i32 holding the half-precision bits).
def : Pat<(f32_to_f16 FPR32:$Rn),
          (i32 (COPY_TO_REGCLASS
                   (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
                   GPR32))>;

// f16 -> f32 expansion is handled via a pseudo (expanded later).
def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
                          [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;

//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
2243//===----------------------------------------------------------------------===// 2244 2245defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>; 2246defm FMOV : SingleOperandFPData<0b0000, "fmov">; 2247defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; 2248defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>; 2249defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; 2250defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; 2251defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>; 2252defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; 2253 2254def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))), 2255 (FRINTNDr FPR64:$Rn)>; 2256 2257// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior 2258// in the C spec. Setting hasSideEffects ensures it is not DCE'd. 2259// <rdar://problem/13715968> 2260// TODO: We should really model the FPSR flags correctly. This is really ugly. 2261let hasSideEffects = 1 in { 2262defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; 2263} 2264 2265defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; 2266 2267let SchedRW = [WriteFDiv] in { 2268defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>; 2269} 2270 2271//===----------------------------------------------------------------------===// 2272// Floating point two operand instructions. 
//===----------------------------------------------------------------------===//

defm FADD   : TwoOperandFPData<0b0010, "fadd", fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV   : TwoOperandFPData<0b0001, "fdiv", fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>;
defm FMAX   : TwoOperandFPData<0b0100, "fmax", AArch64fmax>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>;
defm FMIN   : TwoOperandFPData<0b0101, "fmin", AArch64fmin>;
let SchedRW = [WriteFMul] in {
defm FMUL   : TwoOperandFPData<0b0000, "fmul", fmul>;
defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
}
defm FSUB   : TwoOperandFPData<0b0011, "fsub", fsub>;

// Scalar-in-vector (v1f64) max/min map onto the D-register instructions.
def : Pat<(v1f64 (AArch64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (AArch64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;

//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//

// Fused multiply-add family, expressed via the generic fma node with fneg
// applied to the appropriate operand(s).
defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", fma>;
defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
     TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
     TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
     TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
// the NEON variant.
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and
// "(-a) + b*(-c)".
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//

// Only FCMP gets a selection pattern (AArch64fcmp); FCMPE is available to the
// assembler but not selected from the DAG here.
defm FCMPE : FPComparison<1, "fcmpe">;
defm FCMP  : FPComparison<0, "fcmp", AArch64fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP  : FPCondComparison<0, "fccmp">;

//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
2351//===----------------------------------------------------------------------===// 2352 2353defm FCSEL : FPCondSelect<"fcsel">; 2354 2355// CSEL instructions providing f128 types need to be handled by a 2356// pseudo-instruction since the eventual code will need to introduce basic 2357// blocks and control flow. 2358def F128CSEL : Pseudo<(outs FPR128:$Rd), 2359 (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), 2360 [(set (f128 FPR128:$Rd), 2361 (AArch64csel FPR128:$Rn, FPR128:$Rm, 2362 (i32 imm:$cond), NZCV))]> { 2363 let Uses = [NZCV]; 2364 let usesCustomInserter = 1; 2365} 2366 2367 2368//===----------------------------------------------------------------------===// 2369// Floating point immediate move. 2370//===----------------------------------------------------------------------===// 2371 2372let isReMaterializable = 1 in { 2373defm FMOV : FPMoveImmediate<"fmov">; 2374} 2375 2376//===----------------------------------------------------------------------===// 2377// Advanced SIMD two vector instructions. 
//===----------------------------------------------------------------------===//

defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>;
defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
// Vector compare-against-zero forms (cmXX Vd, Vn, #0).
defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT    : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
defm FABS   : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;

defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
// FCVTL widens; the "2" (high-half) forms take the upper half of a 128-bit
// source via extract_subvector.
defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
          (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
                                                                (i64 4)))),
          (FCVTLv8i16 V128:$Rn)>;
def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
                                                    (i64 2))))),
          (FCVTLv4i32 V128:$Rn)>;

defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
// FCVTN narrows; the "2" forms write the upper half of the destination, so
// the existing low half is inserted first (INSERT_SUBREG of $Rd into dsub).
defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
          (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd,
                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))),
          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
                                        int_aarch64_neon_fcvtxn>;
defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;
// Intrinsic-selected duplicates of fcvtzs/fcvtzu; hidden from the assembler.
let isCodeGenOnly = 1 in {
defm FCVTZS_Int : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs",
                                       int_aarch64_neon_fcvtzs>;
defm FCVTZU_Int : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu",
                                       int_aarch64_neon_fcvtzu>;
}
defm FNEG   : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
// NEG is expressed as (0 - x).
defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
                                UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
// Aliases for MVN -> NOT.
def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
                (NOTv8i8 V64:$Vd, V64:$Vn)>;
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
                (NOTv16i8 V128:$Vd, V128:$Vn)>;

def : Pat<(AArch64neg (v8i8  V64:$Rn)),  (NEGv8i8  V64:$Rn)>;
def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>;
def : Pat<(AArch64neg (v4i16 V64:$Rn)),  (NEGv4i16 V64:$Rn)>;
def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>;
def : Pat<(AArch64neg (v2i32 V64:$Rn)),  (NEGv2i32 V64:$Rn)>;
def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>;
def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>;

// Bitwise NOT is element-size agnostic, so all element types reuse the
// byte-element NOT instruction.
def : Pat<(AArch64not (v8i8 V64:$Rn)),   (NOTv8i8  V64:$Rn)>;
def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(AArch64not (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(AArch64not (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(AArch64not (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;

def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;

defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
// The "accumulating long pairwise" ops are modelled as (add acc, Xaddlp src).
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
       BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >;
defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>;
defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
       BinOpFrag<(add node:$LHS, (int_aarch64_neon_uaddlp node:$RHS))> >;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp",
                                int_aarch64_neon_uaddlp>;
defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;

// rev64 of f32 vectors reuses the i32-element REV64 encodings.
def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;

// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext so it's easier to pull the patterns out of the
// definition.
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
            (SHLLv8i8 V64:$Rn)>;
  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
            (SHLLv16i8 V128:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
            (SHLLv4i16 V64:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
            (SHLLv8i16 V128:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
            (SHLLv2i32 V64:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
            (SHLLv4i32 V128:$Rn)>;
}

defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//

defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>;
defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD    : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>;
defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>;
defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>;
defm FADDP   : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>;
defm FADD    : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>;
defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
defm FDIV    : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>;
defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>;
defm FMAX    : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", AArch64fmax>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>;
defm FMINP   : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>;
defm FMIN    : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", AArch64fmin>;

// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla",
                TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls",
                TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.
def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)),
          (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
          (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>;

def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
          (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;

defm FMULX    : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL     : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>;
defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB     : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>;
// Integer multiply-accumulate / multiply-subtract (tied accumulator).
defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla",
                TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >;
defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
                TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >;
defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
// Absolute-difference-and-accumulate: (add acc, (Xabd a, b)).
defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
      TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_aarch64_neon_smax>;
defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_aarch64_neon_smin>;
defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>;
defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
      TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_aarch64_neon_umax>;
defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_aarch64_neon_umin>;
defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;

// Vector bitwise logical ops. BIC/ORN fold an inverted operand; BSL is
// modelled as (LHS & MHS) | (~LHS & RHS).
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl",
    TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

// AArch64bsl is bitwise, so every element type maps onto the byte-element BSL.
def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;

// Vector register "mov" is an alias for ORR with identical sources. Only the
// .16b/.8b spellings are used when printing (trailing 1); the others are
// accepted on input only (trailing 0).
def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;

def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;

// "Less-than" style comparisons have no native encoding; they are aliases for
// the converse comparison with the source operands swapped. The trailing 0
// keeps these from being chosen when printing.
def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmls.8b\t$dst, $src1, $src2}",
                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmls.16b\t$dst, $src1, $src2}",
                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmls.4h\t$dst, $src1, $src2}",
                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmls.8h\t$dst, $src1, $src2}",
                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmls.2s\t$dst, $src1, $src2}",
                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmls.4s\t$dst, $src1, $src2}",
                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmls.2d\t$dst, $src1, $src2}",
                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlo.8b\t$dst, $src1, $src2}",
                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlo.16b\t$dst, $src1, $src2}",
                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlo.4h\t$dst, $src1, $src2}",
                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlo.8h\t$dst, $src1, $src2}",
                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlo.2s\t$dst, $src1, $src2}",
                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlo.4s\t$dst, $src1, $src2}",
                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlo.2d\t$dst, $src1, $src2}",
                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmle.8b\t$dst, $src1, $src2}",
                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmle.16b\t$dst, $src1, $src2}",
                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmle.4h\t$dst, $src1, $src2}",
                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmle.8h\t$dst, $src1, $src2}",
                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmle.2s\t$dst, $src1, $src2}",
                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmle.4s\t$dst, $src1, $src2}",
                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmle.2d\t$dst, $src1, $src2}",
                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlt.8b\t$dst, $src1, $src2}",
                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlt.16b\t$dst, $src1, $src2}",
                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlt.4h\t$dst, $src1, $src2}",
                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlt.8h\t$dst, $src1, $src2}",
                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlt.2s\t$dst, $src1, $src2}",
                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlt.4s\t$dst, $src1, $src2}",
                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlt.2d\t$dst, $src1, $src2}",
                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmle.2s\t$dst, $src1, $src2}",
                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmle.4s\t$dst, $src1, $src2}",
                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmle.2d\t$dst, $src1, $src2}",
                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmlt.2s\t$dst, $src1, $src2}",
                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmlt.4s\t$dst, $src1, $src2}",
                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmlt.2d\t$dst, $src1, $src2}",
                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
                "|facle.2s\t$dst, $src1, $src2}",
                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
                "|facle.4s\t$dst, $src1, $src2}",
                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
                "|facle.2d\t$dst, $src1, $src2}",
                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
                "|faclt.2s\t$dst, $src1, $src2}",
                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
                "|faclt.4s\t$dst, $src1, $src2}",
                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
                "|faclt.2d\t$dst, $src1, $src2}",
                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions.
2800//===----------------------------------------------------------------------===// 2801 2802defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; 2803defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>; 2804defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>; 2805defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; 2806defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; 2807defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; 2808defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; 2809defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>; 2810def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 2811 (FABD64 FPR64:$Rn, FPR64:$Rm)>; 2812defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge", 2813 int_aarch64_neon_facge>; 2814defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt", 2815 int_aarch64_neon_facgt>; 2816defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; 2817defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; 2818defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; 2819defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>; 2820defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>; 2821defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>; 2822defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; 2823defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; 2824defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; 2825defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; 2826defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; 2827defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; 2828defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", 
int_aarch64_neon_srshl>; 2829defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; 2830defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; 2831defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; 2832defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; 2833defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; 2834defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; 2835defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; 2836defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; 2837 2838def : InstAlias<"cmls $dst, $src1, $src2", 2839 (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2840def : InstAlias<"cmle $dst, $src1, $src2", 2841 (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2842def : InstAlias<"cmlo $dst, $src1, $src2", 2843 (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2844def : InstAlias<"cmlt $dst, $src1, $src2", 2845 (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2846def : InstAlias<"fcmle $dst, $src1, $src2", 2847 (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 2848def : InstAlias<"fcmle $dst, $src1, $src2", 2849 (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2850def : InstAlias<"fcmlt $dst, $src1, $src2", 2851 (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 2852def : InstAlias<"fcmlt $dst, $src1, $src2", 2853 (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2854def : InstAlias<"facle $dst, $src1, $src2", 2855 (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 2856def : InstAlias<"facle $dst, $src1, $src2", 2857 (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2858def : InstAlias<"faclt $dst, $src1, $src2", 2859 (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 2860def : InstAlias<"faclt $dst, $src1, $src2", 2861 (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2862 
//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
// "Mixed": the destination element is wider than the two source elements
// (the patterns below accumulate an i64 result from two i32 scalar inputs).
defm SQDMULL  : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
                                          int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL  : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL  : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

// Fold a saturating add/sub of a scalar saturating-doubling-multiply-long
// into the fused accumulate forms (sqdmlal / sqdmlsl, i32 sources -> i64).
def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                          (i32 FPR32:$Rm))))),
          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                          (i32 FPR32:$Rm))))),
          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;

//===----------------------------------------------------------------------===//
// Advanced SIMD two scalar instructions.
2882//===----------------------------------------------------------------------===// 2883 2884defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_aarch64_neon_abs>; 2885defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>; 2886defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; 2887defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; 2888defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>; 2889defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>; 2890defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; 2891defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", AArch64fcmgez>; 2892defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; 2893defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", AArch64fcmlez>; 2894defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; 2895defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">; 2896defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">; 2897defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">; 2898defm FCVTMU : SIMDTwoScalarSD< 1, 0, 0b11011, "fcvtmu">; 2899defm FCVTNS : SIMDTwoScalarSD< 0, 0, 0b11010, "fcvtns">; 2900defm FCVTNU : SIMDTwoScalarSD< 1, 0, 0b11010, "fcvtnu">; 2901defm FCVTPS : SIMDTwoScalarSD< 0, 1, 0b11010, "fcvtps">; 2902defm FCVTPU : SIMDTwoScalarSD< 1, 1, 0b11010, "fcvtpu">; 2903def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; 2904defm FCVTZS : SIMDTwoScalarSD< 0, 1, 0b11011, "fcvtzs">; 2905defm FCVTZU : SIMDTwoScalarSD< 1, 1, 0b11011, "fcvtzu">; 2906defm FRECPE : SIMDTwoScalarSD< 0, 1, 0b11101, "frecpe">; 2907defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">; 2908defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">; 2909defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", 2910 UnOpFrag<(sub immAllZerosV, node:$LHS)> >; 2911defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", AArch64sitof>; 2912defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", 
int_aarch64_neon_sqabs>; 2913defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; 2914defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; 2915defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; 2916defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", 2917 int_aarch64_neon_suqadd>; 2918defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", AArch64uitof>; 2919defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; 2920defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", 2921 int_aarch64_neon_usqadd>; 2922 2923def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>; 2924 2925def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))), 2926 (FCVTASv1i64 FPR64:$Rn)>; 2927def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))), 2928 (FCVTAUv1i64 FPR64:$Rn)>; 2929def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))), 2930 (FCVTMSv1i64 FPR64:$Rn)>; 2931def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))), 2932 (FCVTMUv1i64 FPR64:$Rn)>; 2933def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))), 2934 (FCVTNSv1i64 FPR64:$Rn)>; 2935def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))), 2936 (FCVTNUv1i64 FPR64:$Rn)>; 2937def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), 2938 (FCVTPSv1i64 FPR64:$Rn)>; 2939def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), 2940 (FCVTPUv1i64 FPR64:$Rn)>; 2941 2942def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))), 2943 (FRECPEv1i32 FPR32:$Rn)>; 2944def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))), 2945 (FRECPEv1i64 FPR64:$Rn)>; 2946def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))), 2947 (FRECPEv1i64 FPR64:$Rn)>; 2948 2949def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))), 2950 (FRECPXv1i32 FPR32:$Rn)>; 2951def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))), 2952 (FRECPXv1i64 FPR64:$Rn)>; 2953 
// Scalar frsqrte (reciprocal square-root estimate) intrinsic selection; the
// v1f64 form reuses the 64-bit scalar instruction.
def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;

// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bits to float.
// 8-bits -> float.
//
// Each instantiation emits two patterns: one for the W-register (32-bit
// index) register-offset addressing mode and one for the X-register (64-bit
// index) mode.  The zero-extending load places the byte/halfword in the low
// lane of an FP register (via sub-register 'sub'), so a plain UCVTF on the
// FP unit completes the conversion without a GPR->FPR transfer.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                                 sub))>;

  // NOTE(review): the source side matches ro.Wext:$extend while the emitted
  // LDRX uses ro.Xext:$extend.  Output operands bind by name, so this may be
  // deliberate, but confirm the extend-operand kinds agree for the
  // X-register addressing mode.
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                                 sub))>;
}

// i8 -> f32: register-offset form (via the multiclass), then the
// scaled-immediate (LDRBui) and unscaled-immediate (LDURBi) forms.
defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> float.
// i16 -> f32: register-offset, scaled-immediate, and unscaled-immediate
// addressing modes, loading into the H sub-register.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits are handled in target specific dag combine:
// performIntToFpCombine.
// 64-bits integer to 32-bits floating point, not possible with
// UCVTF on floating point registers (both source and destination
// must have the same size).

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f64 (uint_to_fp (i32
               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f64 (uint_to_fp (i32
               (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f64 (uint_to_fp (i32
               (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f64 (uint_to_fp (i32
               (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits -> double.
3035defm : UIntToFPROLoadPat<f64, i32, load, 3036 UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>; 3037def : Pat <(f64 (uint_to_fp (i32 3038 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), 3039 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 3040 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>; 3041def : Pat <(f64 (uint_to_fp (i32 3042 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), 3043 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 3044 (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; 3045// 64-bits -> double are handled in target specific dag combine: 3046// performIntToFpCombine. 3047 3048//===----------------------------------------------------------------------===// 3049// Advanced SIMD three different-sized vector instructions. 3050//===----------------------------------------------------------------------===// 3051 3052defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>; 3053defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>; 3054defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>; 3055defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; 3056defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>; 3057defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", 3058 int_aarch64_neon_sabd>; 3059defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", 3060 int_aarch64_neon_sabd>; 3061defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", 3062 BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; 3063defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", 3064 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; 3065defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", 3066 TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; 3067defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", 3068 TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; 3069defm SMULL 
: SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>; 3070defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", 3071 int_aarch64_neon_sqadd>; 3072defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", 3073 int_aarch64_neon_sqsub>; 3074defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", 3075 int_aarch64_neon_sqdmull>; 3076defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", 3077 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; 3078defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", 3079 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; 3080defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", 3081 int_aarch64_neon_uabd>; 3082defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", 3083 int_aarch64_neon_uabd>; 3084defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", 3085 BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; 3086defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", 3087 BinOpFrag<(add node:$LHS, (zext node:$RHS))>>; 3088defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", 3089 TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; 3090defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", 3091 TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; 3092defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>; 3093defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", 3094 BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>; 3095defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", 3096 BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>; 3097 3098// Patterns for 64-bit pmull 3099def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm), 3100 (PMULLv1i64 V64:$Rn, V64:$Rm)>; 3101def : Pat<(int_aarch64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), 3102 (vector_extract (v2i64 V128:$Rm), (i64 1))), 3103 (PMULLv2i64 V128:$Rn, V128:$Rm)>; 3104 3105// CodeGen patterns for addhn and subhn instructions, 
which can actually be 3106// written in LLVM IR without too much difficulty. 3107 3108// ADDHN 3109def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), 3110 (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; 3111def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), 3112 (i32 16))))), 3113 (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; 3114def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), 3115 (i32 32))))), 3116 (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; 3117def : Pat<(concat_vectors (v8i8 V64:$Rd), 3118 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), 3119 (i32 8))))), 3120 (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 3121 V128:$Rn, V128:$Rm)>; 3122def : Pat<(concat_vectors (v4i16 V64:$Rd), 3123 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), 3124 (i32 16))))), 3125 (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 3126 V128:$Rn, V128:$Rm)>; 3127def : Pat<(concat_vectors (v2i32 V64:$Rd), 3128 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), 3129 (i32 32))))), 3130 (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 3131 V128:$Rn, V128:$Rm)>; 3132 3133// SUBHN 3134def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), 3135 (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; 3136def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 3137 (i32 16))))), 3138 (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; 3139def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 3140 (i32 32))))), 3141 (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; 3142def : Pat<(concat_vectors (v8i8 V64:$Rd), 3143 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 3144 (i32 8))))), 3145 (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 3146 V128:$Rn, V128:$Rm)>; 3147def : Pat<(concat_vectors (v4i16 V64:$Rd), 3148 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 3149 (i32 16))))), 3150 (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 3151 V128:$Rn, 
V128:$Rm)>; 3152def : Pat<(concat_vectors (v2i32 V64:$Rd), 3153 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 3154 (i32 32))))), 3155 (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 3156 V128:$Rn, V128:$Rm)>; 3157 3158//---------------------------------------------------------------------------- 3159// AdvSIMD bitwise extract from vector instruction. 3160//---------------------------------------------------------------------------- 3161 3162defm EXT : SIMDBitwiseExtract<"ext">; 3163 3164def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), 3165 (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; 3166def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), 3167 (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; 3168def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), 3169 (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; 3170def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), 3171 (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; 3172def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), 3173 (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; 3174def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), 3175 (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; 3176def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), 3177 (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; 3178def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), 3179 (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; 3180 3181// We use EXT to handle extract_subvector to copy the upper 64-bits of a 3182// 128-bit vector. 
// All of these extract the high half, so a single byte-granularity EXT #8 of
// the vector with itself, followed by taking the D sub-register, covers
// every element type.
def : Pat<(v8i8  (extract_subvector V128:$Rn, (i64 8))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;


//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

// Single-register tbl1/tbx1 intrinsics map directly onto the one-register
// table-lookup instruction forms.
def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                  (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                   (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;


//----------------------------------------------------------------------------
// AdvSIMD scalar CPY instruction
//----------------------------------------------------------------------------

defm CPY : SIMDScalarCPY<"cpy">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP   : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">;
defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
defm FMINP   : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
// Two-element across-vector reduction intrinsics select to the scalar
// pairwise instructions.
def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
          (ADDPv2i64p V128:$Rn)>;
def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
          (ADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
// A 4-element f32 sum is one vector faddp (4 lanes -> 2) followed by the
// scalar pairwise add of the low D register.
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;

//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------

// DUP (broadcast) from a general-purpose register.
def DUPv8i8gpr  : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>;

// DUP (broadcast) from a vector element.
def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

// Splat of an FP scalar: insert it into lane 0 of an undef vector register,
// then DUP that lane.
def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
// NOTE(review): the f64 splat inserts into a v4i32 IMPLICIT_DEF via dsub;
// presumably any 128-bit container type works since only the register
// matters -- confirm this is intentional rather than a typo for v2i64.
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
3303 3304def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), 3305 (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; 3306def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), 3307 (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; 3308def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), 3309 (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; 3310 3311// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane 3312// instruction even if the types don't match: we just have to remap the lane 3313// carefully. N.b. this trick only applies to truncations. 3314def VecIndex_x2 : SDNodeXForm<imm, [{ 3315 return CurDAG->getTargetConstant(2 * N->getZExtValue(), MVT::i64); 3316}]>; 3317def VecIndex_x4 : SDNodeXForm<imm, [{ 3318 return CurDAG->getTargetConstant(4 * N->getZExtValue(), MVT::i64); 3319}]>; 3320def VecIndex_x8 : SDNodeXForm<imm, [{ 3321 return CurDAG->getTargetConstant(8 * N->getZExtValue(), MVT::i64); 3322}]>; 3323 3324multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT, 3325 ValueType Src128VT, ValueType ScalVT, 3326 Instruction DUP, SDNodeXForm IdxXFORM> { 3327 def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn), 3328 imm:$idx)))), 3329 (DUP V128:$Rn, (IdxXFORM imm:$idx))>; 3330 3331 def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn), 3332 imm:$idx)))), 3333 (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; 3334} 3335 3336defm : DUPWithTruncPats<v8i8, v4i16, v8i16, i32, DUPv8i8lane, VecIndex_x2>; 3337defm : DUPWithTruncPats<v8i8, v2i32, v4i32, i32, DUPv8i8lane, VecIndex_x4>; 3338defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>; 3339 3340defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>; 3341defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>; 3342defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>; 3343 3344multiclass 
DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP, 3345 SDNodeXForm IdxXFORM> { 3346 def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn), 3347 imm:$idx))))), 3348 (DUP V128:$Rn, (IdxXFORM imm:$idx))>; 3349 3350 def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn), 3351 imm:$idx))))), 3352 (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; 3353} 3354 3355defm : DUPWithTrunci64Pats<v8i8, DUPv8i8lane, VecIndex_x8>; 3356defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>; 3357defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>; 3358 3359defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>; 3360defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>; 3361defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>; 3362 3363// SMOV and UMOV definitions, with some extra patterns for convenience 3364defm SMOV : SMov; 3365defm UMOV : UMov; 3366 3367def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), 3368 (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>; 3369def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), 3370 (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; 3371def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), 3372 (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; 3373def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), 3374 (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; 3375def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), 3376 (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; 3377def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), 3378 (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; 3379 3380// Extracting i8 or i16 elements will have the zero-extend transformed to 3381// an 'and' mask by type legalization since neither i8 nor i16 are legal types 3382// for AArch64. 
Match these patterns here since UMOV already zeroes out the high 3383// bits of the destination register. 3384def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), 3385 (i32 0xff)), 3386 (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>; 3387def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), 3388 (i32 0xffff)), 3389 (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>; 3390 3391defm INS : SIMDIns; 3392 3393def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), 3394 (SUBREG_TO_REG (i32 0), 3395 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 3396def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), 3397 (SUBREG_TO_REG (i32 0), 3398 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 3399 3400def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), 3401 (SUBREG_TO_REG (i32 0), 3402 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 3403def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), 3404 (SUBREG_TO_REG (i32 0), 3405 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 3406 3407def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))), 3408 (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), 3409 (i32 FPR32:$Rn), ssub))>; 3410def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))), 3411 (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 3412 (i32 FPR32:$Rn), ssub))>; 3413def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), 3414 (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), 3415 (i64 FPR64:$Rn), dsub))>; 3416 3417def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), 3418 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; 3419def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), 3420 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; 3421def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))), 3422 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>; 3423 3424def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), 3425 (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), 3426 (EXTRACT_SUBREG 3427 (INSvi32lane 3428 (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), 3429 
VectorIndexS:$imm, 3430 (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), 3431 (i64 0)), 3432 dsub)>; 3433def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn), 3434 (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), 3435 (INSvi32lane 3436 V128:$Rn, VectorIndexS:$imm, 3437 (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), 3438 (i64 0))>; 3439def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), 3440 (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))), 3441 (INSvi64lane 3442 V128:$Rn, VectorIndexD:$imm, 3443 (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)), 3444 (i64 0))>; 3445 3446// Copy an element at a constant index in one vector into a constant indexed 3447// element of another. 3448// FIXME refactor to a shared class/dev parameterized on vector type, vector 3449// index type and INS extension 3450def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane 3451 (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), 3452 VectorIndexB:$idx2)), 3453 (v16i8 (INSvi8lane 3454 V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) 3455 )>; 3456def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane 3457 (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), 3458 VectorIndexH:$idx2)), 3459 (v8i16 (INSvi16lane 3460 V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) 3461 )>; 3462def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane 3463 (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), 3464 VectorIndexS:$idx2)), 3465 (v4i32 (INSvi32lane 3466 V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) 3467 )>; 3468def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane 3469 (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), 3470 VectorIndexD:$idx2)), 3471 (v2i64 (INSvi64lane 3472 V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2) 3473 )>; 3474 3475multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, 3476 ValueType VTScal, Instruction INS> { 3477 def : Pat<(VT128 (vector_insert V128:$src, 3478 (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), 3479 
imm:$Immd)), 3480 (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>; 3481 3482 def : Pat<(VT128 (vector_insert V128:$src, 3483 (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), 3484 imm:$Immd)), 3485 (INS V128:$src, imm:$Immd, 3486 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>; 3487 3488 def : Pat<(VT64 (vector_insert V64:$src, 3489 (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), 3490 imm:$Immd)), 3491 (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), 3492 imm:$Immd, V128:$Rn, imm:$Immn), 3493 dsub)>; 3494 3495 def : Pat<(VT64 (vector_insert V64:$src, 3496 (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), 3497 imm:$Immd)), 3498 (EXTRACT_SUBREG 3499 (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd, 3500 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn), 3501 dsub)>; 3502} 3503 3504defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>; 3505defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>; 3506defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>; 3507defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>; 3508defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>; 3509defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>; 3510 3511 3512// Floating point vector extractions are codegen'd as either a sequence of 3513// subregister extractions, possibly fed by an INS if the lane number is 3514// anything other than zero. 
3515def : Pat<(vector_extract (v2f64 V128:$Rn), 0), 3516 (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; 3517def : Pat<(vector_extract (v4f32 V128:$Rn), 0), 3518 (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; 3519def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), 3520 (f64 (EXTRACT_SUBREG 3521 (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0, 3522 V128:$Rn, VectorIndexD:$idx), 3523 dsub))>; 3524def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), 3525 (f32 (EXTRACT_SUBREG 3526 (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0, 3527 V128:$Rn, VectorIndexS:$idx), 3528 ssub))>; 3529 3530// All concat_vectors operations are canonicalised to act on i64 vectors for 3531// AArch64. In the general case we need an instruction, which had just as well be 3532// INS. 3533class ConcatPat<ValueType DstTy, ValueType SrcTy> 3534 : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), 3535 (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1, 3536 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>; 3537 3538def : ConcatPat<v2i64, v1i64>; 3539def : ConcatPat<v2f64, v1f64>; 3540def : ConcatPat<v4i32, v2i32>; 3541def : ConcatPat<v4f32, v2f32>; 3542def : ConcatPat<v8i16, v4i16>; 3543def : ConcatPat<v16i8, v8i8>; 3544 3545// If the high lanes are undef, though, we can just ignore them: 3546class ConcatUndefPat<ValueType DstTy, ValueType SrcTy> 3547 : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)), 3548 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>; 3549 3550def : ConcatUndefPat<v2i64, v1i64>; 3551def : ConcatUndefPat<v2f64, v1f64>; 3552def : ConcatUndefPat<v4i32, v2i32>; 3553def : ConcatUndefPat<v4f32, v2f32>; 3554def : ConcatUndefPat<v8i16, v4i16>; 3555def : ConcatUndefPat<v16i8, v8i8>; 3556 3557//---------------------------------------------------------------------------- 3558// AdvSIMD across lanes instructions 3559//---------------------------------------------------------------------------- 3560 3561defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">; 3562defm SMAXV : 
SIMDAcrossLanesBHS<0, 0b01010, "smaxv">; 3563defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">; 3564defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; 3565defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; 3566defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; 3567defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; 3568defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; 3569defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; 3570defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; 3571defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>; 3572 3573multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> { 3574// If there is a sign extension after this intrinsic, consume it as smov already 3575// performed it 3576 def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)), 3577 (i32 (SMOVvi8to32 3578 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3579 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 3580 (i64 0)))>; 3581 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 3582 (i32 (SMOVvi8to32 3583 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3584 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 3585 (i64 0)))>; 3586// If there is a sign extension after this intrinsic, consume it as smov already 3587// performed it 3588def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)), 3589 (i32 (SMOVvi8to32 3590 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3591 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 3592 (i64 0)))>; 3593def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 3594 (i32 (SMOVvi8to32 3595 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3596 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 3597 (i64 0)))>; 3598// If there is a sign extension after this intrinsic, consume it as smov already 3599// performed it 3600def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)), 3601 
(i32 (SMOVvi16to32 3602 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3603 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 3604 (i64 0)))>; 3605def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 3606 (i32 (SMOVvi16to32 3607 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3608 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 3609 (i64 0)))>; 3610// If there is a sign extension after this intrinsic, consume it as smov already 3611// performed it 3612def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)), 3613 (i32 (SMOVvi16to32 3614 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3615 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 3616 (i64 0)))>; 3617def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 3618 (i32 (SMOVvi16to32 3619 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3620 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 3621 (i64 0)))>; 3622 3623def : Pat<(i32 (intOp (v4i32 V128:$Rn))), 3624 (i32 (EXTRACT_SUBREG 3625 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3626 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), 3627 ssub))>; 3628} 3629 3630multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> { 3631// If there is a masking operation keeping only what has been actually 3632// generated, consume it. 3633 def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)), 3634 (i32 (EXTRACT_SUBREG 3635 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3636 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 3637 ssub))>; 3638 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 3639 (i32 (EXTRACT_SUBREG 3640 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3641 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 3642 ssub))>; 3643// If there is a masking operation keeping only what has been actually 3644// generated, consume it. 
3645def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)), 3646 (i32 (EXTRACT_SUBREG 3647 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3648 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 3649 ssub))>; 3650def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 3651 (i32 (EXTRACT_SUBREG 3652 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3653 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 3654 ssub))>; 3655 3656// If there is a masking operation keeping only what has been actually 3657// generated, consume it. 3658def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)), 3659 (i32 (EXTRACT_SUBREG 3660 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3661 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 3662 ssub))>; 3663def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 3664 (i32 (EXTRACT_SUBREG 3665 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3666 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 3667 ssub))>; 3668// If there is a masking operation keeping only what has been actually 3669// generated, consume it. 
3670def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)), 3671 (i32 (EXTRACT_SUBREG 3672 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3673 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 3674 ssub))>; 3675def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 3676 (i32 (EXTRACT_SUBREG 3677 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3678 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 3679 ssub))>; 3680 3681def : Pat<(i32 (intOp (v4i32 V128:$Rn))), 3682 (i32 (EXTRACT_SUBREG 3683 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3684 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), 3685 ssub))>; 3686 3687} 3688 3689multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> { 3690 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 3691 (i32 (SMOVvi16to32 3692 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3693 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), 3694 (i64 0)))>; 3695def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 3696 (i32 (SMOVvi16to32 3697 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3698 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), 3699 (i64 0)))>; 3700 3701def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 3702 (i32 (EXTRACT_SUBREG 3703 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3704 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), 3705 ssub))>; 3706def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 3707 (i32 (EXTRACT_SUBREG 3708 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3709 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), 3710 ssub))>; 3711 3712def : Pat<(i64 (intOp (v4i32 V128:$Rn))), 3713 (i64 (EXTRACT_SUBREG 3714 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3715 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), 3716 dsub))>; 3717} 3718 3719multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc, 3720 Intrinsic intOp> { 3721 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 3722 (i32 (EXTRACT_SUBREG 3723 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3724 
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), 3725 ssub))>; 3726def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 3727 (i32 (EXTRACT_SUBREG 3728 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3729 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), 3730 ssub))>; 3731 3732def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 3733 (i32 (EXTRACT_SUBREG 3734 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3735 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), 3736 ssub))>; 3737def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 3738 (i32 (EXTRACT_SUBREG 3739 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3740 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), 3741 ssub))>; 3742 3743def : Pat<(i64 (intOp (v4i32 V128:$Rn))), 3744 (i64 (EXTRACT_SUBREG 3745 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3746 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), 3747 dsub))>; 3748} 3749 3750defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>; 3751// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 3752def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))), 3753 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; 3754 3755defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>; 3756// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 3757def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))), 3758 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; 3759 3760defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>; 3761def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))), 3762 (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; 3763 3764defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>; 3765def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))), 3766 (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; 3767 3768defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>; 3769def : Pat<(i32 
(int_aarch64_neon_umaxv (v2i32 V64:$Rn))), 3770 (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; 3771 3772defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>; 3773def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))), 3774 (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; 3775 3776defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; 3777defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; 3778 3779// The vaddlv_s32 intrinsic gets mapped to SADDLP. 3780def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))), 3781 (i64 (EXTRACT_SUBREG 3782 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3783 (SADDLPv2i32_v1i64 V64:$Rn), dsub), 3784 dsub))>; 3785// The vaddlv_u32 intrinsic gets mapped to UADDLP. 3786def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))), 3787 (i64 (EXTRACT_SUBREG 3788 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3789 (UADDLPv2i32_v1i64 V64:$Rn), dsub), 3790 dsub))>; 3791 3792//------------------------------------------------------------------------------ 3793// AdvSIMD modified immediate instructions 3794//------------------------------------------------------------------------------ 3795 3796// AdvSIMD BIC 3797defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>; 3798// AdvSIMD ORR 3799defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>; 3800 3801def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; 3802def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; 3803def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; 3804def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; 3805 3806def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 3807def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 3808def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 3809def : 
InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 3810 3811def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; 3812def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; 3813def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; 3814def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; 3815 3816def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 3817def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 3818def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 3819def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 3820 3821// AdvSIMD FMOV 3822def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8, 3823 "fmov", ".2d", 3824 [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 3825def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8, 3826 "fmov", ".2s", 3827 [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; 3828def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8, 3829 "fmov", ".4s", 3830 [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 3831 3832// AdvSIMD MOVI 3833 3834// EDIT byte mask: scalar 3835let isReMaterializable = 1, isAsCheapAsAMove = 1 in 3836def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", 3837 [(set FPR64:$Rd, simdimmtype10:$imm8)]>; 3838// The movi_edit node has the immediate value already encoded, so we use 3839// a plain imm0_255 here. 
3840def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)), 3841 (MOVID imm0_255:$shift)>; 3842 3843def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>; 3844def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>; 3845def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>; 3846def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>; 3847 3848def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>; 3849def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>; 3850def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>; 3851def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>; 3852 3853// EDIT byte mask: 2d 3854 3855// The movi_edit node has the immediate value already encoded, so we use 3856// a plain imm0_255 in the pattern 3857let isReMaterializable = 1, isAsCheapAsAMove = 1 in 3858def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128, 3859 simdimmtype10, 3860 "movi", ".2d", 3861 [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; 3862 3863 3864// Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing. 3865// Complexity is added to break a tie with a plain MOVI. 
3866let AddedComplexity = 1 in { 3867def : Pat<(f32 fpimm0), 3868 (f32 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), ssub))>, 3869 Requires<[HasZCZ]>; 3870def : Pat<(f64 fpimm0), 3871 (f64 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), dsub))>, 3872 Requires<[HasZCZ]>; 3873} 3874 3875def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; 3876def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; 3877def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; 3878def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; 3879 3880def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; 3881def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; 3882def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; 3883def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; 3884 3885def : Pat<(v2f64 (AArch64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>; 3886def : Pat<(v4f32 (AArch64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>; 3887 3888// EDIT per word & halfword: 2s, 4h, 4s, & 8h 3889defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; 3890 3891def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 3892def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 3893def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 3894def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 3895 3896def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 3897def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 3898def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 3899def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 3900 3901def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 3902 (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; 3903def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 3904 (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; 3905def : Pat<(v4i16 
(AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 3906 (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; 3907def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 3908 (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; 3909 3910// EDIT per word: 2s & 4s with MSL shifter 3911def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", 3912 [(set (v2i32 V64:$Rd), 3913 (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 3914def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", 3915 [(set (v4i32 V128:$Rd), 3916 (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 3917 3918// Per byte: 8b & 16b 3919def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255, 3920 "movi", ".8b", 3921 [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; 3922def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255, 3923 "movi", ".16b", 3924 [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; 3925 3926// AdvSIMD MVNI 3927 3928// EDIT per word & halfword: 2s, 4h, 4s, & 8h 3929defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; 3930 3931def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 3932def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 3933def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 3934def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 3935 3936def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 3937def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 3938def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 3939def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 3940 3941def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 3942 (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; 3943def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 
imm:$shift))), 3944 (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; 3945def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 3946 (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; 3947def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 3948 (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; 3949 3950// EDIT per word: 2s & 4s with MSL shifter 3951def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", 3952 [(set (v2i32 V64:$Rd), 3953 (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 3954def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", 3955 [(set (v4i32 V128:$Rd), 3956 (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 3957 3958//---------------------------------------------------------------------------- 3959// AdvSIMD indexed element 3960//---------------------------------------------------------------------------- 3961 3962let neverHasSideEffects = 1 in { 3963 defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">; 3964 defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">; 3965} 3966 3967// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the 3968// instruction expects the addend first, while the intrinsic expects it last. 3969 3970// On the other hand, there are quite a few valid combinatorial options due to 3971// the commutativity of multiplication and the fact that (-x) * y = x * (-y). 
3972defm : SIMDFPIndexedSDTiedPatterns<"FMLA", 3973 TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; 3974defm : SIMDFPIndexedSDTiedPatterns<"FMLA", 3975 TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; 3976 3977defm : SIMDFPIndexedSDTiedPatterns<"FMLS", 3978 TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; 3979defm : SIMDFPIndexedSDTiedPatterns<"FMLS", 3980 TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; 3981defm : SIMDFPIndexedSDTiedPatterns<"FMLS", 3982 TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; 3983defm : SIMDFPIndexedSDTiedPatterns<"FMLS", 3984 TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; 3985 3986multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> { 3987 // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit 3988 // and DUP scalar. 3989 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 3990 (AArch64duplane32 (v4f32 (fneg V128:$Rm)), 3991 VectorIndexS:$idx))), 3992 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; 3993 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 3994 (v2f32 (AArch64duplane32 3995 (v4f32 (insert_subvector undef, 3996 (v2f32 (fneg V64:$Rm)), 3997 (i32 0))), 3998 VectorIndexS:$idx)))), 3999 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, 4000 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 4001 VectorIndexS:$idx)>; 4002 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 4003 (AArch64dup (f32 (fneg FPR32Op:$Rm))))), 4004 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, 4005 (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; 4006 4007 // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit 4008 // and DUP scalar. 
4009 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 4010 (AArch64duplane32 (v4f32 (fneg V128:$Rm)), 4011 VectorIndexS:$idx))), 4012 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, 4013 VectorIndexS:$idx)>; 4014 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 4015 (v4f32 (AArch64duplane32 4016 (v4f32 (insert_subvector undef, 4017 (v2f32 (fneg V64:$Rm)), 4018 (i32 0))), 4019 VectorIndexS:$idx)))), 4020 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, 4021 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 4022 VectorIndexS:$idx)>; 4023 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 4024 (AArch64dup (f32 (fneg FPR32Op:$Rm))))), 4025 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, 4026 (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; 4027 4028 // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar 4029 // (DUPLANE from 64-bit would be trivial). 4030 def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), 4031 (AArch64duplane64 (v2f64 (fneg V128:$Rm)), 4032 VectorIndexD:$idx))), 4033 (FMLSv2i64_indexed 4034 V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; 4035 def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), 4036 (AArch64dup (f64 (fneg FPR64Op:$Rm))))), 4037 (FMLSv2i64_indexed V128:$Rd, V128:$Rn, 4038 (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; 4039 4040 // 2 variants for 32-bit scalar version: extract from .2s or from .4s 4041 def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), 4042 (vector_extract (v4f32 (fneg V128:$Rm)), 4043 VectorIndexS:$idx))), 4044 (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, 4045 V128:$Rm, VectorIndexS:$idx)>; 4046 def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), 4047 (vector_extract (v2f32 (fneg V64:$Rm)), 4048 VectorIndexS:$idx))), 4049 (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, 4050 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; 4051 4052 // 1 variant for 64-bit scalar version: extract from .1d or from .2d 4053 def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 
FPR64:$Rn), 4054 (vector_extract (v2f64 (fneg V128:$Rm)), 4055 VectorIndexS:$idx))), 4056 (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, 4057 V128:$Rm, VectorIndexS:$idx)>; 4058} 4059 4060defm : FMLSIndexedAfterNegPatterns< 4061 TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; 4062defm : FMLSIndexedAfterNegPatterns< 4063 TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; 4064 4065defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; 4066defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>; 4067 4068def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), 4069 (FMULv2i32_indexed V64:$Rn, 4070 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), 4071 (i64 0))>; 4072def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), 4073 (FMULv4i32_indexed V128:$Rn, 4074 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), 4075 (i64 0))>; 4076def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), 4077 (FMULv2i64_indexed V128:$Rn, 4078 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), 4079 (i64 0))>; 4080 4081defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>; 4082defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; 4083defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", 4084 TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>; 4085defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", 4086 TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>; 4087defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; 4088defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", 4089 TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; 4090defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", 4091 TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; 4092defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", 4093 int_aarch64_neon_smull>; 4094defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", 4095 
int_aarch64_neon_sqadd>; 4096defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", 4097 int_aarch64_neon_sqsub>; 4098defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; 4099defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", 4100 TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; 4101defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", 4102 TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; 4103defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", 4104 int_aarch64_neon_umull>; 4105 4106// A scalar sqdmull with the second operand being a vector lane can be 4107// handled directly with the indexed instruction encoding. 4108def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 4109 (vector_extract (v4i32 V128:$Vm), 4110 VectorIndexS:$idx)), 4111 (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; 4112 4113//---------------------------------------------------------------------------- 4114// AdvSIMD scalar shift instructions 4115//---------------------------------------------------------------------------- 4116defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">; 4117defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">; 4118defm SCVTF : SIMDScalarRShiftSD<0, 0b11100, "scvtf">; 4119defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">; 4120// Codegen patterns for the above. We don't put these directly on the 4121// instructions because TableGen's type inference can't handle the truth. 4122// Having the same base pattern for fp <--> int totally freaks it out. 
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;

defm SHL      : SIMDScalarLShiftD<    0, 0b01010, "shl", AArch64vshl>;
defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
defm SQRSHRN  : SIMDScalarRShiftBHS<  0, 0b10011, "sqrshrn",
                                      int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS<  1, 0b10001, "sqrshrun",
                                      int_aarch64_neon_sqrshrun>;
defm SQSHLU   : SIMDScalarLShiftBHSD< 1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL    : SIMDScalarLShiftBHSD< 0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN   : SIMDScalarRShiftBHS<  0, 0b10010, "sqshrn",
                                      int_aarch64_neon_sqshrn>;
defm SQSHRUN  : SIMDScalarRShiftBHS<  1, 0b10000, "sqshrun",
                                      int_aarch64_neon_sqshrun>;
defm SRI      : SIMDScalarRShiftDTied<1, 0b01000, "sri">;
defm SRSHR    : SIMDScalarRShiftD<    0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA    : SIMDScalarRShiftDTied<0, 0b00110, "srsra",
    TriOpFrag<(add node:$LHS, (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR     : SIMDScalarRShiftD<    0, 0b00000, "sshr", AArch64vashr>;
defm SSRA     : SIMDScalarRShiftDTied<0, 0b00010, "ssra",
    TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN  : SIMDScalarRShiftBHS<  1, 0b10011, "uqrshrn",
                                      int_aarch64_neon_uqrshrn>;
defm UQSHL    : SIMDScalarLShiftBHSD< 1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN   : SIMDScalarRShiftBHS<  1, 0b10010, "uqshrn",
                                      int_aarch64_neon_uqshrn>;
defm URSHR    : SIMDScalarRShiftD<    1, 0b00100, "urshr", AArch64urshri>;
defm URSRA    : SIMDScalarRShiftDTied<1, 0b00110, "ursra",
    TriOpFrag<(add node:$LHS, (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR     : SIMDScalarRShiftD<    1, 0b00000, "ushr", AArch64vlshr>;
defm USRA     : SIMDScalarRShiftDTied<1, 0b00010, "usra",
    TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))>>;

//----------------------------------------------------------------------------
// AdvSIMD vector shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU : SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF  : SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf",
                                     int_aarch64_neon_vcvtfxs2fp>;
defm RSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
                                        int_aarch64_neon_rshrn>;
defm SHL    : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
defm SHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
                  BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
defm SLI    : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>;
def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                                        (i32 vecshiftL64:$imm))),
          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
defm SQRSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
                                          int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
                                          int_aarch64_neon_sqrshrun>;
defm SQSHLU   : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL    : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
                                          int_aarch64_neon_sqshrn>;
defm SQSHRUN  : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
                                          int_aarch64_neon_sqshrun>;
defm SRI    : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>;
def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                                        (i32 vecshiftR64:$imm))),
          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
defm SRSHR  : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA  : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
                TriOpFrag<(add node:$LHS, (AArch64srshri node:$MHS, node:$RHS))> >;
defm SSHLL  : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
                BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;

defm SSHR   : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA   : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
                TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF  : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf",
                                     int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
                                         int_aarch64_neon_uqrshrn>;
defm UQSHL  : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
                                        int_aarch64_neon_uqshrn>;
defm URSHR  : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA  : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
                TriOpFrag<(add node:$LHS, (AArch64urshri node:$MHS, node:$RHS))> >;
defm USHLL  : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
                BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR   : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA   : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
                TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;

// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result so it
// doesn't matter).
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;

def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
                                                      vecshiftR16Narrow:$imm)))),
          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
                                                      vecshiftR32Narrow:$imm)))),
          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR32Narrow:$imm)>;
// The rewrite side must reuse the operand bound on the match side, so its
// operand class has to agree: a v2i64 narrowing shift immediate is 1..32,
// i.e. vecshiftR64Narrow (the original erroneously emitted the $imm as
// vecshiftR32Narrow here).
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
                                                      vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;

// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
// Also match an extend from the upper half of a 128 bit source register.
def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (sext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;

// Vector shift sxtl aliases
def : InstAlias<"sxtl.8h $dst, $src1",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.8h, $src1.8b",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.4s $dst, $src1",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.4s, $src1.4h",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.2d $dst, $src1",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.2d, $src1.2s",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift sxtl2 aliases
def : InstAlias<"sxtl2.8h $dst, $src1",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.4s $dst, $src1",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.2d $dst, $src1",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;

// Vector shift uxtl aliases
def : InstAlias<"uxtl.8h $dst, $src1",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.8h, $src1.8b",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.4s $dst, $src1",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.4s, $src1.4h",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.2d $dst, $src1",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.2d, $src1.2s",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift uxtl2 aliases
def : InstAlias<"uxtl2.8h $dst, $src1",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.4s $dst, $src1",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.2d $dst, $src1",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;

// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// These patterns are more complex because floating point loads do not
// support sign extension.
// The sign extension has to be explicitly added and is only supported for
// one step: byte-to-half, half-to-word, word-to-doubleword.
// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
// However, this is not good for code size.
// 8-bits -> float. 2 sizes step-up.
// Sign-extending 8-bit load feeding a float convert: lengthen twice on the
// FP unit (b->h via SSHLL, h->s via SSHLL) and then SCVTF FPR->FPR, which
// beats the GPR->FPR SCVTF path in latency (see the cycle counts above).
class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                              (f64
                                (EXTRACT_SUBREG
                                  (SSHLLv8i8_shift
                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                                   INST,
                                                   bsub),
                                    0),
                                  dsub)),
                              0),
                            ssub)))>, Requires<[NotForCodeSize]>;

// One instantiation per supported addressing mode for the byte load.
def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                          (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
                          (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;

// 16-bits -> float. 1 size step-up.
class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                              (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                             INST,
                                             hsub),
                              0),
                            ssub)))>, Requires<[NotForCodeSize]>;

// One instantiation per supported addressing mode for the halfword load.
def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// 32-bits to 32-bits are handled in target specific dag combine:
// performIntToFpCombine.
// 64-bits integer to 32-bits floating point, not possible with
// SCVTF on floating point registers (both source and destination
// must have the same size).

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double. 3 size step-up: give up.
// 16-bits -> double. 2 size step.
class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                             (SSHLLv2i32_shift
                               (f64
                                 (EXTRACT_SUBREG
                                   (SSHLLv4i16_shift
                                     (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                                    INST,
                                                    hsub),
                                     0),
                                   dsub)),
                               0),
                             dsub)))>, Requires<[NotForCodeSize]>;

// One instantiation per supported addressing mode for the halfword load.
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bits -> double. 1 size step-up.
class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                             (SSHLLv2i32_shift
                               (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                              INST,
                                              ssub),
                               0),
                             dsub)))>, Requires<[NotForCodeSize]>;

// One instantiation per supported addressing mode for the word load.
def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;

// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.


//----------------------------------------------------------------------------
// AdvSIMD Load-Store Structure
//----------------------------------------------------------------------------
defm LD1 : SIMDLd1Multiple<"ld1">;
defm LD2 : SIMDLd2Multiple<"ld2">;
defm LD3 : SIMDLd3Multiple<"ld3">;
defm LD4 : SIMDLd4Multiple<"ld4">;

defm ST1 : SIMDSt1Multiple<"st1">;
defm ST2 : SIMDSt2Multiple<"st2">;
defm ST3 : SIMDSt3Multiple<"st3">;
defm ST4 : SIMDSt4Multiple<"st4">;

// Select a plain vector load of type `ty` to a one-register LD1.
class Ld1Pat<ValueType ty, Instruction INST>
  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;

def : Ld1Pat<v16i8, LD1Onev16b>;
def : Ld1Pat<v8i16, LD1Onev8h>;
def : Ld1Pat<v4i32, LD1Onev4s>;
def : Ld1Pat<v2i64, LD1Onev2d>;
def : Ld1Pat<v8i8,  LD1Onev8b>;
def : Ld1Pat<v4i16, LD1Onev4h>;
def : Ld1Pat<v2i32, LD1Onev2s>;
def : Ld1Pat<v1i64, LD1Onev1d>;

// Select a plain vector store of type `ty` to a one-register ST1.
class St1Pat<ValueType ty, Instruction INST>
  : Pat<(store ty:$Vt, GPR64sp:$Rn),
        (INST ty:$Vt, GPR64sp:$Rn)>;

def : St1Pat<v16i8, ST1Onev16b>;
def : St1Pat<v8i16, ST1Onev8h>;
def : St1Pat<v4i32, ST1Onev4s>;
def : St1Pat<v2i64, ST1Onev2d>;
def : St1Pat<v8i8,  ST1Onev8b>;
def : St1Pat<v4i16, ST1Onev4h>;
def : St1Pat<v2i32, ST1Onev2s>;
def : St1Pat<v1i64, ST1Onev1d>;

//---
// Single-element
//---

defm LD1R  : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
defm LD2R  : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
defm LD3R  : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
defm LD4R  : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
let mayLoad = 1, neverHasSideEffects = 1 in {
defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
}

// A dup of a loaded scalar is a replicating load: use LD1R.
def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
          (LD1Rv8b GPR64sp:$Rn)>;
def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
          (LD1Rv16b GPR64sp:$Rn)>;
def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;
// Grab the floating point version too
def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;

// Insert a loaded scalar into a lane of a 128-bit register: LD1 (lane).
class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
                    ValueType VTy, ValueType STy, Instruction LD1>
  : Pat<(vector_insert (VTy VecListOne128:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;

def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;

// 64-bit variant: widen to a Q register, lane-load, then extract the D half.
class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
                   ValueType VTy, ValueType STy, Instruction LD1>
  : Pat<(vector_insert (VTy VecListOne64:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (EXTRACT_SUBREG
            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
                 VecIndex:$idx, GPR64sp:$Rn),
            dsub)>;

def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;


defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
defm LD4 : SIMDLdSt4SingleAliases<"ld4">;

// Stores
defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;

// Store one lane of a 128-bit register: ST1 (lane).
let AddedComplexity = 15 in
class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
                    ValueType VTy, ValueType STy, Instruction ST1>
  : Pat<(scalar_store
             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
             GPR64sp:$Rn),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;

def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;

// 64-bit variant: implicitly widen the D register to Q for the lane store.
let AddedComplexity = 15 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
                   ValueType VTy, ValueType STy, Instruction ST1>
  : Pat<(scalar_store
             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
             GPR64sp:$Rn),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn)>;

def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8,  i32, ST1i8>;
def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;

// Post-indexed lane stores from a 64-bit register: the immediate form only
// applies when the increment equals the element size (XZR-offset encoding).
multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
                            ValueType VTy, ValueType STy, Instruction ST1,
                            int offset> {
  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, offset),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn, XZR)>;

  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, GPR64:$Rm),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}

defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
                        2>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;

// Post-indexed lane stores from a 128-bit register.
multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
                             ValueType VTy, ValueType STy, Instruction ST1,
                             int offset> {
  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, offset),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;

  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, GPR64:$Rm),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}
defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
                         1>;
defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
                         2>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;

let mayStore = 1, neverHasSideEffects = 1 in {
defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
}

defm ST1 : SIMDLdSt1SingleAliases<"st1">;
defm ST2 : SIMDLdSt2SingleAliases<"st2">;
defm ST3 : SIMDLdSt3SingleAliases<"st3">;
defm ST4 : SIMDLdSt4SingleAliases<"st4">;

//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------

def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;

def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>;
def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>;

def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;

//----------------------------------------------------------------------------
// Compiler-pseudos
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.

// Any instruction that defines a 32-bit result leaves the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits.
// FIXME: X86 also checks for CMOV here. Do we need something similar?
def def32 : PatLeaf<(i32 GPR32:$src), [{
  return N->getOpcode() != ISD::TRUNCATE &&
         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
         N->getOpcode() != ISD::CopyFromReg;
}]>;

// In the case of a 32-bit def that is known to implicitly zero-extend,
// we can use a SUBREG_TO_REG.
def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;

// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
def : Pat<(i64 (anyext GPR32:$src)),
          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;

// When we need to explicitly zero-extend, we use an unsigned bitfield move
// instruction (UBFM) on the enclosing super-reg.
def : Pat<(i64 (zext GPR32:$src)),
 (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;

// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
def : Pat<(i64 (sext GPR32:$src)),
 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;

// Fold a left shift of a sign-extended value into one SBFM.
def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, (i64 (i32shift_a       imm0_31:$imm)),
                              (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, (i64 (i64shift_a       imm0_63:$imm)),
                              (i64 (i64shift_sext_i8 imm0_63:$imm)))>;

def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, (i64 (i32shift_a        imm0_31:$imm)),
                              (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, (i64 (i64shift_a        imm0_63:$imm)),
                              (i64 (i64shift_sext_i16 imm0_63:$imm)))>;

def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 (i64shift_a        imm0_63:$imm)),
                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;

// sra patterns have an AddedComplexity of 10, so make sure we have a higher
// AddedComplexity for the following patterns since we want to match sext + sra
// patterns before we attempt to match a single sra node.
let AddedComplexity = 20 in {
// We support all sext + sra combinations which preserve at least one bit of the
// original value which is to be sign extended. E.g. we support shifts up to
// bitwidth-1 bits.
def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;

def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;

def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 imm0_31:$imm), 31)>;
} // AddedComplexity = 20

// To truncate, we can simply extract from a subregister.
def : Pat<(i32 (trunc GPR64sp:$src)),
          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;

// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;

// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
//
// Consider a simple memory load followed by a bitconvert then a store.
// v0 = load v2i32
// v1 = BITCAST v2i32 v0 to v4i16
// store v4i16 v1
//
// In big endian mode every memory access has an implicit byte swap. LDR and
// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
// is, they treat the vector as a sequence of elements to be byte-swapped.
// The two pairs of instructions are fundamentally incompatible. We've decided
// to use LD1/ST1 only to simplify compiler implementation.
//
// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
// the original code sequence:
// v0 = load v2i32
// v1 = REV v2i32                  (implicit)
// v2 = BITCAST v2i32 v1 to v4i16
// v3 = REV v4i16 v2               (implicit)
// store v4i16 v3
//
// But this is now broken - the value stored is different to the value loaded
// due to lane reordering. To fix this, on every BITCAST we must perform two
// other REVs:
// v0 = load v2i32
// v1 = REV v2i32                  (implicit)
// v2 = REV v2i32
// v3 = BITCAST v2i32 v2 to v4i16
// v4 = REV v4i16 v3
// v5 = REV v4i16 v4               (implicit)
// store v4i16 v5
//
// This means an extra two instructions, but actually in most cases the two REV
// instructions can be combined into one. For example:
// (REV64_2s (REV64_4h X)) === (REV32_4h X)
//
// There is also no 128-bit REV instruction. This must be synthesized with an
// EXT instruction.
//
// Most bitconverts require some sort of conversion. The only exceptions are:
// a) Identity conversions -  vNfX <-> vNiX
// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
//

let Predicates = [IsLE] in {
def : Pat<(v8i8  (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;

def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8  (bitconvert GPR64:$Xn)),
          (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
          (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
          (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
          (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;

def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
          (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS
GPR64:$Xn, FPR64)>; 4891def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), 4892 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 4893def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), 4894 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 4895def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), 4896 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 4897def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; 4898 4899def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), 4900 (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>; 4901def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), 4902 (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>; 4903def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), 4904 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 4905def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), 4906 (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>; 4907def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), 4908 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 4909 4910let Predicates = [IsLE] in { 4911def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; 4912def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; 4913def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; 4914def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; 4915} 4916let Predicates = [IsBE] in { 4917def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), 4918 (v1i64 (REV64v2i32 FPR64:$src))>; 4919def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), 4920 (v1i64 (REV64v4i16 FPR64:$src))>; 4921def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), 4922 (v1i64 (REV64v8i8 FPR64:$src))>; 4923def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), 4924 (v1i64 (REV64v2i32 FPR64:$src))>; 4925} 4926def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; 4927def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; 4928 4929let Predicates = [IsLE] in { 4930def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; 4931def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; 4932def : Pat<(v2i32 (bitconvert (v8i8 
FPR64:$src))), (v2i32 FPR64:$src)>; 4933def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; 4934def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; 4935} 4936let Predicates = [IsBE] in { 4937def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), 4938 (v2i32 (REV64v2i32 FPR64:$src))>; 4939def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), 4940 (v2i32 (REV32v4i16 FPR64:$src))>; 4941def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), 4942 (v2i32 (REV32v8i8 FPR64:$src))>; 4943def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), 4944 (v2i32 (REV64v2i32 FPR64:$src))>; 4945def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), 4946 (v2i32 (REV64v2i32 FPR64:$src))>; 4947} 4948def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; 4949 4950let Predicates = [IsLE] in { 4951def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; 4952def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; 4953def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; 4954def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; 4955def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; 4956def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; 4957} 4958let Predicates = [IsBE] in { 4959def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), 4960 (v4i16 (REV64v4i16 FPR64:$src))>; 4961def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), 4962 (v4i16 (REV32v4i16 FPR64:$src))>; 4963def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), 4964 (v4i16 (REV16v8i8 FPR64:$src))>; 4965def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), 4966 (v4i16 (REV64v4i16 FPR64:$src))>; 4967def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), 4968 (v4i16 (REV32v4i16 FPR64:$src))>; 4969def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), 4970 (v4i16 (REV64v4i16 FPR64:$src))>; 4971} 4972 4973let Predicates = [IsLE] in { 4974def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; 4975def : 
Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; 4976def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; 4977def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; 4978def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; 4979def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; 4980} 4981let Predicates = [IsBE] in { 4982def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), 4983 (v8i8 (REV64v8i8 FPR64:$src))>; 4984def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), 4985 (v8i8 (REV32v8i8 FPR64:$src))>; 4986def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), 4987 (v8i8 (REV16v8i8 FPR64:$src))>; 4988def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), 4989 (v8i8 (REV64v8i8 FPR64:$src))>; 4990def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), 4991 (v8i8 (REV32v8i8 FPR64:$src))>; 4992def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), 4993 (v8i8 (REV64v8i8 FPR64:$src))>; 4994} 4995 4996let Predicates = [IsLE] in { 4997def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>; 4998def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; 4999def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; 5000def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; 5001} 5002let Predicates = [IsBE] in { 5003def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), 5004 (f64 (REV64v2i32 FPR64:$src))>; 5005def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), 5006 (f64 (REV64v4i16 FPR64:$src))>; 5007def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), 5008 (f64 (REV64v2i32 FPR64:$src))>; 5009def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), 5010 (f64 (REV64v8i8 FPR64:$src))>; 5011} 5012def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; 5013def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; 5014 5015let Predicates = [IsLE] in { 5016def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; 5017def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 
FPR64:$src)>; 5018def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; 5019def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; 5020} 5021let Predicates = [IsBE] in { 5022def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), 5023 (v1f64 (REV64v2i32 FPR64:$src))>; 5024def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), 5025 (v1f64 (REV64v4i16 FPR64:$src))>; 5026def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), 5027 (v1f64 (REV64v8i8 FPR64:$src))>; 5028def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), 5029 (v1f64 (REV64v2i32 FPR64:$src))>; 5030} 5031def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; 5032def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; 5033 5034let Predicates = [IsLE] in { 5035def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; 5036def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; 5037def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; 5038def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; 5039def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; 5040} 5041let Predicates = [IsBE] in { 5042def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), 5043 (v2f32 (REV64v2i32 FPR64:$src))>; 5044def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), 5045 (v2f32 (REV32v4i16 FPR64:$src))>; 5046def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), 5047 (v2f32 (REV32v8i8 FPR64:$src))>; 5048def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), 5049 (v2f32 (REV64v2i32 FPR64:$src))>; 5050def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), 5051 (v2f32 (REV64v2i32 FPR64:$src))>; 5052} 5053def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; 5054 5055let Predicates = [IsLE] in { 5056def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; 5057def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; 5058def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 
FPR128:$src)>; 5059def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; 5060def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; 5061def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>; 5062} 5063let Predicates = [IsBE] in { 5064def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), 5065 (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; 5066def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), 5067 (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), 5068 (REV64v4i32 FPR128:$src), (i32 8)))>; 5069def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), 5070 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), 5071 (REV64v8i16 FPR128:$src), (i32 8)))>; 5072def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), 5073 (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; 5074def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), 5075 (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), 5076 (REV64v4i32 FPR128:$src), (i32 8)))>; 5077def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), 5078 (f128 (EXTv16i8 (REV64v16i8 FPR128:$src), 5079 (REV64v16i8 FPR128:$src), (i32 8)))>; 5080} 5081 5082let Predicates = [IsLE] in { 5083def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; 5084def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; 5085def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; 5086def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; 5087def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; 5088} 5089let Predicates = [IsBE] in { 5090def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), 5091 (v2f64 (EXTv16i8 FPR128:$src, 5092 FPR128:$src, (i32 8)))>; 5093def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), 5094 (v2f64 (REV64v4i32 FPR128:$src))>; 5095def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), 5096 (v2f64 (REV64v8i16 FPR128:$src))>; 5097def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), 5098 (v2f64 (REV64v16i8 FPR128:$src))>; 5099def : Pat<(v2f64 
(bitconvert (v4f32 FPR128:$src))), 5100 (v2f64 (REV64v4i32 FPR128:$src))>; 5101} 5102def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; 5103 5104let Predicates = [IsLE] in { 5105def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; 5106def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; 5107def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; 5108def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; 5109def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; 5110} 5111let Predicates = [IsBE] in { 5112def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), 5113 (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src), 5114 (REV64v4i32 FPR128:$src), (i32 8)))>; 5115def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), 5116 (v4f32 (REV32v8i16 FPR128:$src))>; 5117def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), 5118 (v4f32 (REV32v16i8 FPR128:$src))>; 5119def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), 5120 (v4f32 (REV64v4i32 FPR128:$src))>; 5121def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), 5122 (v4f32 (REV64v4i32 FPR128:$src))>; 5123} 5124def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; 5125 5126let Predicates = [IsLE] in { 5127def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; 5128def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; 5129def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; 5130def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; 5131def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; 5132} 5133let Predicates = [IsBE] in { 5134def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), 5135 (v2i64 (EXTv16i8 FPR128:$src, 5136 FPR128:$src, (i32 8)))>; 5137def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), 5138 (v2i64 (REV64v4i32 FPR128:$src))>; 5139def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), 5140 (v2i64 
(REV64v8i16 FPR128:$src))>; 5141def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), 5142 (v2i64 (REV64v16i8 FPR128:$src))>; 5143def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), 5144 (v2i64 (REV64v4i32 FPR128:$src))>; 5145} 5146def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; 5147 5148let Predicates = [IsLE] in { 5149def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; 5150def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; 5151def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; 5152def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; 5153def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; 5154} 5155let Predicates = [IsBE] in { 5156def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), 5157 (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src), 5158 (REV64v4i32 FPR128:$src), 5159 (i32 8)))>; 5160def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), 5161 (v4i32 (REV64v4i32 FPR128:$src))>; 5162def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), 5163 (v4i32 (REV32v8i16 FPR128:$src))>; 5164def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), 5165 (v4i32 (REV32v16i8 FPR128:$src))>; 5166def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), 5167 (v4i32 (REV64v4i32 FPR128:$src))>; 5168} 5169def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; 5170 5171let Predicates = [IsLE] in { 5172def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; 5173def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; 5174def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; 5175def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; 5176def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; 5177def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; 5178} 5179let Predicates = [IsBE] in { 5180def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), 5181 (v8i16 
(EXTv16i8 (REV64v8i16 FPR128:$src), 5182 (REV64v8i16 FPR128:$src), 5183 (i32 8)))>; 5184def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), 5185 (v8i16 (REV64v8i16 FPR128:$src))>; 5186def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), 5187 (v8i16 (REV32v8i16 FPR128:$src))>; 5188def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), 5189 (v8i16 (REV16v16i8 FPR128:$src))>; 5190def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), 5191 (v8i16 (REV64v8i16 FPR128:$src))>; 5192def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), 5193 (v8i16 (REV32v8i16 FPR128:$src))>; 5194} 5195 5196let Predicates = [IsLE] in { 5197def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; 5198def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; 5199def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; 5200def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; 5201def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; 5202def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; 5203} 5204let Predicates = [IsBE] in { 5205def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), 5206 (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src), 5207 (REV64v16i8 FPR128:$src), 5208 (i32 8)))>; 5209def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), 5210 (v16i8 (REV64v16i8 FPR128:$src))>; 5211def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), 5212 (v16i8 (REV32v16i8 FPR128:$src))>; 5213def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), 5214 (v16i8 (REV16v16i8 FPR128:$src))>; 5215def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), 5216 (v16i8 (REV64v16i8 FPR128:$src))>; 5217def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), 5218 (v16i8 (REV32v16i8 FPR128:$src))>; 5219} 5220 5221def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))), 5222 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 5223def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), 5224 (EXTRACT_SUBREG (DUPv2i64lane 
FPR128:$Rn, 1), dsub)>; 5225def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))), 5226 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 5227def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), 5228 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 5229 5230// A 64-bit subvector insert to the first 128-bit vector position 5231// is a subregister copy that needs no instruction. 5232def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (i32 0)), 5233 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 5234def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (i32 0)), 5235 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 5236def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (i32 0)), 5237 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 5238def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)), 5239 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 5240def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)), 5241 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 5242def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)), 5243 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 5244 5245// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 5246// or v2f32. 5247def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), 5248 (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), 5249 (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; 5250def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), 5251 (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), 5252 (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; 5253 // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, 5254 // so we match on v4f32 here, not v2f32. This will also catch adding 5255 // the low two lanes of a true v4f32 vector. 
// fadd of the two low lanes -> scalar FADDP on the D-subregister.
def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;

// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff),[]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>;
// NOTE(review): this pattern matches tglobaladdr but emits a texternalsym
// result operand. It appears to work because the matched $dst is simply
// forwarded into the TCRETURNdi node, but confirm this is intentional and
// not a copy-paste from the texternalsym pattern below.
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;

include "AArch64InstrAtomics.td"