//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// AArch64 target machine. It uses the target's detailed information to provide
/// more precise answers to certain TTI queries, while letting the target
/// independent and default TTI implementations handle the rest.
1436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// 1536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines//===----------------------------------------------------------------------===// 1636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "AArch64.h" 1836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "AArch64TargetMachine.h" 19dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "MCTargetDesc/AArch64AddressingModes.h" 2036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Analysis/TargetTransformInfo.h" 2136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Support/Debug.h" 2236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Target/CostTable.h" 2336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Target/TargetLowering.h" 24dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include <algorithm> 2536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesusing namespace llvm; 2636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 27dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "aarch64tti" 28dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 2936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// Declare the pass initialization routine locally as target-specific passes 3036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// don't have a target-wide initialization entry point, and so we rely on the 3136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// pass constructor initialization. 
3236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesnamespace llvm { 3336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid initializeAArch64TTIPass(PassRegistry &); 3436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 3536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 3636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesnamespace { 3736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 3836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesclass AArch64TTI final : public ImmutablePass, public TargetTransformInfo { 39dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const AArch64TargetMachine *TM; 4036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const AArch64Subtarget *ST; 4136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const AArch64TargetLowering *TLI; 4236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 43dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// Estimate the overhead of scalarizing an instruction. Insert and Extract 44dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// are set if the result needs to be inserted and/or extracted from vectors. 
45dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; 46dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 4736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinespublic: 48dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { 4936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines llvm_unreachable("This pass cannot be directly constructed"); 5036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 5136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 5236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AArch64TTI(const AArch64TargetMachine *TM) 53dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), 5436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines TLI(TM->getTargetLowering()) { 5536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines initializeAArch64TTIPass(*PassRegistry::getPassRegistry()); 5636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 5736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 58dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void initializePass() override { pushTTIStack(this); } 5936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 60dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void getAnalysisUsage(AnalysisUsage &AU) const override { 6136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines TargetTransformInfo::getAnalysisUsage(AU); 6236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 6336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 6436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// Pass identification. 6536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines static char ID; 6636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 6736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// Provide necessary pointer adjustments for the two base classes. 
68dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void *getAdjustedAnalysisPointer(const void *ID) override { 6936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (ID == &TargetTransformInfo::ID) 70dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return (TargetTransformInfo *)this; 7136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return this; 7236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 7336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 7436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// \name Scalar TTI Implementations 7536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// @{ 76dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getIntImmCost(int64_t Val) const; 77dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; 78dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, 79dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Type *Ty) const override; 80dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, 81dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Type *Ty) const override; 82dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; 8336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 8436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// @} 8536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 8636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// \name Vector TTI Implementations 8736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// @{ 8836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 89dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getNumberOfRegisters(bool Vector) const override { 9036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Vector) { 
9136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (ST->hasNEON()) 9236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 32; 9336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 0; 9436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 95dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return 31; 9636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 98dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getRegisterBitWidth(bool Vector) const override { 9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Vector) { 10036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (ST->hasNEON()) 10136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 128; 10236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 0; 10336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 10436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 64; 10536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 10636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 107dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getMaximumUnrollFactor() const override { return 2; } 108dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 109dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const 110dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines override; 111dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 112dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const 113dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines override; 114dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 115dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, 116dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines OperandValueKind Opd1Info = OK_AnyValue, 
117dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines OperandValueKind Opd2Info = OK_AnyValue) const 118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines override; 119dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 120dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override; 121dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 122dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const 123dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines override; 124dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 125dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 126dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned AddressSpace) const override; 12736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// @} 12836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}; 12936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 13036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} // end anonymous namespace 13136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 13236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesINITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti", 13336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines "AArch64 Target Transform Info", true, true, false) 13436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hineschar AArch64TTI::ID = 0; 13536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 13636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesImmutablePass * 13736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesllvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) { 13836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return new AArch64TTI(TM); 13936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 140dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 
141dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// \brief Calculate the cost of materializing a 64-bit value. This helper 142dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// method might only calculate a fraction of a larger immediate. Therefore it 143dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// is valid to return a cost of ZERO. 144dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesunsigned AArch64TTI::getIntImmCost(int64_t Val) const { 145dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Check if the immediate can be encoded within an instruction. 146dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64)) 147dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return 0; 148dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 149dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Val < 0) 150dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Val = ~Val; 151dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 152dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Calculate how many moves we will need to materialize this constant. 153dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned LZ = countLeadingZeros((uint64_t)Val); 154dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return (64 - LZ + 15) / 16; 155dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 156dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 157dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// \brief Calculate the cost of materializing the given constant. 
158dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesunsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { 159dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(Ty->isIntegerTy()); 160dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 161dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned BitSize = Ty->getPrimitiveSizeInBits(); 162dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (BitSize == 0) 163dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return ~0U; 164dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 165dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Sign-extend all constants to a multiple of 64-bit. 166dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines APInt ImmVal = Imm; 167dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (BitSize & 0x3f) 168dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ImmVal = Imm.sext((BitSize + 63) & ~0x3fU); 169dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 170dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Split the constant into 64-bit chunks and calculate the cost for each 171dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // chunk. 172dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned Cost = 0; 173dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { 174dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64); 175dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines int64_t Val = Tmp.getSExtValue(); 176dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Cost += getIntImmCost(Val); 177dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 178dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // We need at least one instruction to materialze the constant. 
179dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return std::max(1U, Cost); 180dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 181dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 182dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesunsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, 183dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const APInt &Imm, Type *Ty) const { 184dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(Ty->isIntegerTy()); 185dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 186dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned BitSize = Ty->getPrimitiveSizeInBits(); 187dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // There is no cost model for constants with a bit size of 0. Return TCC_Free 188dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // here, so that constant hoisting will ignore this constant. 189dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (BitSize == 0) 190dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return TCC_Free; 191dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 192dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned ImmIdx = ~0U; 193dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines switch (Opcode) { 194dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines default: 195dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return TCC_Free; 196dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::GetElementPtr: 197dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Always hoist the base address of a GetElementPtr. 
198dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Idx == 0) 199dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return 2 * TCC_Basic; 200dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return TCC_Free; 201dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::Store: 202dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ImmIdx = 0; 203dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines break; 204dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::Add: 205dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::Sub: 206dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::Mul: 207dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::UDiv: 208dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::SDiv: 209dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::URem: 210dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::SRem: 211dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::And: 212dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::Or: 213dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::Xor: 214dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::ICmp: 215dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ImmIdx = 1; 216dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines break; 217dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Always return TCC_Free for the shift value of a shift instruction. 
218dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::Shl: 219dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::LShr: 220dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::AShr: 221dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Idx == 1) 222dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return TCC_Free; 223dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines break; 224dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::Trunc: 225dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::ZExt: 226dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::SExt: 227dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::IntToPtr: 228dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::PtrToInt: 229dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::BitCast: 230dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::PHI: 231dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::Call: 232dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::Select: 233dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::Ret: 234dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Instruction::Load: 235dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines break; 236dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 237dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 238dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Idx == ImmIdx) { 239dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned NumConstants = (BitSize + 63) / 64; 240dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); 241dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return (Cost <= NumConstants * TCC_Basic) 242dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ? 
static_cast<unsigned>(TCC_Free) : Cost; 243dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 244dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64TTI::getIntImmCost(Imm, Ty); 245dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 246dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 247dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesunsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, 248dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const APInt &Imm, Type *Ty) const { 249dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(Ty->isIntegerTy()); 250dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 251dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned BitSize = Ty->getPrimitiveSizeInBits(); 252dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // There is no cost model for constants with a bit size of 0. Return TCC_Free 253dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // here, so that constant hoisting will ignore this constant. 
254dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (BitSize == 0) 255dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return TCC_Free; 256dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 257dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines switch (IID) { 258dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines default: 259dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return TCC_Free; 260dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Intrinsic::sadd_with_overflow: 261dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Intrinsic::uadd_with_overflow: 262dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Intrinsic::ssub_with_overflow: 263dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Intrinsic::usub_with_overflow: 264dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Intrinsic::smul_with_overflow: 265dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Intrinsic::umul_with_overflow: 266dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Idx == 1) { 267dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned NumConstants = (BitSize + 63) / 64; 268dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); 269dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return (Cost <= NumConstants * TCC_Basic) 270dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ? 
static_cast<unsigned>(TCC_Free) : Cost; 271dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 272dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines break; 273dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Intrinsic::experimental_stackmap: 274dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) 275dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return TCC_Free; 276dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines break; 277dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Intrinsic::experimental_patchpoint_void: 278dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case Intrinsic::experimental_patchpoint_i64: 279dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) 280dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return TCC_Free; 281dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines break; 282dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 283dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64TTI::getIntImmCost(Imm, Ty); 284dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 285dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 286dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesAArch64TTI::PopcntSupportKind 287dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesAArch64TTI::getPopcntSupport(unsigned TyWidth) const { 288dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); 289dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (TyWidth == 32 || TyWidth == 64) 290dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return PSK_FastHardware; 291dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount. 
292dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return PSK_Software; 293dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 294dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 295dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesunsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, 296dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Type *Src) const { 297dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines int ISD = TLI->InstructionOpcodeToISD(Opcode); 298dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(ISD && "Invalid opcode"); 299dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 300dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines EVT SrcTy = TLI->getValueType(Src); 301dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines EVT DstTy = TLI->getValueType(Dst); 302dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 303dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!SrcTy.isSimple() || !DstTy.isSimple()) 304dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); 305dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 306dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = { 307dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // LowerVectorINT_TO_FP: 308dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, 309cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, 310dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, 311dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, 312cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, 313dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, 
314cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 315cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Complex: to v2f32 316cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, 317cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, 318cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 }, 319cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, 320cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, 321cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 }, 322cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 323cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Complex: to v4f32 324cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 }, 325cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, 326cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, 327cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, 328cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 329cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Complex: to v2f64 330cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, 331cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, 332cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, 333cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, 334cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, 
335cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, 336cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 337cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 338dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // LowerVectorFP_TO_INT 339cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 }, 340dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, 341dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, 342cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 }, 343dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, 344dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, 345cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 346cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext). 
347cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 }, 348cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 }, 349cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 }, 350cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 }, 351cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 }, 352cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 }, 353cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 354cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2 355cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, 356cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 }, 357cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, 358cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 }, 359cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 360cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2. 
361cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, 362cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 }, 363cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 }, 364cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, 365cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 }, 366cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 }, 367dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines }; 368dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 369dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines int Idx = ConvertCostTableLookup<MVT>( 370dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(), 371dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SrcTy.getSimpleVT()); 372dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Idx != -1) 373dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return ConversionTbl[Idx].Cost; 374dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 375dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); 376dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 377dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 378dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesunsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, 379dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned Index) const { 380dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(Val->isVectorTy() && "This must be a vector type"); 381dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 382dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Index != -1U) { 
383dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Legalize the type. 384dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val); 385dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 386dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // This type is legalized to a scalar type. 387dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!LT.second.isVector()) 388dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return 0; 389dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 390dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // The type may be split. Normalize the index to the new type. 391dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned Width = LT.second.getVectorNumElements(); 392dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Index = Index % Width; 393dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 394dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // The element at index zero is already inside the vector. 395dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Index == 0) 396dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return 0; 397dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 398dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 399dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // All other insert/extracts cost this much. 400dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return 2; 401dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 402dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 403dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesunsigned AArch64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, 404dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines OperandValueKind Opd1Info, 405dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines OperandValueKind Opd2Info) const { 406dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Legalize the type. 
407dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); 408dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 409dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines int ISD = TLI->InstructionOpcodeToISD(Opcode); 410dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 411dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines switch (ISD) { 412dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines default: 413dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info, 414dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Opd2Info); 415dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case ISD::ADD: 416dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case ISD::MUL: 417dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case ISD::XOR: 418dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case ISD::OR: 419dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case ISD::AND: 420dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // These nodes are marked as 'custom' for combining purposes only. 421dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // We know that they are legal. See LowerAdd in ISelLowering. 
422dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return 1 * LT.first; 423dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 424dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 425dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 426dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesunsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { 427dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Address computations in vectorized code with non-consecutive addresses will 428dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // likely result in more instructions compared to scalar code where the 429dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // computation can more often be merged into the index mode. The resulting 430dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // extra micro-ops can significantly decrease throughput. 431dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned NumVectorInstToHideOverhead = 10; 432dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 433dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Ty->isVectorTy() && IsComplex) 434dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return NumVectorInstToHideOverhead; 435dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 436dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // In many cases the address computation is not merged into the instruction 437dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // addressing mode. 
438dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return 1; 439dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 440dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 441dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesunsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 442dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Type *CondTy) const { 443dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 444dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines int ISD = TLI->InstructionOpcodeToISD(Opcode); 445dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // We don't lower vector selects well that are wider than the register width. 446dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (ValTy->isVectorTy() && ISD == ISD::SELECT) { 447dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // We would need this many instructions to hide the scalarization happening. 448dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned AmortizationCost = 20; 449dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines static const TypeConversionCostTblEntry<MVT::SimpleValueType> 450dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines VectorSelectTbl[] = { 451dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost }, 452dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost }, 453dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost }, 454dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost }, 455dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost }, 456dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost } 457dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines }; 
458dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 459dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines EVT SelCondTy = TLI->getValueType(CondTy); 460dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines EVT SelValTy = TLI->getValueType(ValTy); 461dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (SelCondTy.isSimple() && SelValTy.isSimple()) { 462dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines int Idx = 463dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(), 464dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SelValTy.getSimpleVT()); 465dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Idx != -1) 466dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return VectorSelectTbl[Idx].Cost; 467dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 468dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 469dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); 470dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 471dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 472dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesunsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src, 473dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned Alignment, 474dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned AddressSpace) const { 475dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); 476dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 477dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 && 478dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Src->getVectorElementType()->isIntegerTy(64)) { 479dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Unaligned stores are extremely inefficient. 
We don't split 480dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // unaligned v2i64 stores because the negative impact that has shown in 481dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // practice on inlined memcpy code. 482dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // We make v2i64 stores expensive so that we will only vectorize if there 483dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // are 6 other instructions getting vectorized. 484dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned AmortizationCost = 6; 485dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 486dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return LT.first * 2 * AmortizationCost; 487dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 488dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 489dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) && 490dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Src->getVectorNumElements() < 8) { 491dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // We scalarize the loads/stores because there is not v.4b register and we 492dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // have to promote the elements to v.4h. 493dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned NumVecElts = Src->getVectorNumElements(); 494dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned NumVectorizableInstsToAmortize = NumVecElts * 2; 495dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // We generate 2 instructions per vector element. 496dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return NumVectorizableInstsToAmortize * NumVecElts * 2; 497dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 498dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 499dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return LT.first; 500dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 501