PPCTargetTransformInfo.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
1//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// \file 10/// This file implements a TargetTransformInfo analysis pass specific to the 11/// PPC target machine. It uses the target's detailed information to provide 12/// more precise answers to certain TTI queries, while letting the target 13/// independent and default TTI implementations handle the rest. 14/// 15//===----------------------------------------------------------------------===// 16 17#define DEBUG_TYPE "ppctti" 18#include "PPC.h" 19#include "PPCTargetMachine.h" 20#include "llvm/Analysis/TargetTransformInfo.h" 21#include "llvm/Support/Debug.h" 22#include "llvm/Target/CostTable.h" 23#include "llvm/Target/TargetLowering.h" 24using namespace llvm; 25 26// Declare the pass initialization routine locally as target-specific passes 27// don't havve a target-wide initialization entry point, and so we rely on the 28// pass constructor initialization. 29namespace llvm { 30void initializePPCTTIPass(PassRegistry &); 31} 32 33namespace { 34 35class PPCTTI final : public ImmutablePass, public TargetTransformInfo { 36 const PPCTargetMachine *TM; 37 const PPCSubtarget *ST; 38 const PPCTargetLowering *TLI; 39 40 /// Estimate the overhead of scalarizing an instruction. Insert and Extract 41 /// are set if the result needs to be inserted and/or extracted from vectors. 42 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; 43 44public: 45 PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { 46 llvm_unreachable("This pass cannot be directly constructed"); 47 } 48 49 PPCTTI(const PPCTargetMachine *TM) 50 : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), 51 TLI(TM->getTargetLowering()) { 52 initializePPCTTIPass(*PassRegistry::getPassRegistry()); 53 } 54 55 virtual void initializePass() override { 56 pushTTIStack(this); 57 } 58 59 virtual void getAnalysisUsage(AnalysisUsage &AU) const override { 60 TargetTransformInfo::getAnalysisUsage(AU); 61 } 62 63 /// Pass identification. 64 static char ID; 65 66 /// Provide necessary pointer adjustments for the two base classes. 67 virtual void *getAdjustedAnalysisPointer(const void *ID) override { 68 if (ID == &TargetTransformInfo::ID) 69 return (TargetTransformInfo*)this; 70 return this; 71 } 72 73 /// \name Scalar TTI Implementations 74 /// @{ 75 virtual PopcntSupportKind 76 getPopcntSupport(unsigned TyWidth) const override; 77 virtual void getUnrollingPreferences( 78 Loop *L, UnrollingPreferences &UP) const override; 79 80 /// @} 81 82 /// \name Vector TTI Implementations 83 /// @{ 84 85 virtual unsigned getNumberOfRegisters(bool Vector) const override; 86 virtual unsigned getRegisterBitWidth(bool Vector) const override; 87 virtual unsigned getMaximumUnrollFactor() const override; 88 virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, 89 OperandValueKind, 90 OperandValueKind) const override; 91 virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, 92 int Index, Type *SubTp) const override; 93 virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, 94 Type *Src) const override; 95 virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 96 Type *CondTy) const override; 97 virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, 98 unsigned Index) const override; 99 virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, 100 unsigned Alignment, 101 unsigned AddressSpace) const override; 102 103 /// @} 104}; 105 106} // end anonymous namespace 107 108INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti", 109 "PPC Target Transform Info", true, true, false) 110char PPCTTI::ID = 0; 111 112ImmutablePass * 113llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) { 114 return new PPCTTI(TM); 115} 116 117 118//===----------------------------------------------------------------------===// 119// 120// PPC cost model. 121// 122//===----------------------------------------------------------------------===// 123 124PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { 125 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); 126 if (ST->hasPOPCNTD() && TyWidth <= 64) 127 return PSK_FastHardware; 128 return PSK_Software; 129} 130 131void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { 132 if (ST->getDarwinDirective() == PPC::DIR_A2) { 133 // The A2 is in-order with a deep pipeline, and concatenation unrolling 134 // helps expose latency-hiding opportunities to the instruction scheduler. 135 UP.Partial = UP.Runtime = true; 136 } 137} 138 139unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { 140 if (Vector && !ST->hasAltivec()) 141 return 0; 142 return ST->hasVSX() ? 64 : 32; 143} 144 145unsigned PPCTTI::getRegisterBitWidth(bool Vector) const { 146 if (Vector) { 147 if (ST->hasAltivec()) return 128; 148 return 0; 149 } 150 151 if (ST->isPPC64()) 152 return 64; 153 return 32; 154 155} 156 157unsigned PPCTTI::getMaximumUnrollFactor() const { 158 unsigned Directive = ST->getDarwinDirective(); 159 // The 440 has no SIMD support, but floating-point instructions 160 // have a 5-cycle latency, so unroll by 5x for latency hiding. 161 if (Directive == PPC::DIR_440) 162 return 5; 163 164 // The A2 has no SIMD support, but floating-point instructions 165 // have a 6-cycle latency, so unroll by 6x for latency hiding. 166 if (Directive == PPC::DIR_A2) 167 return 6; 168 169 // FIXME: For lack of any better information, do no harm... 170 if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) 171 return 1; 172 173 // For most things, modern systems have two execution units (and 174 // out-of-order execution). 175 return 2; 176} 177 178unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, 179 OperandValueKind Op1Info, 180 OperandValueKind Op2Info) const { 181 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); 182 183 // Fallback to the default implementation. 184 return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, 185 Op2Info); 186} 187 188unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, 189 Type *SubTp) const { 190 return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); 191} 192 193unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { 194 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); 195 196 return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); 197} 198 199unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 200 Type *CondTy) const { 201 return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); 202} 203 204unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, 205 unsigned Index) const { 206 assert(Val->isVectorTy() && "This must be a vector type"); 207 208 int ISD = TLI->InstructionOpcodeToISD(Opcode); 209 assert(ISD && "Invalid opcode"); 210 211 if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) { 212 // Double-precision scalars are already located in index #0. 213 if (Index == 0) 214 return 0; 215 216 return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); 217 } 218 219 // Estimated cost of a load-hit-store delay. This was obtained 220 // experimentally as a minimum needed to prevent unprofitable 221 // vectorization for the paq8p benchmark. It may need to be 222 // raised further if other unprofitable cases remain. 223 unsigned LHSPenalty = 12; 224 225 // Vector element insert/extract with Altivec is very expensive, 226 // because they require store and reload with the attendant 227 // processor stall for load-hit-store. Until VSX is available, 228 // these need to be estimated as very costly. 229 if (ISD == ISD::EXTRACT_VECTOR_ELT || 230 ISD == ISD::INSERT_VECTOR_ELT) 231 return LHSPenalty + 232 TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); 233 234 return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); 235} 236 237unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 238 unsigned AddressSpace) const { 239 // Legalize the type. 240 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); 241 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && 242 "Invalid Opcode"); 243 244 unsigned Cost = 245 TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); 246 247 // FIXME: Update this for VSX loads/stores that support unaligned access. 248 249 // PPC in general does not support unaligned loads and stores. They'll need 250 // to be decomposed based on the alignment factor. 251 unsigned SrcBytes = LT.second.getStoreSize(); 252 if (SrcBytes && Alignment && Alignment < SrcBytes) 253 Cost += LT.first*(SrcBytes/Alignment-1); 254 255 return Cost; 256} 257 258