1//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// \file 10/// This file implements a TargetTransformInfo analysis pass specific to the 11/// PPC target machine. It uses the target's detailed information to provide 12/// more precise answers to certain TTI queries, while letting the target 13/// independent and default TTI implementations handle the rest. 14/// 15//===----------------------------------------------------------------------===// 16 17#define DEBUG_TYPE "ppctti" 18#include "PPC.h" 19#include "PPCTargetMachine.h" 20#include "llvm/Analysis/TargetTransformInfo.h" 21#include "llvm/Support/Debug.h" 22#include "llvm/Target/TargetLowering.h" 23#include "llvm/Target/CostTable.h" 24using namespace llvm; 25 26// Declare the pass initialization routine locally as target-specific passes 27// don't havve a target-wide initialization entry point, and so we rely on the 28// pass constructor initialization. 29namespace llvm { 30void initializePPCTTIPass(PassRegistry &); 31} 32 33namespace { 34 35class PPCTTI : public ImmutablePass, public TargetTransformInfo { 36 const PPCTargetMachine *TM; 37 const PPCSubtarget *ST; 38 const PPCTargetLowering *TLI; 39 40 /// Estimate the overhead of scalarizing an instruction. Insert and Extract 41 /// are set if the result needs to be inserted and/or extracted from vectors. 42 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; 43 44public: 45 PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { 46 llvm_unreachable("This pass cannot be directly constructed"); 47 } 48 49 PPCTTI(const PPCTargetMachine *TM) 50 : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), 51 TLI(TM->getTargetLowering()) { 52 initializePPCTTIPass(*PassRegistry::getPassRegistry()); 53 } 54 55 virtual void initializePass() { 56 pushTTIStack(this); 57 } 58 59 virtual void finalizePass() { 60 popTTIStack(); 61 } 62 63 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 64 TargetTransformInfo::getAnalysisUsage(AU); 65 } 66 67 /// Pass identification. 68 static char ID; 69 70 /// Provide necessary pointer adjustments for the two base classes. 71 virtual void *getAdjustedAnalysisPointer(const void *ID) { 72 if (ID == &TargetTransformInfo::ID) 73 return (TargetTransformInfo*)this; 74 return this; 75 } 76 77 /// \name Scalar TTI Implementations 78 /// @{ 79 virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; 80 81 /// @} 82 83 /// \name Vector TTI Implementations 84 /// @{ 85 86 virtual unsigned getNumberOfRegisters(bool Vector) const; 87 virtual unsigned getRegisterBitWidth(bool Vector) const; 88 virtual unsigned getMaximumUnrollFactor() const; 89 virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; 90 virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, 91 int Index, Type *SubTp) const; 92 virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, 93 Type *Src) const; 94 virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 95 Type *CondTy) const; 96 virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, 97 unsigned Index) const; 98 virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, 99 unsigned Alignment, 100 unsigned AddressSpace) const; 101 102 /// @} 103}; 104 105} // end anonymous namespace 106 107INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti", 108 "PPC Target Transform Info", true, true, false) 109char PPCTTI::ID = 0; 110 111ImmutablePass * 112llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) { 113 return new PPCTTI(TM); 114} 115 116 117//===----------------------------------------------------------------------===// 118// 119// PPC cost model. 120// 121//===----------------------------------------------------------------------===// 122 123PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { 124 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); 125 // FIXME: PPC currently does not have custom popcnt lowering even though 126 // there is hardware support. Once this is fixed, update this function 127 // to reflect the real capabilities of the hardware. 128 return PSK_Software; 129} 130 131unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { 132 if (Vector && !ST->hasAltivec()) 133 return 0; 134 return 32; 135} 136 137unsigned PPCTTI::getRegisterBitWidth(bool Vector) const { 138 if (Vector) { 139 if (ST->hasAltivec()) return 128; 140 return 0; 141 } 142 143 if (ST->isPPC64()) 144 return 64; 145 return 32; 146 147} 148 149unsigned PPCTTI::getMaximumUnrollFactor() const { 150 unsigned Directive = ST->getDarwinDirective(); 151 // The 440 has no SIMD support, but floating-point instructions 152 // have a 5-cycle latency, so unroll by 5x for latency hiding. 153 if (Directive == PPC::DIR_440) 154 return 5; 155 156 // The A2 has no SIMD support, but floating-point instructions 157 // have a 6-cycle latency, so unroll by 6x for latency hiding. 158 if (Directive == PPC::DIR_A2) 159 return 6; 160 161 // FIXME: For lack of any better information, do no harm... 162 if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) 163 return 1; 164 165 // For most things, modern systems have two execution units (and 166 // out-of-order execution). 167 return 2; 168} 169 170unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { 171 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); 172 173 // Fallback to the default implementation. 174 return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); 175} 176 177unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, 178 Type *SubTp) const { 179 return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); 180} 181 182unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { 183 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); 184 185 return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); 186} 187 188unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 189 Type *CondTy) const { 190 return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); 191} 192 193unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, 194 unsigned Index) const { 195 assert(Val->isVectorTy() && "This must be a vector type"); 196 197 int ISD = TLI->InstructionOpcodeToISD(Opcode); 198 assert(ISD && "Invalid opcode"); 199 200 // Estimated cost of a load-hit-store delay. This was obtained 201 // experimentally as a minimum needed to prevent unprofitable 202 // vectorization for the paq8p benchmark. It may need to be 203 // raised further if other unprofitable cases remain. 204 unsigned LHSPenalty = 12; 205 206 // Vector element insert/extract with Altivec is very expensive, 207 // because they require store and reload with the attendant 208 // processor stall for load-hit-store. Until VSX is available, 209 // these need to be estimated as very costly. 210 if (ISD == ISD::EXTRACT_VECTOR_ELT || 211 ISD == ISD::INSERT_VECTOR_ELT) 212 return LHSPenalty + 213 TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); 214 215 return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); 216} 217 218unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 219 unsigned AddressSpace) const { 220 // Legalize the type. 221 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); 222 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && 223 "Invalid Opcode"); 224 225 // Each load/store unit costs 1. 226 unsigned Cost = LT.first * 1; 227 228 // PPC in general does not support unaligned loads and stores. They'll need 229 // to be decomposed based on the alignment factor. 230 unsigned SrcBytes = LT.second.getStoreSize(); 231 if (SrcBytes && Alignment && Alignment < SrcBytes) 232 Cost *= (SrcBytes/Alignment); 233 234 return Cost; 235} 236 237