1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===// 2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// The LLVM Compiler Infrastructure 4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// This file is distributed under the University of Illinois Open Source 6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// License. See LICENSE.TXT for details. 7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//==-----------------------------------------------------------------------===// 9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "AMDILDevices.h" 11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "AMDGPUInstrInfo.h" 12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/ADT/Statistic.h" 13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/ADT/StringExtras.h" 14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/ADT/StringRef.h" 15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/ADT/Twine.h" 16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/Constants.h" 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineFunction.h" 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineFunctionAnalysis.h" 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/Function.h" 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/Instructions.h" 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/Module.h" 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/Support/Debug.h" 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/Support/MathExtras.h" 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include <sstream> 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if 0 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSTATISTIC(PointerAssignments, "Number of dynamic pointer " 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "assigments discovered"); 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSTATISTIC(PointerSubtract, "Number of pointer subtractions discovered"); 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgusing namespace llvm; 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// The Peephole optimization pass is used to do simple last minute optimizations 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// that are required for correct code or to remove redundant functions 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace { 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass OpaqueType; 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass { 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic: 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org TargetMachine &TM; 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org static char ID; 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org AMDGPUPeepholeOpt(TargetMachine &tm); 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ~AMDGPUPeepholeOpt(); 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const char *getPassName() const; 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool runOnFunction(Function &F); 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool doInitialization(Module &M); 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool doFinalization(Module &M); 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void getAnalysisUsage(AnalysisUsage &AU) const; 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprotected: 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Function to initiate all of the instruction level optimizations. 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool instLevelOptimizations(BasicBlock::iterator *inst); 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Quick check to see if we need to dump all of the pointers into the 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // arena. If this is correct, then we set all pointers to exist in arena. This 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // is a workaround for aliasing of pointers in a struct/union. 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool dumpAllIntoArena(Function &F); 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Because I don't want to invalidate any pointers while in the 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // safeNestedForEachFunction. I push atomic conversions to a vector and handle 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // it later. This function does the conversions if required. 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void doAtomicConversionIfNeeded(Function &F); 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Because __amdil_is_constant cannot be properly evaluated if 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // optimizations are disabled, the call's are placed in a vector 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // and evaluated after the __amdil_image* functions are evaluated 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // which should allow the __amdil_is_constant function to be 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // evaluated correctly. 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void doIsConstCallConversionIfNeeded(); 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool mChanged; 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool mDebug; 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool mConvertAtomics; 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CodeGenOpt::Level optLevel; 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Run a series of tests to see if we can optimize a CALL instruction. 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool optimizeCallInst(BasicBlock::iterator *bbb); 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // A peephole optimization to optimize bit extract sequences. 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool optimizeBitExtract(Instruction *inst); 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // A peephole optimization to optimize bit insert sequences. 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool optimizeBitInsert(Instruction *inst); 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool setupBitInsert(Instruction *base, 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *&src, 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *&mask, 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *&shift); 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Expand the bit field insert instruction on versions of OpenCL that 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // don't support it. 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool expandBFI(CallInst *CI); 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Expand the bit field mask instruction on version of OpenCL that 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // don't support it. 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool expandBFM(CallInst *CI); 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // this case we need to expand them. These functions check for 24bit functions 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // and then expand. 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool isSigned24BitOps(CallInst *CI); 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void expandSigned24BitOps(CallInst *CI); 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // One optimization that can occur is that if the required workgroup size is 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // specified then the result of get_local_size is known at compile time and 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // can be returned accordingly. 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool isRWGLocalOpt(CallInst *CI); 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // On northern island cards, the division is slightly less accurate than on 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // previous generations, so we need to utilize a more accurate division. So we 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // can translate the accurate divide to a normal divide on all other cards. 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool convertAccurateDivide(CallInst *CI); 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void expandAccurateDivide(CallInst *CI); 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If the alignment is set incorrectly, it can produce really inefficient 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // code. This checks for this scenario and fixes it if possible. 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool correctMisalignedMemOp(Instruction *inst); 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If we are in no opt mode, then we need to make sure that 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // local samplers are properly propagated as constant propagation 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // doesn't occur and we need to know the value of kernel defined 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // samplers at compile time. 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool propagateSamplerInst(CallInst *CI); 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Helper functions 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Group of functions that recursively calculate the size of a structure based 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // on it's sub-types. 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t getTypeSize(Type * const T, bool dereferencePtr = false); 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t getTypeSize(StructType * const ST, bool dereferencePtr = false); 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false); 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false); 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false); 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false); 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false); 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false); 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMContext *mCTX; 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Function *mF; 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const AMDGPUSubtarget *mSTM; 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs; 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SmallVector<CallInst *, 16> isConstVec; 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; // class AMDGPUPeepholeOpt 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org char AMDGPUPeepholeOpt::ID = 0; 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// A template function that has two levels of looping before calling the 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// function with a pointer to the current iterator. 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgtemplate<class InputIterator, class SecondIterator, class Function> 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgFunction safeNestedForEach(InputIterator First, InputIterator Last, 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SecondIterator S, Function F) 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for ( ; First != Last; ++First) { 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SecondIterator sf, sl; 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (sf = First->begin(), sl = First->end(); 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sf != sl; ) { 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!F(&sf)) { 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ++sf; 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return F; 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} // anonymous namespace 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace llvm { 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org FunctionPass * 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org createAMDGPUPeepholeOpt(TargetMachine &tm) 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org { 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return new AMDGPUPeepholeOpt(tm); 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} // llvm namespace 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm) 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org : FunctionPass(ID), TM(tm) 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mDebug = false; 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org optLevel = TM.getOptLevel(); 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgconst char * 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::getPassName() const 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return "AMDGPU PeepHole Optimization Pass"; 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgcontainsPointerType(Type *Ty) 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!Ty) { 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(Ty->getTypeID()) { 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::StructTyID: { 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const StructType *ST = dyn_cast<StructType>(Ty); 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (StructType::element_iterator stb = ST->element_begin(), 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ste = ST->element_end(); stb != ste; ++stb) { 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!containsPointerType(*stb)) { 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::VectorTyID: 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::ArrayTyID: 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType()); 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::PointerTyID: 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }; 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool dumpAll = false; 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (Function::const_arg_iterator cab = F.arg_begin(), 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cae = F.arg_end(); cab != cae; ++cab) { 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const Argument *arg = cab; 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const PointerType *PT = dyn_cast<PointerType>(arg->getType()); 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!PT) { 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type *DereferencedType = PT->getElementType(); 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!dyn_cast<StructType>(DereferencedType) 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ) { 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!containsPointerType(DereferencedType)) { 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // FIXME: Because a pointer inside of a struct/union may be aliased to 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // another pointer we need to take the conservative approach and place all 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // pointers into the arena until more advanced detection is implemented. 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dumpAll = true; 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return dumpAll; 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isConstVec.empty()) { 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) { 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CallInst *CI = isConstVec[x]; 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *CV = dyn_cast<Constant>(CI->getOperand(0)); 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type *aType = Type::getInt32Ty(*mCTX); 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1) 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org : ConstantInt::get(aType, 0); 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->replaceAllUsesWith(Val); 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->eraseFromParent(); 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org isConstVec.clear(); 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Don't do anything if we don't have any atomic operations. 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (atomicFuncs.empty()) { 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Change the function name for the atomic if it is required 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint32_t size = atomicFuncs.size(); 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (uint32_t x = 0; x < size; ++x) { 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org atomicFuncs[x].first->setOperand( 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org atomicFuncs[x].first->getNumOperands()-1, 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org atomicFuncs[x].second); 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mChanged = true; 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mConvertAtomics) { 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::runOnFunction(Function &MF) 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mChanged = false; 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mF = &MF; 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mSTM = &TM.getSubtarget<AMDGPUSubtarget>(); 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MF.dump(); 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mCTX = &MF.getType()->getContext(); 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mConvertAtomics = true; 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(), 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations), 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this)); 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org doAtomicConversionIfNeeded(MF); 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org doIsConstCallConversionIfNeeded(); 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MF.dump(); 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return mChanged; 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *inst = (*bbb); 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CallInst *CI = dyn_cast<CallInst>(inst); 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!CI) { 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isSigned24BitOps(CI)) { 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org expandSigned24BitOps(CI); 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ++(*bbb); 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->eraseFromParent(); 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (propagateSamplerInst(CI)) { 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (expandBFI(CI) || expandBFM(CI)) { 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ++(*bbb); 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->eraseFromParent(); 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (convertAccurateDivide(CI)) { 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org expandAccurateDivide(CI); 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ++(*bbb); 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->eraseFromParent(); 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName(); 327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (calleeName.startswith("__amdil_is_constant")) { 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If we do not have optimizations, then this 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // cannot be properly evaluated, so we add the 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // call instruction to a vector and process 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // them at the end of processing after the 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // samplers have been correctly handled. 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (optLevel == CodeGenOpt::None) { 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org isConstVec.push_back(CI); 335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *CV = dyn_cast<Constant>(CI->getOperand(0)); 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type *aType = Type::getInt32Ty(*mCTX); 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1) 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org : ConstantInt::get(aType, 0); 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->replaceAllUsesWith(Val); 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ++(*bbb); 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->eraseFromParent(); 344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (calleeName.equals("__amdil_is_asic_id_i32")) { 349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0)); 350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type *aType = Type::getInt32Ty(*mCTX); 351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *Val = CV; 352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (Val) { 353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Val = ConstantInt::get(aType, 354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mSTM->device()->getDeviceFlag() & CV->getZExtValue()); 355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Val = ConstantInt::get(aType, 0); 357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->replaceAllUsesWith(Val); 359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ++(*bbb); 360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->eraseFromParent(); 361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1)); 364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!F) { 365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (F->getName().startswith("__atom") && !CI->getNumUses() 368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && F->getName().find("_xchg") == StringRef::npos) { 369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::string buffer(F->getName().str() + "_noret"); 370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org F = dyn_cast<Function>( 371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org F->getParent()->getOrInsertFunction(buffer, F->getFunctionType())); 372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F)); 373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment) 376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) { 377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!mConvertAtomics) { 380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org StringRef name = F->getName(); 383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (name.startswith("__atom") && name.find("_g") != StringRef::npos) { 384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mConvertAtomics = false; 385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::setupBitInsert(Instruction *base, 391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *&src, 392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *&mask, 393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *&shift) 394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!base) { 396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Null pointer passed into function.\n"; 398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool andOp = false; 402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (base->getOpcode() == Instruction::Shl) { 403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shift = dyn_cast<Constant>(base->getOperand(1)); 404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (base->getOpcode() == Instruction::And) { 405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mask = dyn_cast<Constant>(base->getOperand(1)); 406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org andOp = true; 407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n"; 410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If the base is neither a Shl or a And, we don't fit any of the patterns above. 412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src = dyn_cast<Instruction>(base->getOperand(0)); 415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!src) { 416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Failed setup since the base operand is not an instruction!\n"; 418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If we find an 'and' operation, then we don't need to 422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // find the next operation as we already know the 423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // bits that are valid at this point. 424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (andOp) { 425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src->getOpcode() == Instruction::Shl && !shift) { 428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shift = dyn_cast<Constant>(src->getOperand(1)); 429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src = dyn_cast<Instruction>(src->getOperand(0)); 430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (src->getOpcode() == Instruction::And && !mask) { 431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mask = dyn_cast<Constant>(src->getOperand(1)); 432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!mask && !shift) { 434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Failed setup since both mask and shift are NULL!\n"; 436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Did not find a constant mask or a shift. 438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) 444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!inst) { 446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!inst->isBinaryOp()) { 449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->getOpcode() != Instruction::Or) { 452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (optLevel == CodeGenOpt::None) { 455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // We want to do an optimization on a sequence of ops that in the end equals a 458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // single ISA instruction. 459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F) 460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Some simplified versions of this pattern are as follows: 461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0 462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E 463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B 464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // (A & B) | (D << F) when (1 << F) >= B 465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // (A << C) | (D & E) when (1 << C) >= E 466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { 467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // The HD4XXX hardware doesn't support the ubit_insert instruction. 468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type *aType = inst->getType(); 471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool isVector = aType->isVectorTy(); 472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int numEle = 1; 473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // This optimization only works on 32bit integers. 474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (aType->getScalarType() 475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org != Type::getInt32Ty(inst->getContext())) { 476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isVector) { 479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const VectorType *VT = dyn_cast<VectorType>(aType); 480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org numEle = VT->getNumElements(); 481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // We currently cannot support more than 4 elements in a intrinsic and we 482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // cannot support Vec3 types. 483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (numEle > 4 || numEle == 3) { 484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TODO: Handle vectors. 488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isVector) { 489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "!!! Vectors are not supported yet!\n"; 491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *LHSSrc = NULL, *RHSSrc = NULL; 495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *LHSMask = NULL, *RHSMask = NULL; 496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *LHSShift = NULL, *RHSShift = NULL; 497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0)); 498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1)); 499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) { 500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Found an OR Operation that failed setup!\n"; 502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dump(); 503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (LHS) { LHS->dump(); } 504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (LHSSrc) { LHSSrc->dump(); } 505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (LHSMask) { LHSMask->dump(); } 506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (LHSShift) { LHSShift->dump(); } 507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // There was an issue with the setup for BitInsert. 509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) { 512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Found an OR Operation that failed setup!\n"; 514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dump(); 515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (RHS) { RHS->dump(); } 516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (RHSSrc) { RHSSrc->dump(); } 517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (RHSMask) { RHSMask->dump(); } 518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (RHSShift) { RHSShift->dump(); } 519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // There was an issue with the setup for BitInsert. 521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n"; 525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Op: "; inst->dump(); 526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; } 527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; } 528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; } 529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; } 530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; } 531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; } 532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; } 533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; } 534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *offset = NULL; 536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *width = NULL; 537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t lhsMaskVal = 0, rhsMaskVal = 0; 538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t lhsShiftVal = 0, rhsShiftVal = 0; 539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t lhsMaskWidth = 0, rhsMaskWidth = 0; 540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t lhsMaskOffset = 0, rhsMaskOffset = 0; 541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lhsMaskVal = (int32_t)(LHSMask 542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0); 543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rhsMaskVal = (int32_t)(RHSMask 544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0); 545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lhsShiftVal = (int32_t)(LHSShift 546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0); 547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rhsShiftVal = (int32_t)(RHSShift 548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0); 549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal; 550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal; 551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal; 552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal; 553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks). 554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")"); 556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ; 557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << (RHSMask ? " & E)" : ")"); 558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n"); 559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "A = LHSSrc\t\tD = RHSSrc \n"; 560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n"; 561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n"; 562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "width(B) = " << lhsMaskWidth; 563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n"; 564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "offset(B) = " << lhsMaskOffset; 565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n"; 566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Constraints: \n"; 567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "\t(1) B ^ E == 0\n"; 568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "\t(2-LHS) B is a mask\n"; 569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "\t(2-LHS) E is a mask\n"; 570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n"; 571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n"; 572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) { 574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << lhsMaskVal << " ^ " << rhsMaskVal; 576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n"; 577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Failed constraint 1!\n"; 578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "LHS = " << lhsMaskOffset << ""; 583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = "; 584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)); 585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "\nRHS = " << rhsMaskOffset << ""; 586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = "; 587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)); 588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "\n"; 589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) { 591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset = ConstantInt::get(aType, lhsMaskOffset, false); 592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org width = ConstantInt::get(aType, lhsMaskWidth, false); 593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org RHSSrc = RHS; 594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) { 595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n"; 597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Failed constraint 2!\n"; 598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!LHSShift) { 602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, 603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "MaskShr", LHS); 604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (lhsShiftVal != lhsMaskOffset) { 605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, 606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "MaskShr", LHS); 607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Optimizing LHS!\n"; 610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) { 612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset = ConstantInt::get(aType, rhsMaskOffset, false); 613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org width = ConstantInt::get(aType, rhsMaskWidth, false); 614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LHSSrc = RHSSrc; 615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org RHSSrc = LHS; 616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) { 617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Non-Mask: " << rhsMaskVal << "\n"; 619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Failed constraint 2!\n"; 620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!RHSShift) { 624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, 625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "MaskShr", RHS); 626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (rhsShiftVal != rhsMaskOffset) { 627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, 628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "MaskShr", RHS); 629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Optimizing RHS!\n"; 632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Failed constraint 3!\n"; 636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; } 641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; } 642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; } 643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; } 644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!offset || !width) { 646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Either width or offset are NULL, failed detection!\n"; 648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Lets create the function signature. 652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::vector<Type *> callTypes; 653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org callTypes.push_back(aType); 654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org callTypes.push_back(aType); 655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org callTypes.push_back(aType); 656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org callTypes.push_back(aType); 657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org FunctionType *funcType = FunctionType::get(aType, callTypes, false); 658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::string name = "__amdil_ubit_insert"; 659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; } 660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Function *Func = 661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> 662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org getOrInsertFunction(llvm::StringRef(name), funcType)); 663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *Operands[4] = { 664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org width, 665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset, 666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LHSSrc, 667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org RHSSrc 668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }; 669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt"); 670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mDebug) { 671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "Old Inst: "; 672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dump(); 673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "New Inst: "; 674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->dump(); 675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dbgs() << "\n\n"; 676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->insertBefore(inst); 678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->replaceAllUsesWith(CI); 679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) 684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!inst) { 686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!inst->isBinaryOp()) { 689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->getOpcode() != Instruction::And) { 692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (optLevel == CodeGenOpt::None) { 695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // We want to do some simple optimizations on Shift right/And patterns. The 698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a 699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // value smaller than 32 and C is a mask. If C is a constant value, then the 700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // following transformation can occur. For signed integers, it turns into the 701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned 702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // integers, it turns into the function call dst = 703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract 704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // can be found in Section 7.9 of the ATI IL spec of the stream SDK for 705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Evergreen hardware. 706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { 707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // This does not work on HD4XXX hardware. 708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type *aType = inst->getType(); 711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool isVector = aType->isVectorTy(); 712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // XXX Support vector types 714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isVector) { 715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int numEle = 1; 718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // This only works on 32bit integers 719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (aType->getScalarType() 720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org != Type::getInt32Ty(inst->getContext())) { 721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isVector) { 724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const VectorType *VT = dyn_cast<VectorType>(aType); 725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org numEle = VT->getNumElements(); 726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // We currently cannot support more than 4 elements in a intrinsic and we 727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // cannot support Vec3 types. 728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (numEle > 4 || numEle == 3) { 729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0)); 733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If the first operand is not a shift instruction, then we can return as it 734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // doesn't match this pattern. 735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!ShiftInst || !ShiftInst->isShift()) { 736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If we are a shift left, then we need don't match this pattern. 739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ShiftInst->getOpcode() == Instruction::Shl) { 740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool isSigned = ShiftInst->isArithmeticShift(); 743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1)); 744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1)); 745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Lets make sure that the shift value and the and mask are constant integers. 746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!AndMask || !ShrVal) { 747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *newMaskConst; 750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *shiftValConst; 751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isVector) { 752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Handle the vector case 753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::vector<Constant *> maskVals; 754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::vector<Constant *> shiftVals; 755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask); 756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal); 757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type *scalarType = AndMaskVec->getType()->getScalarType(); 758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(AndMaskVec->getNumOperands() == 759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ShrValVec->getNumOperands() && "cannot have a " 760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "combination where the number of elements to a " 761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "shift and an and are different!"); 762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) { 763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x)); 764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x)); 765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!AndCI || !ShiftIC) { 766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint32_t maskVal = (uint32_t)AndCI->getZExtValue(); 769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isMask_32(maskVal)) { 770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org maskVal = (uint32_t)CountTrailingOnes_32(maskVal); 773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue(); 774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If the mask or shiftval is greater than the bitcount, then break out. 775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (maskVal >= 32 || shiftVal >= 32) { 776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If the mask val is greater than the the number of original bits left 779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // then this optimization is invalid. 780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (maskVal > (32 - shiftVal)) { 781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned)); 784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned)); 785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newMaskConst = ConstantVector::get(maskVals); 787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shiftValConst = ConstantVector::get(shiftVals); 788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Handle the scalar case 790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue(); 791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // This must be a mask value where all lower bits are set to 1 and then any 792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // bit higher is set to 0. 793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isMask_32(maskVal)) { 794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org maskVal = (uint32_t)CountTrailingOnes_32(maskVal); 797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Count the number of bits set in the mask, this is the width of the 798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // resulting bit set that is extracted from the source value. 799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue(); 800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If the mask or shift val is greater than the bitcount, then break out. 801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (maskVal >= 32 || shiftVal >= 32) { 802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If the mask val is greater than the the number of original bits left then 805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // this optimization is invalid. 806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (maskVal > (32 - shiftVal)) { 807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newMaskConst = ConstantInt::get(aType, maskVal, isSigned); 810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shiftValConst = ConstantInt::get(aType, shiftVal, isSigned); 811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Lets create the function signature. 813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::vector<Type *> callTypes; 814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org callTypes.push_back(aType); 815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org callTypes.push_back(aType); 816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org callTypes.push_back(aType); 817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org FunctionType *funcType = FunctionType::get(aType, callTypes, false); 818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::string name = "llvm.AMDIL.bit.extract.u32"; 819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isVector) { 820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org name += ".v" + itostr(numEle) + "i32"; 821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org name += "."; 823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Lets create the function. 825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Function *Func = 826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> 827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org getOrInsertFunction(llvm::StringRef(name), funcType)); 828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *Operands[3] = { 829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ShiftInst->getOperand(0), 830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shiftValConst, 831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newMaskConst 832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }; 833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Lets create the Call with the operands 834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt"); 835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->setDoesNotAccessMemory(); 836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->insertBefore(inst); 837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->replaceAllUsesWith(CI); 838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::expandBFI(CallInst *CI) 843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!CI) { 845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *LHS = CI->getOperand(CI->getNumOperands() - 1); 848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!LHS->getName().startswith("__amdil_bfi")) { 849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type* type = CI->getOperand(0)->getType(); 852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *negOneConst = NULL; 853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (type->isVectorTy()) { 854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::vector<Constant *> negOneVals; 855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org negOneConst = ConstantInt::get(CI->getContext(), 856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org APInt(32, StringRef("-1"), 10)); 857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (size_t x = 0, 858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) { 859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org negOneVals.push_back(negOneConst); 860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org negOneConst = ConstantVector::get(negOneVals); 862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org negOneConst = ConstantInt::get(CI->getContext(), 864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org APInt(32, StringRef("-1"), 10)); 865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // __amdil_bfi => (A & B) | (~A & C) 867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator *lhs = 868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator::Create(Instruction::And, CI->getOperand(0), 869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->getOperand(1), "bfi_and", CI); 870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator *rhs = 871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst, 872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "bfi_not", CI); 873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2), 874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "bfi_and", CI); 875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI); 876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->replaceAllUsesWith(lhs); 877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::expandBFM(CallInst *CI) 882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!CI) { 884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *LHS = CI->getOperand(CI->getNumOperands() - 1); 887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!LHS->getName().startswith("__amdil_bfm")) { 888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 890f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f) 891f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *newMaskConst = NULL; 892f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *newShiftConst = NULL; 893f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type* type = CI->getOperand(0)->getType(); 894f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (type->isVectorTy()) { 895f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::vector<Constant*> newMaskVals, newShiftVals; 896f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); 897f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); 898f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (size_t x = 0, 899f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) { 900f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newMaskVals.push_back(newMaskConst); 901f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newShiftVals.push_back(newShiftConst); 902f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 903f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newMaskConst = ConstantVector::get(newMaskVals); 904f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newShiftConst = ConstantVector::get(newShiftVals); 905f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 906f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); 907f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); 908f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 909f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator *lhs = 910f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator::Create(Instruction::And, CI->getOperand(0), 911f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newMaskConst, "bfm_mask", CI); 912f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst, 913f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lhs, "bfm_shl", CI); 914f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lhs = BinaryOperator::Create(Instruction::Sub, lhs, 915f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newShiftConst, "bfm_sub", CI); 916f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator *rhs = 917f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator::Create(Instruction::And, CI->getOperand(1), 918f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newMaskConst, "bfm_mask", CI); 919f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI); 920f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->replaceAllUsesWith(lhs); 921f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 922f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 923f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 924f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 925f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) 926f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 927f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *inst = (*bbb); 928f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (optimizeCallInst(bbb)) { 929f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 930f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 931f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (optimizeBitExtract(inst)) { 932f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 933f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 934f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (optimizeBitInsert(inst)) { 935f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 936f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 937f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (correctMisalignedMemOp(inst)) { 938f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 939f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 940f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 941f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 942f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 943f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) 944f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 945f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LoadInst *linst = dyn_cast<LoadInst>(inst); 946f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org StoreInst *sinst = dyn_cast<StoreInst>(inst); 947f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned alignment; 948f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type* Ty = inst->getType(); 949f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (linst) { 950f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org alignment = linst->getAlignment(); 951f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Ty = inst->getType(); 952f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (sinst) { 953f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org alignment = sinst->getAlignment(); 954f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Ty = sinst->getValueOperand()->getType(); 955f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 956f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 957f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 958f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned size = getTypeSize(Ty); 959f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (size == alignment || size < alignment) { 960f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 961f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 962f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!Ty->isStructTy()) { 963f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 964f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 965f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (alignment < 4) { 966f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (linst) { 967f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org linst->setAlignment(0); 968f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 969f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (sinst) { 970f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->setAlignment(0); 971f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 972f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 973f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 974f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 975f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 976f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 977f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) 978f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 979f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!CI) { 980f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 981f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 982f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *LHS = CI->getOperand(CI->getNumOperands() - 1); 983f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::string namePrefix = LHS->getName().substr(0, 14); 984f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24" 985f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && namePrefix != "__amdil__imul24_high") { 986f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 987f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 988f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) { 989f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 990f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 991f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 992f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 993f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 994f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 995f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) 996f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 997f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(isSigned24BitOps(CI) && "Must be a " 998f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "signed 24 bit operation to call this function!"); 999f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *LHS = CI->getOperand(CI->getNumOperands()-1); 1000f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // On 7XX and 8XX we do not have signed 24bit, so we need to 1001f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // expand it to the following: 1002f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // imul24 turns into 32bit imul 1003f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // imad24 turns into 32bit imad 1004f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // imul24_high turns into 32bit imulhigh 1005f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (LHS->getName().substr(0, 14) == "__amdil_imad24") { 1006f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type *aType = CI->getOperand(0)->getType(); 1007f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool isVector = aType->isVectorTy(); 1008f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1; 1009f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::vector<Type*> callTypes; 1010f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org callTypes.push_back(CI->getOperand(0)->getType()); 1011f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org callTypes.push_back(CI->getOperand(1)->getType()); 1012f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org callTypes.push_back(CI->getOperand(2)->getType()); 1013f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org FunctionType *funcType = 1014f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); 1015f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::string name = "__amdil_imad"; 1016f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isVector) { 1017f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org name += "_v" + itostr(numEle) + "i32"; 1018f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1019f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org name += "_i32"; 1020f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1021f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Function *Func = dyn_cast<Function>( 1022f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->getParent()->getParent()->getParent()-> 1023f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org getOrInsertFunction(llvm::StringRef(name), funcType)); 1024f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *Operands[3] = { 1025f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->getOperand(0), 1026f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->getOperand(1), 1027f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->getOperand(2) 1028f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }; 1029f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CallInst *nCI = CallInst::Create(Func, Operands, "imad24"); 1030f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org nCI->insertBefore(CI); 1031f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->replaceAllUsesWith(nCI); 1032f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") { 1033f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator *mulOp = 1034f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator::Create(Instruction::Mul, CI->getOperand(0), 1035f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->getOperand(1), "imul24", CI); 1036f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->replaceAllUsesWith(mulOp); 1037f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") { 1038f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type *aType = CI->getOperand(0)->getType(); 1039f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1040f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool isVector = aType->isVectorTy(); 1041f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1; 1042f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::vector<Type*> callTypes; 1043f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org callTypes.push_back(CI->getOperand(0)->getType()); 1044f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org callTypes.push_back(CI->getOperand(1)->getType()); 1045f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org FunctionType *funcType = 1046f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); 1047f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::string name = "__amdil_imul_high"; 1048f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isVector) { 1049f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org name += "_v" + itostr(numEle) + "i32"; 1050f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1051f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org name += "_i32"; 1052f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1053f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Function *Func = dyn_cast<Function>( 1054f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->getParent()->getParent()->getParent()-> 1055f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org getOrInsertFunction(llvm::StringRef(name), funcType)); 1056f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *Operands[2] = { 1057f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->getOperand(0), 1058f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->getOperand(1) 1059f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }; 1060f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high"); 1061f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org nCI->insertBefore(CI); 1062f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->replaceAllUsesWith(nCI); 1063f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1064f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1065f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1066f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1067f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) 1068f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1069f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return (CI != NULL 1070f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && CI->getOperand(CI->getNumOperands() - 1)->getName() 1071f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org == "__amdil_get_local_size_int"); 1072f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1073f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1074f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1075f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) 1076f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1077f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!CI) { 1078f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1079f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1080f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX 1081f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && (mSTM->getDeviceName() == "cayman")) { 1082f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1083f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1084f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20) 1085f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org == "__amdil_improved_div"; 1086f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1087f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1088f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1089f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) 1090f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1091f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(convertAccurateDivide(CI) 1092f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && "expanding accurate divide can only happen if it is expandable!"); 1093f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator *divOp = 1094f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0), 1095f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->getOperand(1), "fdiv32", CI); 1096f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CI->replaceAllUsesWith(divOp); 1097f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1098f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1099f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) 1101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (optLevel != CodeGenOpt::None) { 1103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!CI) { 1107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned funcNameIdx = 0; 1111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org funcNameIdx = CI->getNumOperands() - 1; 1112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org StringRef calleeName = CI->getOperand(funcNameIdx)->getName(); 1113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (calleeName != "__amdil_image2d_read_norm" 1114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && calleeName != "__amdil_image2d_read_unnorm" 1115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && calleeName != "__amdil_image3d_read_norm" 1116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && calleeName != "__amdil_image3d_read_unnorm") { 1117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned samplerIdx = 2; 1121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org samplerIdx = 1; 1122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *sampler = CI->getOperand(samplerIdx); 1123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LoadInst *lInst = dyn_cast<LoadInst>(sampler); 1124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!lInst) { 1125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { 1129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand()); 1133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If we are loading from what is not a global value, then we 1134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // fail and return. 1135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!gv) { 1136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If we don't have an initializer or we have an initializer and 1140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // the initializer is not a 32bit integer, we fail. 1141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!gv->hasInitializer() 1142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || !gv->getInitializer()->getType()->isIntegerTy(32)) { 1143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Now that we have the global variable initializer, lets replace 1147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // all uses of the load instruction with the samplerVal and 1148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // reparse the __amdil_is_constant() function. 1149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Constant *samplerVal = gv->getInitializer(); 1150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lInst->replaceAllUsesWith(samplerVal); 1151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::doInitialization(Module &M) 1156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::doFinalization(Module &M) 1162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const 1168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org AU.addRequired<MachineFunctionAnalysis>(); 1170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org FunctionPass::getAnalysisUsage(AU); 1171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org AU.setPreservesAll(); 1172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) { 1175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t size = 0; 1176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!T) { 1177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return size; 1178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (T->getTypeID()) { 1180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::X86_FP80TyID: 1181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::FP128TyID: 1182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::PPC_FP128TyID: 1183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::LabelTyID: 1184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0 && "These types are not supported by this backend"); 1185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 1186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::FloatTyID: 1187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::DoubleTyID: 1188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size = T->getPrimitiveSizeInBits() >> 3; 1189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::PointerTyID: 1191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr); 1192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::IntegerTyID: 1194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr); 1195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::StructTyID: 1197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr); 1198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::ArrayTyID: 1200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr); 1201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::FunctionTyID: 1203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr); 1204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case Type::VectorTyID: 1206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr); 1207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }; 1209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return size; 1210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST, 1213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool dereferencePtr) { 1214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t size = 0; 1215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!ST) { 1216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return size; 1217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type *curType; 1219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org StructType::element_iterator eib; 1220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org StructType::element_iterator eie; 1221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { 1222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org curType = *eib; 1223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size += getTypeSize(curType, dereferencePtr); 1224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return size; 1226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT, 1229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool dereferencePtr) { 1230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return IT ? (IT->getBitWidth() >> 3) : 0; 1231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT, 1234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool dereferencePtr) { 1235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0 && "Should not be able to calculate the size of an function type"); 1236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 1237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT, 1240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool dereferencePtr) { 1241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return (size_t)(AT ? (getTypeSize(AT->getElementType(), 1242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dereferencePtr) * AT->getNumElements()) 1243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org : 0); 1244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT, 1247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool dereferencePtr) { 1248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return VT ? (VT->getBitWidth() >> 3) : 0; 1249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT, 1252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool dereferencePtr) { 1253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!PT) { 1254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 1255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Type *CT = PT->getElementType(); 1257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (CT->getTypeID() == Type::StructTyID && 1258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) { 1259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return getTypeSize(dyn_cast<StructType>(CT)); 1260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (dereferencePtr) { 1261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t size = 0; 1262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { 1263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size += getTypeSize(PT->getContainedType(x), dereferencePtr); 1264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return size; 1266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 4; 1268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgsize_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT, 1272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool dereferencePtr) { 1273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org //assert(0 && "Should not be able to calculate the size of an opaque type"); 1274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 4; 1275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1276