AMDILPeepholeOptimizer.cpp revision a75c6163e605f35b14f26930dd9227e4f337ec9e
//===-- AMDILPeepholeOptimizer.cpp - AMDIL peephole optimizations ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//

#define DEBUG_TYPE "PeepholeOpt"
#ifdef DEBUG
#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
#else
#define DEBUGME 0
#endif

#include "AMDILAlgorithms.tpp"
#include "AMDILDevices.h"
#include "AMDILGlobalManager.h"
#include "AMDILKernelManager.h"
#include "AMDILMachineFunctionInfo.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"

#include <sstream>

#if 0
STATISTIC(PointerAssignments, "Number of dynamic pointer "
    "assignments discovered");
STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
#endif
STATISTIC(LocalFuncs, "Number of get_local_size(N) functions removed");

using namespace llvm;
// The peephole optimization pass is used to do simple last-minute
// optimizations that are required for correct code or to remove redundant
// functions.
namespace {
class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass {
public:
  TargetMachine &TM;
  static char ID;
  AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
  ~AMDILPeepholeOpt();
  const char *getPassName() const;
  bool runOnFunction(Function &F);
  bool doInitialization(Module &M);
  bool doFinalization(Module &M);
  void getAnalysisUsage(AnalysisUsage &AU) const;
protected:
private:
  // Function to initiate all of the instruction level optimizations.
  bool instLevelOptimizations(BasicBlock::iterator *inst);
  // Quick check to see if we need to dump all of the pointers into the
  // arena. If this is correct, then we set all pointers to exist in the
  // arena. This is a workaround for aliasing of pointers in a struct/union.
  bool dumpAllIntoArena(Function &F);
  // Because we don't want to invalidate any pointers while in the
  // safeNestedForEach function, atomic conversions are pushed to a vector
  // and handled later. This function does the conversions if required.
  void doAtomicConversionIfNeeded(Function &F);
  // Because __amdil_is_constant cannot be properly evaluated if
  // optimizations are disabled, the calls are placed in a vector
  // and evaluated after the __amdil_image* functions are evaluated,
  // which should allow the __amdil_is_constant function to be
  // evaluated correctly.
  void doIsConstCallConversionIfNeeded();
  bool mChanged;
  bool mDebug;
  bool mRWGOpt;
  bool mConvertAtomics;
  CodeGenOpt::Level optLevel;
  // Run a series of tests to see if we can optimize a CALL instruction.
  bool optimizeCallInst(BasicBlock::iterator *bbb);
  // A peephole optimization to optimize bit extract sequences.
  bool optimizeBitExtract(Instruction *inst);
  // A peephole optimization to optimize bit insert sequences.
  bool optimizeBitInsert(Instruction *inst);
  bool setupBitInsert(Instruction *base,
                      Instruction *&src,
                      Constant *&mask,
                      Constant *&shift);
  // Expand the bit field insert instruction on versions of OpenCL that
  // don't support it.
  bool expandBFI(CallInst *CI);
  // Expand the bit field mask instruction on versions of OpenCL that
  // don't support it.
  bool expandBFM(CallInst *CI);
  // On 7XX and 8XX hardware, we do not have 24-bit signed operations, so in
  // this case we need to expand them. These functions check for 24-bit
  // functions and then expand them.
  bool isSigned24BitOps(CallInst *CI);
  void expandSigned24BitOps(CallInst *CI);
  // One optimization that can occur is that if the required workgroup size is
  // specified then the result of get_local_size is known at compile time and
  // can be returned accordingly.
  bool isRWGLocalOpt(CallInst *CI);
  void expandRWGLocalOpt(CallInst *CI);
  // On Northern Islands cards, the division is slightly less accurate than on
  // previous generations, so we need to utilize a more accurate division, and
  // we can translate the accurate divide to a normal divide on all other
  // cards.
  bool convertAccurateDivide(CallInst *CI);
  void expandAccurateDivide(CallInst *CI);
  // If the alignment is set incorrectly, it can produce really inefficient
  // code. This checks for this scenario and fixes it if possible.
  bool correctMisalignedMemOp(Instruction *inst);

  // If we are in no-opt mode, then we need to make sure that
  // local samplers are properly propagated, as constant propagation
  // doesn't occur and we need to know the value of kernel-defined
  // samplers at compile time.
  bool propagateSamplerInst(CallInst *CI);

  LLVMContext *mCTX;
  Function *mF;
  const AMDILSubtarget *mSTM;
  SmallVector<std::pair<CallInst *, Function *>, 16> atomicFuncs;
  SmallVector<CallInst *, 16> isConstVec;
}; // class AMDILPeepholeOpt
char AMDILPeepholeOpt::ID = 0;
} // anonymous namespace

namespace llvm {
FunctionPass *
createAMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
{
  return new AMDILPeepholeOpt(tm AMDIL_OPT_LEVEL_VAR);
}
} // llvm namespace

AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
  : FunctionPass(ID), TM(tm)
{
  mDebug = DEBUGME;
  optLevel = TM.getOptLevel();
}

AMDILPeepholeOpt::~AMDILPeepholeOpt()
{
}

const char *
AMDILPeepholeOpt::getPassName() const
{
  return "AMDIL PeepHole Optimization Pass";
}

bool
containsPointerType(Type *Ty)
{
  if (!Ty) {
    return false;
  }
  switch (Ty->getTypeID()) {
  default:
    return false;
  case Type::StructTyID: {
    const StructType *ST = dyn_cast<StructType>(Ty);
    for (StructType::element_iterator stb = ST->element_begin(),
         ste = ST->element_end(); stb != ste; ++stb) {
      if (!containsPointerType(*stb)) {
        continue;
      }
      return true;
    }
    break;
  }
  case Type::VectorTyID:
  case Type::ArrayTyID:
    return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
  case Type::PointerTyID:
    return true;
  };
  return false;
}
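// For example (an illustrative sketch, not from the original source): for a
// kernel argument typed as a pointer to
//   struct S { int x; float *p; };
// containsPointerType returns true because the struct's second element is a
// pointer, so dumpAllIntoArena below reports true and runOnFunction
// conservatively reroutes the kernel's pointer arguments.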
bool
AMDILPeepholeOpt::dumpAllIntoArena(Function &F)
{
  bool dumpAll = false;
  for (Function::const_arg_iterator cab = F.arg_begin(),
       cae = F.arg_end(); cab != cae; ++cab) {
    const Argument *arg = cab;
    const PointerType *PT = dyn_cast<PointerType>(arg->getType());
    if (!PT) {
      continue;
    }
    Type *DereferencedType = PT->getElementType();
    if (!dyn_cast<StructType>(DereferencedType)) {
      continue;
    }
    if (!containsPointerType(DereferencedType)) {
      continue;
    }
    // FIXME: Because a pointer inside of a struct/union may be aliased to
    // another pointer, we need to take the conservative approach and place
    // all pointers into the arena until more advanced detection is
    // implemented.
    dumpAll = true;
  }
  return dumpAll;
}

void
AMDILPeepholeOpt::doIsConstCallConversionIfNeeded()
{
  if (isConstVec.empty()) {
    return;
  }
  for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
    CallInst *CI = isConstVec[x];
    Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
    Type *aType = Type::getInt32Ty(*mCTX);
    Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
                              : ConstantInt::get(aType, 0);
    CI->replaceAllUsesWith(Val);
    CI->eraseFromParent();
  }
  isConstVec.clear();
}

void
AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
{
  // Don't do anything if we don't have any atomic operations.
  if (atomicFuncs.empty()) {
    return;
  }
  // Change the function name for the atomic if it is required.
  uint32_t size = atomicFuncs.size();
  for (uint32_t x = 0; x < size; ++x) {
    atomicFuncs[x].first->setOperand(
        atomicFuncs[x].first->getNumOperands()-1,
        atomicFuncs[x].second);
  }
  mChanged = true;
  if (mConvertAtomics) {
    return;
  }
  // If we did not convert all of the atomics, then we need to make sure that
  // the atomics that were not converted have their base pointers set to use
  // the arena path.
  Function::arg_iterator argB = F.arg_begin();
  Function::arg_iterator argE = F.arg_end();
  AMDILKernelManager *KM = mSTM->getKernelManager();
  AMDILMachineFunctionInfo *mMFI =
      getAnalysis<MachineFunctionAnalysis>().getMF()
          .getInfo<AMDILMachineFunctionInfo>();
  for (; argB != argE; ++argB) {
    if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaUAV)) {
      KM->setUAVID(argB,
          mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID));
      mMFI->uav_insert(
          mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID));
    } else {
      KM->setUAVID(argB,
          mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
      mMFI->uav_insert(
          mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
    }
  }
}

bool
AMDILPeepholeOpt::runOnFunction(Function &MF)
{
  mChanged = false;
  mF = &MF;
  mSTM = &TM.getSubtarget<AMDILSubtarget>();
  if (mDebug) {
    MF.dump();
  }
  mCTX = &MF.getType()->getContext();
  mConvertAtomics = true;
  if (dumpAllIntoArena(MF)) {
    for (Function::const_arg_iterator cab = MF.arg_begin(),
         cae = MF.arg_end(); cab != cae; ++cab) {
      const Argument *arg = cab;
      AMDILKernelManager *KM = mSTM->getKernelManager();
      KM->setUAVID(getBasePointerValue(arg),
          mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
    }
  }
  mRWGOpt = mSTM->getGlobalManager()->hasRWG(MF.getName());
  safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
      std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations),
                   this));

  doAtomicConversionIfNeeded(MF);
  doIsConstCallConversionIfNeeded();

  if (mDebug) {
    MF.dump();
  }
  return mChanged;
}
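// Iterator note: optimizeCallInst below receives the BasicBlock::iterator by
// pointer because it may erase the call it is inspecting; in that case it
// advances the iterator (++(*bbb)) before erasing and returns true, so the
// safeNestedForEach driver above does not step through a freed instruction.
// The other helpers rewrite in place and leave the iterator alone.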
bool
AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
{
  Instruction *inst = (*bbb);
  CallInst *CI = dyn_cast<CallInst>(inst);
  if (!CI) {
    return false;
  }
  if (isSigned24BitOps(CI)) {
    expandSigned24BitOps(CI);
    ++(*bbb);
    CI->eraseFromParent();
    return true;
  }
  if (isRWGLocalOpt(CI)) {
    expandRWGLocalOpt(CI);
    return false;
  }
  if (propagateSamplerInst(CI)) {
    return false;
  }
  if (expandBFI(CI) || expandBFM(CI)) {
    ++(*bbb);
    CI->eraseFromParent();
    return true;
  }
  if (convertAccurateDivide(CI)) {
    expandAccurateDivide(CI);
    ++(*bbb);
    CI->eraseFromParent();
    return true;
  }

  StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
  if (calleeName.startswith("__amdil_is_constant")) {
    // If we do not have optimizations, then this cannot be properly
    // evaluated, so we add the call instruction to a vector and process
    // the vector at the end of processing, after the samplers have been
    // correctly handled.
    if (optLevel == CodeGenOpt::None) {
      isConstVec.push_back(CI);
      return false;
    } else {
      Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
      Type *aType = Type::getInt32Ty(*mCTX);
      Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
                                : ConstantInt::get(aType, 0);
      CI->replaceAllUsesWith(Val);
      ++(*bbb);
      CI->eraseFromParent();
      return true;
    }
  }

  if (calleeName.equals("__amdil_is_asic_id_i32")) {
    ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
    Type *aType = Type::getInt32Ty(*mCTX);
    Value *Val = CV;
    if (Val) {
      Val = ConstantInt::get(aType,
          mSTM->device()->getDeviceFlag() & CV->getZExtValue());
    } else {
      Val = ConstantInt::get(aType, 0);
    }
    CI->replaceAllUsesWith(Val);
    ++(*bbb);
    CI->eraseFromParent();
    return true;
  }
  Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
  if (!F) {
    return false;
  }
  if (F->getName().startswith("__atom") && !CI->getNumUses()
      && F->getName().find("_xchg") == StringRef::npos) {
    std::string buffer(F->getName().str() + "_noret");
    F = dyn_cast<Function>(
        F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
    atomicFuncs.push_back(std::make_pair(CI, F));
  }

  if (!mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
      && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
    return false;
  }
  if (!mConvertAtomics) {
    return false;
  }
  StringRef name = F->getName();
  if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
    Value *ptr = CI->getOperand(0);
    const Value *basePtr = getBasePointerValue(ptr);
    const Argument *Arg = dyn_cast<Argument>(basePtr);
    if (Arg) {
      AMDILGlobalManager *GM = mSTM->getGlobalManager();
      int32_t id = GM->getArgID(Arg);
      if (id >= 0) {
        std::stringstream ss;
        ss << name.str() << "_" << id;
        std::string val = ss.str();
        F = dyn_cast<Function>(
            F->getParent()->getOrInsertFunction(val, F->getFunctionType()));
        atomicFuncs.push_back(std::make_pair(CI, F));
      } else {
        mConvertAtomics = false;
      }
    } else {
      mConvertAtomics = false;
    }
  }
  return false;
}
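// For example (an illustrative sketch with hypothetical builtin names, not
// from the original source): a call to a global-memory atomic such as
// __atom_add_g32 whose result is unused is first redirected to a
// __atom_add_g32_noret declaration, and if its base pointer resolves to
// kernel argument 2, a second rewrite targets __atom_add_g32_noret_2 so the
// backend can bind the atomic to that argument's UAV.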
bool
AMDILPeepholeOpt::setupBitInsert(Instruction *base,
                                 Instruction *&src,
                                 Constant *&mask,
                                 Constant *&shift)
{
  if (!base) {
    if (mDebug) {
      dbgs() << "Null pointer passed into function.\n";
    }
    return false;
  }
  bool andOp = false;
  if (base->getOpcode() == Instruction::Shl) {
    shift = dyn_cast<Constant>(base->getOperand(1));
  } else if (base->getOpcode() == Instruction::And) {
    mask = dyn_cast<Constant>(base->getOperand(1));
    andOp = true;
  } else {
    if (mDebug) {
      dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
    }
    // If the base is neither a Shl nor an And, it doesn't fit any of the
    // patterns above.
    return false;
  }
  src = dyn_cast<Instruction>(base->getOperand(0));
  if (!src) {
    if (mDebug) {
      dbgs() << "Failed setup since the base operand is not an instruction!\n";
    }
    return false;
  }
  // If we find an 'and' operation, then we don't need to
  // find the next operation, as we already know the
  // bits that are valid at this point.
  if (andOp) {
    return true;
  }
  if (src->getOpcode() == Instruction::Shl && !shift) {
    shift = dyn_cast<Constant>(src->getOperand(1));
    src = dyn_cast<Instruction>(src->getOperand(0));
  } else if (src->getOpcode() == Instruction::And && !mask) {
    mask = dyn_cast<Constant>(src->getOperand(1));
  }
  if (!mask && !shift) {
    if (mDebug) {
      dbgs() << "Failed setup since both mask and shift are NULL!\n";
    }
    // Did not find a constant mask or a shift.
    return false;
  }
  return true;
}

bool
AMDILPeepholeOpt::optimizeBitInsert(Instruction *inst)
{
  if (!inst) {
    return false;
  }
  if (!inst->isBinaryOp()) {
    return false;
  }
  if (inst->getOpcode() != Instruction::Or) {
    return false;
  }
  if (optLevel == CodeGenOpt::None) {
    return false;
  }
  // We want to do an optimization on a sequence of ops that in the end equals
  // a single ISA instruction.
  // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
  // Some simplified versions of this pattern are as follows:
  // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
  // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
  // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
  // (A & B) | (D << F) when (1 << F) >= B
  // (A << C) | (D & E) when (1 << C) >= E
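  // For example (an illustrative sketch, not from the original source), the
  // scalar IR
  //   %lo = and i32 %a, 255
  //   %hi = shl i32 %b, 8
  //   %r  = or i32 %lo, %hi
  // matches the (A & B) | (D << F) form and is a candidate for the single
  // __amdil_ubit_insert call built further below.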
  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // The HD4XXX hardware doesn't support the ubit_insert instruction.
    return false;
  }
  Type *aType = inst->getType();
  bool isVector = aType->isVectorTy();
  int numEle = 1;
  // This optimization only works on 32-bit integers.
  if (aType->getScalarType()
      != Type::getInt32Ty(inst->getContext())) {
    return false;
  }
  if (isVector) {
    const VectorType *VT = dyn_cast<VectorType>(aType);
    numEle = VT->getNumElements();
    // We currently cannot support more than 4 elements in an intrinsic and we
    // cannot support Vec3 types.
    if (numEle > 4 || numEle == 3) {
      return false;
    }
  }
  // TODO: Handle vectors.
  if (isVector) {
    if (mDebug) {
      dbgs() << "!!! Vectors are not supported yet!\n";
    }
    return false;
  }
  Instruction *LHSSrc = NULL, *RHSSrc = NULL;
  Constant *LHSMask = NULL, *RHSMask = NULL;
  Constant *LHSShift = NULL, *RHSShift = NULL;
  Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
  Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
  if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
    if (mDebug) {
      dbgs() << "Found an OR Operation that failed setup!\n";
      inst->dump();
      if (LHS) { LHS->dump(); }
      if (LHSSrc) { LHSSrc->dump(); }
      if (LHSMask) { LHSMask->dump(); }
      if (LHSShift) { LHSShift->dump(); }
    }
    // There was an issue with the setup for BitInsert.
    return false;
  }
  if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
    if (mDebug) {
      dbgs() << "Found an OR Operation that failed setup!\n";
      inst->dump();
      if (RHS) { RHS->dump(); }
      if (RHSSrc) { RHSSrc->dump(); }
      if (RHSMask) { RHSMask->dump(); }
      if (RHSShift) { RHSShift->dump(); }
    }
    // There was an issue with the setup for BitInsert.
    return false;
  }
  if (mDebug) {
    dbgs() << "Found an OR operation that can possibly be optimized to a ubit insert!\n";
    dbgs() << "Op: "; inst->dump();
    dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
    dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
  }
  Constant *offset = NULL;
  Constant *width = NULL;
  int32_t lhsMaskVal = 0, rhsMaskVal = 0;
  int32_t lhsShiftVal = 0, rhsShiftVal = 0;
  int32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
  int32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
  lhsMaskVal = (int32_t)(LHSMask
      ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
  rhsMaskVal = (int32_t)(RHSMask
      ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
  lhsShiftVal = (int32_t)(LHSShift
      ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
  rhsShiftVal = (int32_t)(RHSShift
      ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
  lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
  rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
  lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
  rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
  // TODO: Handle the case of A & B | D & ~B (i.e. inverted masks).
  if (mDebug) {
    dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
    dbgs() << (LHSShift ? " << C)" : ")") << " | ((D";
    dbgs() << (RHSMask ? " & E)" : ")");
    dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
    dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
    dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
    dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
    dbgs() << "width(B) = " << lhsMaskWidth;
    dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
    dbgs() << "offset(B) = " << lhsMaskOffset;
    dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
    dbgs() << "Constraints: \n";
    dbgs() << "\t(1) B ^ E == 0\n";
    dbgs() << "\t(2-LHS) B is a mask\n";
    dbgs() << "\t(2-RHS) E is a mask\n";
    dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
    dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
  }
  if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
    if (mDebug) {
      dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
      dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
      dbgs() << "Failed constraint 1!\n";
    }
    return false;
  }
  if (mDebug) {
    dbgs() << "LHS = " << lhsMaskOffset;
    dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
    dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
    dbgs() << "\nRHS = " << rhsMaskOffset;
    dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
    dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
    dbgs() << "\n";
  }
  if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
    offset = ConstantInt::get(aType, lhsMaskOffset, false);
    width = ConstantInt::get(aType, lhsMaskWidth, false);
    RHSSrc = RHS;
    if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
      if (mDebug) {
        dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
        dbgs() << "Failed constraint 2!\n";
      }
      return false;
    }
    if (!LHSShift) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", LHS);
    } else if (lhsShiftVal != lhsMaskOffset) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", LHS);
    }
    if (mDebug) {
      dbgs() << "Optimizing LHS!\n";
    }
  } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
    offset = ConstantInt::get(aType, rhsMaskOffset, false);
    width = ConstantInt::get(aType, rhsMaskWidth, false);
    LHSSrc = RHSSrc;
    RHSSrc = LHS;
    if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
      if (mDebug) {
        dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
        dbgs() << "Failed constraint 2!\n";
      }
      return false;
    }
    if (!RHSShift) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", RHS);
    } else if (rhsShiftVal != rhsMaskOffset) {
      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
          "MaskShr", RHS);
    }
    if (mDebug) {
      dbgs() << "Optimizing RHS!\n";
    }
  } else {
    if (mDebug) {
      dbgs() << "Failed constraint 3!\n";
    }
    return false;
  }
  if (mDebug) {
    dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
    dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
    dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
    dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
  }
  if (!offset || !width) {
    if (mDebug) {
      dbgs() << "Either width or offset are NULL, failed detection!\n";
    }
    return false;
  }
  // Let's create the function signature.
  std::vector<Type *> callTypes;
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
  std::string name = "__amdil_ubit_insert";
  if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
  Function *Func =
      dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
          getOrInsertFunction(llvm::StringRef(name), funcType));
  Value *Operands[4] = {
    width,
    offset,
    LHSSrc,
    RHSSrc
  };
  CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
  if (mDebug) {
    dbgs() << "Old Inst: ";
    inst->dump();
    dbgs() << "New Inst: ";
    CI->dump();
    dbgs() << "\n\n";
  }
  CI->insertBefore(inst);
  inst->replaceAllUsesWith(CI);
  return true;
}

bool
AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
{
  if (!inst) {
    return false;
  }
  if (!inst->isBinaryOp()) {
    return false;
  }
  if (inst->getOpcode() != Instruction::And) {
    return false;
  }
  if (optLevel == CodeGenOpt::None) {
    return false;
  }
  // We want to do some simple optimizations on shift-right/and patterns. The
  // basic optimization is to turn (A >> B) & C, where A is a 32-bit type, B
  // is a value smaller than 32 and C is a mask. If C is a constant value,
  // then the following transformation can occur. For signed integers, it
  // turns into the function call dst = __amdil_ibit_extract(log2(C), B, A).
  // For unsigned integers, it turns into the function call
  // dst = __amdil_ubit_extract(log2(C), B, A). The function
  // __amdil_[u|i]bit_extract can be found in Section 7.9 of the ATI IL spec
  // of the stream SDK for Evergreen hardware.
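  // For example (an illustrative sketch, not from the original source), the
  // scalar IR
  //   %s = lshr i32 %a, 8
  //   %r = and i32 %s, 15
  // extracts a 4-bit field at offset 8, and the code below rewrites it into
  // a call like %r = call i32 @__amdil_ubit_extract_i32(i32 4, i32 8, i32 %a).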
  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // This does not work on HD4XXX hardware.
    return false;
  }
  Type *aType = inst->getType();
  bool isVector = aType->isVectorTy();
  int numEle = 1;
  // This only works on 32-bit integers.
  if (aType->getScalarType()
      != Type::getInt32Ty(inst->getContext())) {
    return false;
  }
  if (isVector) {
    const VectorType *VT = dyn_cast<VectorType>(aType);
    numEle = VT->getNumElements();
    // We currently cannot support more than 4 elements in an intrinsic and we
    // cannot support Vec3 types.
    if (numEle > 4 || numEle == 3) {
      return false;
    }
  }
  BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
  // If the first operand is not a shift instruction, then we can return, as
  // it doesn't match this pattern.
  if (!ShiftInst || !ShiftInst->isShift()) {
    return false;
  }
  // If it is a shift left, then it doesn't match this pattern.
  if (ShiftInst->getOpcode() == Instruction::Shl) {
    return false;
  }
  bool isSigned = ShiftInst->isArithmeticShift();
  Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
  Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
  // Let's make sure that the shift value and the and mask are constant
  // integers.
  if (!AndMask || !ShrVal) {
    return false;
  }
  Constant *newMaskConst;
  Constant *shiftValConst;
  if (isVector) {
    // Handle the vector case.
    std::vector<Constant *> maskVals;
    std::vector<Constant *> shiftVals;
    ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
    ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
    Type *scalarType = AndMaskVec->getType()->getScalarType();
    assert(AndMaskVec->getNumOperands() ==
           ShrValVec->getNumOperands() && "cannot have a "
           "combination where the number of elements to a "
           "shift and an and are different!");
    for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
      ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
      ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
      if (!AndCI || !ShiftIC) {
        return false;
      }
      uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
      if (!isMask_32(maskVal)) {
        return false;
      }
      maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
      uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
      // If the mask or shift value is greater than the bit count, then break
      // out.
      if (maskVal >= 32 || shiftVal >= 32) {
        return false;
      }
      // If the mask value is greater than the number of original bits left,
      // then this optimization is invalid.
      if (maskVal > (32 - shiftVal)) {
        return false;
      }
      maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
      shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
    }
    newMaskConst = ConstantVector::get(maskVals);
    shiftValConst = ConstantVector::get(shiftVals);
  } else {
    // Handle the scalar case.
    uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
    // This must be a mask value where all lower bits are set to 1 and then
    // any bit higher is set to 0.
    if (!isMask_32(maskVal)) {
      return false;
    }
    maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
    // Count the number of bits set in the mask; this is the width of the
    // resulting bit set that is extracted from the source value.
    uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
    // If the mask or shift value is greater than the bit count, then break
    // out.
    if (maskVal >= 32 || shiftVal >= 32) {
      return false;
    }
    // If the mask value is greater than the number of original bits left,
    // then this optimization is invalid.
    if (maskVal > (32 - shiftVal)) {
      return false;
    }
    newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
    shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
  }
  // Let's create the function signature.
  std::vector<Type *> callTypes;
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  callTypes.push_back(aType);
  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
  std::string name = "__amdil_ubit_extract";
  if (isVector) {
    name += "_v" + itostr(numEle) + "i32";
  } else {
    name += "_i32";
  }
  // Let's create the function.
  Function *Func =
      dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
          getOrInsertFunction(llvm::StringRef(name), funcType));
  Value *Operands[3] = {
    newMaskConst,
    shiftValConst,
    ShiftInst->getOperand(0)
  };
  // Let's create the call with the operands.
  CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
  CI->insertBefore(inst);
  inst->replaceAllUsesWith(CI);
  return true;
}

bool
AMDILPeepholeOpt::expandBFI(CallInst *CI)
{
  if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
    return false;
  }
  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
  if (!LHS->getName().startswith("__amdil_bfi")) {
    return false;
  }
  Type *type = CI->getOperand(0)->getType();
  Constant *negOneConst = NULL;
  if (type->isVectorTy()) {
    std::vector<Constant *> negOneVals;
    negOneConst = ConstantInt::get(CI->getContext(),
        APInt(32, StringRef("-1"), 10));
    for (size_t x = 0,
         y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
      negOneVals.push_back(negOneConst);
    }
    negOneConst = ConstantVector::get(negOneVals);
  } else {
    negOneConst = ConstantInt::get(CI->getContext(),
        APInt(32, StringRef("-1"), 10));
  }
  // __amdil_bfi => (A & B) | (~A & C)
  BinaryOperator *lhs =
      BinaryOperator::Create(Instruction::And, CI->getOperand(0),
          CI->getOperand(1), "bfi_and", CI);
  BinaryOperator *rhs =
      BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
          "bfi_not", CI);
  rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
      "bfi_and", CI);
  lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
  CI->replaceAllUsesWith(lhs);
  return true;
}

bool
AMDILPeepholeOpt::expandBFM(CallInst *CI)
{
  if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
    return false;
  }
  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
  if (!LHS->getName().startswith("__amdil_bfm")) {
    return false;
  }
  // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
  Constant *newMaskConst = NULL;
  Constant *newShiftConst = NULL;
  Type *type = CI->getOperand(0)->getType();
  if (type->isVectorTy()) {
    std::vector<Constant *> newMaskVals, newShiftVals;
    newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
    newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
    for (size_t x = 0,
         y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
      newMaskVals.push_back(newMaskConst);
      newShiftVals.push_back(newShiftConst);
    }
    newMaskConst = ConstantVector::get(newMaskVals);
    newShiftConst = ConstantVector::get(newShiftVals);
  } else {
    newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
    newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
  }
  BinaryOperator *lhs =
      BinaryOperator::Create(Instruction::And, CI->getOperand(0),
          newMaskConst, "bfm_mask", CI);
  lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
      lhs, "bfm_shl", CI);
  lhs = BinaryOperator::Create(Instruction::Sub, lhs,
      newShiftConst, "bfm_sub", CI);
  BinaryOperator *rhs =
      BinaryOperator::Create(Instruction::And, CI->getOperand(1),
          newMaskConst, "bfm_mask", CI);
  lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
  CI->replaceAllUsesWith(lhs);
  return true;
}
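// For example (an illustrative sketch, not from the original source), a call
// %r = call i32 @__amdil_bfi(i32 %m, i32 %x, i32 %y) is expanded by
// expandBFI above into
//   %t0 = and i32 %m, %x
//   %t1 = xor i32 %m, -1
//   %t2 = and i32 %t1, %y
//   %r  = or i32 %t0, %t2
// and expandBFM rewrites __amdil_bfm(%a, %b) into the equivalent of
// ((1 << (%a & 31)) - 1) << (%b & 31).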
bool
AMDILPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
{
  Instruction *inst = (*bbb);
  if (optimizeCallInst(bbb)) {
    return true;
  }
  if (optimizeBitExtract(inst)) {
    return false;
  }
  if (optimizeBitInsert(inst)) {
    return false;
  }
  if (correctMisalignedMemOp(inst)) {
    return false;
  }
  return false;
}

bool
AMDILPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
{
  LoadInst *linst = dyn_cast<LoadInst>(inst);
  StoreInst *sinst = dyn_cast<StoreInst>(inst);
  unsigned alignment;
  Type *Ty = inst->getType();
  if (linst) {
    alignment = linst->getAlignment();
    Ty = inst->getType();
  } else if (sinst) {
    alignment = sinst->getAlignment();
    Ty = sinst->getValueOperand()->getType();
  } else {
    return false;
  }
  unsigned size = getTypeSize(Ty);
  if (size <= alignment) {
    return false;
  }
  if (!Ty->isStructTy()) {
    return false;
  }
  if (alignment < 4) {
    if (linst) {
      linst->setAlignment(0);
      return true;
    } else if (sinst) {
      sinst->setAlignment(0);
      return true;
    }
  }
  return false;
}

bool
AMDILPeepholeOpt::isSigned24BitOps(CallInst *CI)
{
  if (!CI) {
    return false;
  }
  // __amdil_imul24_high is also covered by the __amdil_imul24 prefix below.
  StringRef name = CI->getOperand(CI->getNumOperands() - 1)->getName();
  if (!name.startswith("__amdil_imad24")
      && !name.startswith("__amdil_imul24")) {
    return false;
  }
  if (mSTM->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
    return false;
  }
  return true;
}

void
AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI)
{
  assert(isSigned24BitOps(CI) && "Must be a "
         "signed 24 bit operation to call this function!");
  Value *LHS = CI->getOperand(CI->getNumOperands()-1);
  // On 7XX and 8XX we do not have signed 24-bit operations, so we need to
  // expand them to the following:
  //   imul24      turns into a 32-bit imul
  //   imad24      turns into a 32-bit imad
  //   imul24_high turns into a 32-bit imul_high
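  // For example (an illustrative sketch, not from the original source), on
  // 7XX/8XX devices a call
  //   %r = call i32 @__amdil_imul24(i32 %a, i32 %b)
  // is simply replaced with %r = mul i32 %a, %b, while imad24 and
  // imul24_high calls are redirected to the 32-bit __amdil_imad and
  // __amdil_imul_high built-ins.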
  if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
    Type *aType = CI->getOperand(0)->getType();
    bool isVector = aType->isVectorTy();
    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
    std::vector<Type *> callTypes;
    callTypes.push_back(CI->getOperand(0)->getType());
    callTypes.push_back(CI->getOperand(1)->getType());
    callTypes.push_back(CI->getOperand(2)->getType());
    FunctionType *funcType =
        FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
    std::string name = "__amdil_imad";
    if (isVector) {
      name += "_v" + itostr(numEle) + "i32";
    } else {
      name += "_i32";
    }
    Function *Func = dyn_cast<Function>(
        CI->getParent()->getParent()->getParent()->
            getOrInsertFunction(llvm::StringRef(name), funcType));
    Value *Operands[3] = {
      CI->getOperand(0),
      CI->getOperand(1),
      CI->getOperand(2)
    };
    CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
    nCI->insertBefore(CI);
    CI->replaceAllUsesWith(nCI);
  } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
    // This check must precede the __amdil_imul24 test below, because
    // __amdil_imul24 is a prefix of __amdil_imul24_high.
    Type *aType = CI->getOperand(0)->getType();

    bool isVector = aType->isVectorTy();
    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
    std::vector<Type *> callTypes;
    callTypes.push_back(CI->getOperand(0)->getType());
    callTypes.push_back(CI->getOperand(1)->getType());
    FunctionType *funcType =
        FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
    std::string name = "__amdil_imul_high";
    if (isVector) {
      name += "_v" + itostr(numEle) + "i32";
    } else {
      name += "_i32";
    }
    Function *Func = dyn_cast<Function>(
        CI->getParent()->getParent()->getParent()->
            getOrInsertFunction(llvm::StringRef(name), funcType));
    Value *Operands[2] = {
      CI->getOperand(0),
      CI->getOperand(1)
    };
    CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
    nCI->insertBefore(CI);
    CI->replaceAllUsesWith(nCI);
  } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
    BinaryOperator *mulOp =
        BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
            CI->getOperand(1), "imul24", CI);
    CI->replaceAllUsesWith(mulOp);
  }
}

bool
AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI)
{
  return (CI != NULL && mRWGOpt
          && CI->getOperand(CI->getNumOperands() - 1)->getName()
             == "__amdil_get_local_size_int");
}

void
AMDILPeepholeOpt::expandRWGLocalOpt(CallInst *CI)
{
  assert(isRWGLocalOpt(CI) &&
         "This optimization only works when the call inst is get_local_size!");
  std::vector<Constant *> consts;
  for (uint32_t x = 0; x < 3; ++x) {
    uint32_t val = mSTM->getGlobalManager()->getLocal(mF->getName(), x);
    consts.push_back(ConstantInt::get(Type::getInt32Ty(*mCTX), val));
  }
  consts.push_back(ConstantInt::get(Type::getInt32Ty(*mCTX), 0));
  Value *cVec = ConstantVector::get(consts);
  CI->replaceAllUsesWith(cVec);
  ++LocalFuncs;
  return;
}

bool
AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI)
{
  if (!CI) {
    return false;
  }
  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX
      && (mSTM->getDeviceName() == "cayman")) {
    return false;
  }
  return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
         == "__amdil_improved_div";
}

void
AMDILPeepholeOpt::expandAccurateDivide(CallInst *CI)
{
  assert(convertAccurateDivide(CI)
         && "expanding accurate divide can only happen if it is expandable!");
  BinaryOperator *divOp =
      BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
          CI->getOperand(1), "fdiv32", CI);
  CI->replaceAllUsesWith(divOp);
}

bool
AMDILPeepholeOpt::propagateSamplerInst(CallInst *CI)
{
  if (optLevel != CodeGenOpt::None) {
    return false;
  }

  if (!CI) {
    return false;
  }

  unsigned funcNameIdx = CI->getNumOperands() - 1;
  StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
  if (calleeName != "__amdil_image2d_read_norm"
      && calleeName != "__amdil_image2d_read_unnorm"
      && calleeName != "__amdil_image3d_read_norm"
      && calleeName != "__amdil_image3d_read_unnorm") {
    return false;
  }

  unsigned samplerIdx = 1;
  Value *sampler = CI->getOperand(samplerIdx);
  LoadInst *lInst = dyn_cast<LoadInst>(sampler);
  if (!lInst) {
    return false;
  }

  if (lInst->getPointerAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
    return false;
  }

  GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
  // If we are loading from what is not a global value, then we
  // fail and return.
  if (!gv) {
    return false;
  }

  // If we don't have an initializer, or we have an initializer and
  // the initializer is not a 32-bit integer, we fail.
  if (!gv->hasInitializer()
      || !gv->getInitializer()->getType()->isIntegerTy(32)) {
    return false;
  }

  // Now that we have the global variable initializer, let's replace
  // all uses of the load instruction with the samplerVal and
  // reparse the __amdil_is_constant() function.
  Constant *samplerVal = gv->getInitializer();
  lInst->replaceAllUsesWith(samplerVal);
  return true;
}

bool
AMDILPeepholeOpt::doInitialization(Module &M)
{
  return false;
}

bool
AMDILPeepholeOpt::doFinalization(Module &M)
{
  return false;
}

void
AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
{
  AU.addRequired<MachineFunctionAnalysis>();
  FunctionPass::getAnalysisUsage(AU);
  AU.setPreservesAll();
}