// LoopStrengthReduce.cpp, revision 4da49122f3f3c8da68a52723d846b88c72166a68
1//===- LoopStrengthReduce.cpp - Strength Reduce GEPs in Loops -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Nate Begeman and is distributed under the 6// University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs a strength reduction on array references inside loops that 11// have as one or more of their components the loop induction variable. This is 12// accomplished by creating a new Value to hold the initial value of the array 13// access for the first iteration, and then creating a new GEP instruction in 14// the loop to increment the value by the appropriate amount. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "loop-reduce" 19#include "llvm/Transforms/Scalar.h" 20#include "llvm/Constants.h" 21#include "llvm/Instructions.h" 22#include "llvm/Type.h" 23#include "llvm/DerivedTypes.h" 24#include "llvm/Analysis/Dominators.h" 25#include "llvm/Analysis/LoopInfo.h" 26#include "llvm/Analysis/ScalarEvolutionExpander.h" 27#include "llvm/Support/CFG.h" 28#include "llvm/Support/GetElementPtrTypeIterator.h" 29#include "llvm/Transforms/Utils/BasicBlockUtils.h" 30#include "llvm/Transforms/Utils/Local.h" 31#include "llvm/Target/TargetData.h" 32#include "llvm/ADT/Statistic.h" 33#include "llvm/Support/Debug.h" 34#include "llvm/Support/Compiler.h" 35#include "llvm/Target/TargetLowering.h" 36#include <algorithm> 37#include <set> 38using namespace llvm; 39 40namespace { 41 Statistic NumReduced ("loop-reduce", "Number of GEPs strength reduced"); 42 Statistic NumInserted("loop-reduce", "Number of PHIs inserted"); 43 Statistic NumVariable("loop-reduce","Number of PHIs with variable strides"); 44 45 /// IVStrideUse - Keep track of one use of a strided induction variable, where 46 /// the stride is stored externally. 
The Offset member keeps track of the 47 /// offset from the IV, User is the actual user of the operand, and 'Operand' 48 /// is the operand # of the User that is the use. 49 struct IVStrideUse { 50 SCEVHandle Offset; 51 Instruction *User; 52 Value *OperandValToReplace; 53 54 // isUseOfPostIncrementedValue - True if this should use the 55 // post-incremented version of this IV, not the preincremented version. 56 // This can only be set in special cases, such as the terminating setcc 57 // instruction for a loop or uses dominated by the loop. 58 bool isUseOfPostIncrementedValue; 59 60 IVStrideUse(const SCEVHandle &Offs, Instruction *U, Value *O) 61 : Offset(Offs), User(U), OperandValToReplace(O), 62 isUseOfPostIncrementedValue(false) {} 63 }; 64 65 /// IVUsersOfOneStride - This structure keeps track of all instructions that 66 /// have an operand that is based on the trip count multiplied by some stride. 67 /// The stride for all of these users is common and kept external to this 68 /// structure. 69 struct IVUsersOfOneStride { 70 /// Users - Keep track of all of the users of this stride as well as the 71 /// initial value and the operand that uses the IV. 72 std::vector<IVStrideUse> Users; 73 74 void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) { 75 Users.push_back(IVStrideUse(Offset, User, Operand)); 76 } 77 }; 78 79 /// IVInfo - This structure keeps track of one IV expression inserted during 80 /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as 81 /// well as the PHI node and increment value created for rewrite. 
82 struct IVExpr { 83 SCEVHandle Stride; 84 SCEVHandle Base; 85 PHINode *PHI; 86 Value *IncV; 87 88 IVExpr() 89 : Stride(SCEVUnknown::getIntegerSCEV(0, Type::UIntTy)), 90 Base (SCEVUnknown::getIntegerSCEV(0, Type::UIntTy)) {} 91 IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi, 92 Value *incv) 93 : Stride(stride), Base(base), PHI(phi), IncV(incv) {} 94 }; 95 96 /// IVsOfOneStride - This structure keeps track of all IV expression inserted 97 /// during StrengthReduceStridedIVUsers for a particular stride of the IV. 98 struct IVsOfOneStride { 99 std::vector<IVExpr> IVs; 100 101 void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI, 102 Value *IncV) { 103 IVs.push_back(IVExpr(Stride, Base, PHI, IncV)); 104 } 105 }; 106 107 class VISIBILITY_HIDDEN LoopStrengthReduce : public FunctionPass { 108 LoopInfo *LI; 109 ETForest *EF; 110 ScalarEvolution *SE; 111 const TargetData *TD; 112 const Type *UIntPtrTy; 113 bool Changed; 114 115 /// IVUsesByStride - Keep track of all uses of induction variables that we 116 /// are interested in. The key of the map is the stride of the access. 117 std::map<SCEVHandle, IVUsersOfOneStride> IVUsesByStride; 118 119 /// IVsByStride - Keep track of all IVs that have been inserted for a 120 /// particular stride. 121 std::map<SCEVHandle, IVsOfOneStride> IVsByStride; 122 123 /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable: 124 /// We use this to iterate over the IVUsesByStride collection without being 125 /// dependent on random ordering of pointers in the process. 126 std::vector<SCEVHandle> StrideOrder; 127 128 /// CastedValues - As we need to cast values to uintptr_t, this keeps track 129 /// of the casted version of each value. This is accessed by 130 /// getCastedVersionOf. 131 std::map<Value*, Value*> CastedPointers; 132 133 /// DeadInsts - Keep track of instructions we may have made dead, so that 134 /// we can remove them after we are done working. 
135 std::set<Instruction*> DeadInsts; 136 137 /// TLI - Keep a pointer of a TargetLowering to consult for determining 138 /// transformation profitability. 139 const TargetLowering *TLI; 140 141 public: 142 LoopStrengthReduce(const TargetLowering *tli = NULL) 143 : TLI(tli) { 144 } 145 146 virtual bool runOnFunction(Function &) { 147 LI = &getAnalysis<LoopInfo>(); 148 EF = &getAnalysis<ETForest>(); 149 SE = &getAnalysis<ScalarEvolution>(); 150 TD = &getAnalysis<TargetData>(); 151 UIntPtrTy = TD->getIntPtrType(); 152 Changed = false; 153 154 for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) 155 runOnLoop(*I); 156 157 return Changed; 158 } 159 160 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 161 // We split critical edges, so we change the CFG. However, we do update 162 // many analyses if they are around. 163 AU.addPreservedID(LoopSimplifyID); 164 AU.addPreserved<LoopInfo>(); 165 AU.addPreserved<DominatorSet>(); 166 AU.addPreserved<ETForest>(); 167 AU.addPreserved<ImmediateDominators>(); 168 AU.addPreserved<DominanceFrontier>(); 169 AU.addPreserved<DominatorTree>(); 170 171 AU.addRequiredID(LoopSimplifyID); 172 AU.addRequired<LoopInfo>(); 173 AU.addRequired<ETForest>(); 174 AU.addRequired<TargetData>(); 175 AU.addRequired<ScalarEvolution>(); 176 } 177 178 /// getCastedVersionOf - Return the specified value casted to uintptr_t. 
179 /// 180 Value *getCastedVersionOf(Value *V); 181private: 182 void runOnLoop(Loop *L); 183 bool AddUsersIfInteresting(Instruction *I, Loop *L, 184 std::set<Instruction*> &Processed); 185 SCEVHandle GetExpressionSCEV(Instruction *E, Loop *L); 186 187 void OptimizeIndvars(Loop *L); 188 189 unsigned CheckForIVReuse(const SCEVHandle&, IVExpr&, const Type*); 190 191 void StrengthReduceStridedIVUsers(const SCEVHandle &Stride, 192 IVUsersOfOneStride &Uses, 193 Loop *L, bool isOnlyStride); 194 void DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts); 195 }; 196 RegisterPass<LoopStrengthReduce> X("loop-reduce", "Loop Strength Reduction"); 197} 198 199FunctionPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { 200 return new LoopStrengthReduce(TLI); 201} 202 203/// getCastedVersionOf - Return the specified value casted to uintptr_t. This 204/// assumes that the Value* V is of integer or pointer type only. 205/// 206Value *LoopStrengthReduce::getCastedVersionOf(Value *V) { 207 if (V->getType() == UIntPtrTy) return V; 208 if (Constant *CB = dyn_cast<Constant>(V)) 209 if (CB->getType()->isInteger()) 210 return ConstantExpr::getIntegerCast(CB, UIntPtrTy, 211 CB->getType()->isSigned()); 212 else 213 return ConstantExpr::getPtrToInt(CB, UIntPtrTy); 214 215 Value *&New = CastedPointers[V]; 216 if (New) return New; 217 218 New = SCEVExpander::InsertCastOfTo(V, UIntPtrTy); 219 DeadInsts.insert(cast<Instruction>(New)); 220 return New; 221} 222 223 224/// DeleteTriviallyDeadInstructions - If any of the instructions is the 225/// specified set are trivially dead, delete them and see if this makes any of 226/// their operands subsequently dead. 
227void LoopStrengthReduce:: 228DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts) { 229 while (!Insts.empty()) { 230 Instruction *I = *Insts.begin(); 231 Insts.erase(Insts.begin()); 232 if (isInstructionTriviallyDead(I)) { 233 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 234 if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i))) 235 Insts.insert(U); 236 SE->deleteInstructionFromRecords(I); 237 I->eraseFromParent(); 238 Changed = true; 239 } 240 } 241} 242 243 244/// GetExpressionSCEV - Compute and return the SCEV for the specified 245/// instruction. 246SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) { 247 // Scalar Evolutions doesn't know how to compute SCEV's for GEP instructions. 248 // If this is a GEP that SE doesn't know about, compute it now and insert it. 249 // If this is not a GEP, or if we have already done this computation, just let 250 // SE figure it out. 251 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Exp); 252 if (!GEP || SE->hasSCEV(GEP)) 253 return SE->getSCEV(Exp); 254 255 // Analyze all of the subscripts of this getelementptr instruction, looking 256 // for uses that are determined by the trip count of L. First, skip all 257 // operands the are not dependent on the IV. 258 259 // Build up the base expression. Insert an LLVM cast of the pointer to 260 // uintptr_t first. 261 SCEVHandle GEPVal = SCEVUnknown::get(getCastedVersionOf(GEP->getOperand(0))); 262 263 gep_type_iterator GTI = gep_type_begin(GEP); 264 265 for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { 266 // If this is a use of a recurrence that we can analyze, and it comes before 267 // Op does in the GEP operand list, we will handle this when we process this 268 // operand. 
269 if (const StructType *STy = dyn_cast<StructType>(*GTI)) { 270 const StructLayout *SL = TD->getStructLayout(STy); 271 unsigned Idx = cast<ConstantInt>(GEP->getOperand(i))->getZExtValue(); 272 uint64_t Offset = SL->MemberOffsets[Idx]; 273 GEPVal = SCEVAddExpr::get(GEPVal, 274 SCEVUnknown::getIntegerSCEV(Offset, UIntPtrTy)); 275 } else { 276 Value *OpVal = getCastedVersionOf(GEP->getOperand(i)); 277 SCEVHandle Idx = SE->getSCEV(OpVal); 278 279 uint64_t TypeSize = TD->getTypeSize(GTI.getIndexedType()); 280 if (TypeSize != 1) 281 Idx = SCEVMulExpr::get(Idx, 282 SCEVConstant::get(ConstantInt::get(UIntPtrTy, 283 TypeSize))); 284 GEPVal = SCEVAddExpr::get(GEPVal, Idx); 285 } 286 } 287 288 SE->setSCEV(GEP, GEPVal); 289 return GEPVal; 290} 291 292/// getSCEVStartAndStride - Compute the start and stride of this expression, 293/// returning false if the expression is not a start/stride pair, or true if it 294/// is. The stride must be a loop invariant expression, but the start may be 295/// a mix of loop invariant and loop variant expressions. 296static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, 297 SCEVHandle &Start, SCEVHandle &Stride) { 298 SCEVHandle TheAddRec = Start; // Initialize to zero. 299 300 // If the outer level is an AddExpr, the operands are all start values except 301 // for a nested AddRecExpr. 302 if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) { 303 for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) 304 if (SCEVAddRecExpr *AddRec = 305 dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) { 306 if (AddRec->getLoop() == L) 307 TheAddRec = SCEVAddExpr::get(AddRec, TheAddRec); 308 else 309 return false; // Nested IV of some sort? 310 } else { 311 Start = SCEVAddExpr::get(Start, AE->getOperand(i)); 312 } 313 314 } else if (isa<SCEVAddRecExpr>(SH)) { 315 TheAddRec = SH; 316 } else { 317 return false; // not analyzable. 
318 } 319 320 SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec); 321 if (!AddRec || AddRec->getLoop() != L) return false; 322 323 // FIXME: Generalize to non-affine IV's. 324 if (!AddRec->isAffine()) return false; 325 326 Start = SCEVAddExpr::get(Start, AddRec->getOperand(0)); 327 328 if (!isa<SCEVConstant>(AddRec->getOperand(1))) 329 DOUT << "[" << L->getHeader()->getName() 330 << "] Variable stride: " << *AddRec << "\n"; 331 332 Stride = AddRec->getOperand(1); 333 // Check that all constant strides are the unsigned type, we don't want to 334 // have two IV's one of signed stride 4 and one of unsigned stride 4 to not be 335 // merged. 336 assert((!isa<SCEVConstant>(Stride) || Stride->getType()->isUnsigned()) && 337 "Constants should be canonicalized to unsigned!"); 338 339 return true; 340} 341 342/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression 343/// and now we need to decide whether the user should use the preinc or post-inc 344/// value. If this user should use the post-inc version of the IV, return true. 345/// 346/// Choosing wrong here can break dominance properties (if we choose to use the 347/// post-inc value when we cannot) or it can end up adding extra live-ranges to 348/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we 349/// should use the post-inc value). 350static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, 351 Loop *L, ETForest *EF, Pass *P) { 352 // If the user is in the loop, use the preinc value. 353 if (L->contains(User->getParent())) return false; 354 355 BasicBlock *LatchBlock = L->getLoopLatch(); 356 357 // Ok, the user is outside of the loop. If it is dominated by the latch 358 // block, use the post-inc value. 359 if (EF->dominates(LatchBlock, User->getParent())) 360 return true; 361 362 // There is one case we have to be careful of: PHI nodes. 
These little guys 363 // can live in blocks that do not dominate the latch block, but (since their 364 // uses occur in the predecessor block, not the block the PHI lives in) should 365 // still use the post-inc value. Check for this case now. 366 PHINode *PN = dyn_cast<PHINode>(User); 367 if (!PN) return false; // not a phi, not dominated by latch block. 368 369 // Look at all of the uses of IV by the PHI node. If any use corresponds to 370 // a block that is not dominated by the latch block, give up and use the 371 // preincremented value. 372 unsigned NumUses = 0; 373 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 374 if (PN->getIncomingValue(i) == IV) { 375 ++NumUses; 376 if (!EF->dominates(LatchBlock, PN->getIncomingBlock(i))) 377 return false; 378 } 379 380 // Okay, all uses of IV by PN are in predecessor blocks that really are 381 // dominated by the latch block. Split the critical edges and use the 382 // post-incremented value. 383 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 384 if (PN->getIncomingValue(i) == IV) { 385 SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P, 386 true); 387 // Splitting the critical edge can reduce the number of entries in this 388 // PHI. 389 e = PN->getNumIncomingValues(); 390 if (--NumUses == 0) break; 391 } 392 393 return true; 394} 395 396 397 398/// AddUsersIfInteresting - Inspect the specified instruction. If it is a 399/// reducible SCEV, recursively add its users to the IVUsesByStride set and 400/// return true. Otherwise, return false. 401bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, 402 std::set<Instruction*> &Processed) { 403 if (!I->getType()->isInteger() && !isa<PointerType>(I->getType())) 404 return false; // Void and FP expressions cannot be reduced. 405 if (!Processed.insert(I).second) 406 return true; // Instruction already handled. 407 408 // Get the symbolic expression for this instruction. 
409 SCEVHandle ISE = GetExpressionSCEV(I, L); 410 if (isa<SCEVCouldNotCompute>(ISE)) return false; 411 412 // Get the start and stride for this expression. 413 SCEVHandle Start = SCEVUnknown::getIntegerSCEV(0, ISE->getType()); 414 SCEVHandle Stride = Start; 415 if (!getSCEVStartAndStride(ISE, L, Start, Stride)) 416 return false; // Non-reducible symbolic expression, bail out. 417 418 for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;++UI){ 419 Instruction *User = cast<Instruction>(*UI); 420 421 // Do not infinitely recurse on PHI nodes. 422 if (isa<PHINode>(User) && Processed.count(User)) 423 continue; 424 425 // If this is an instruction defined in a nested loop, or outside this loop, 426 // don't recurse into it. 427 bool AddUserToIVUsers = false; 428 if (LI->getLoopFor(User->getParent()) != L) { 429 DOUT << "FOUND USER in other loop: " << *User 430 << " OF SCEV: " << *ISE << "\n"; 431 AddUserToIVUsers = true; 432 } else if (!AddUsersIfInteresting(User, L, Processed)) { 433 DOUT << "FOUND USER: " << *User 434 << " OF SCEV: " << *ISE << "\n"; 435 AddUserToIVUsers = true; 436 } 437 438 if (AddUserToIVUsers) { 439 IVUsersOfOneStride &StrideUses = IVUsesByStride[Stride]; 440 if (StrideUses.Users.empty()) // First occurance of this stride? 441 StrideOrder.push_back(Stride); 442 443 // Okay, we found a user that we cannot reduce. Analyze the instruction 444 // and decide what to do with it. If we are a use inside of the loop, use 445 // the value before incrementation, otherwise use it after incrementation. 446 if (IVUseShouldUsePostIncValue(User, I, L, EF, this)) { 447 // The value used will be incremented by the stride more than we are 448 // expecting, so subtract this off. 
449 SCEVHandle NewStart = SCEV::getMinusSCEV(Start, Stride); 450 StrideUses.addUser(NewStart, User, I); 451 StrideUses.Users.back().isUseOfPostIncrementedValue = true; 452 DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; 453 } else { 454 StrideUses.addUser(Start, User, I); 455 } 456 } 457 } 458 return true; 459} 460 461namespace { 462 /// BasedUser - For a particular base value, keep information about how we've 463 /// partitioned the expression so far. 464 struct BasedUser { 465 /// Base - The Base value for the PHI node that needs to be inserted for 466 /// this use. As the use is processed, information gets moved from this 467 /// field to the Imm field (below). BasedUser values are sorted by this 468 /// field. 469 SCEVHandle Base; 470 471 /// Inst - The instruction using the induction variable. 472 Instruction *Inst; 473 474 /// OperandValToReplace - The operand value of Inst to replace with the 475 /// EmittedBase. 476 Value *OperandValToReplace; 477 478 /// Imm - The immediate value that should be added to the base immediately 479 /// before Inst, because it will be folded into the imm field of the 480 /// instruction. 481 SCEVHandle Imm; 482 483 /// EmittedBase - The actual value* to use for the base value of this 484 /// operation. This is null if we should just use zero so far. 485 Value *EmittedBase; 486 487 // isUseOfPostIncrementedValue - True if this should use the 488 // post-incremented version of this IV, not the preincremented version. 489 // This can only be set in special cases, such as the terminating setcc 490 // instruction for a loop and uses outside the loop that are dominated by 491 // the loop. 
492 bool isUseOfPostIncrementedValue; 493 494 BasedUser(IVStrideUse &IVSU) 495 : Base(IVSU.Offset), Inst(IVSU.User), 496 OperandValToReplace(IVSU.OperandValToReplace), 497 Imm(SCEVUnknown::getIntegerSCEV(0, Base->getType())), EmittedBase(0), 498 isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue) {} 499 500 // Once we rewrite the code to insert the new IVs we want, update the 501 // operands of Inst to use the new expression 'NewBase', with 'Imm' added 502 // to it. 503 void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, 504 SCEVExpander &Rewriter, Loop *L, 505 Pass *P); 506 507 Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, 508 SCEVExpander &Rewriter, 509 Instruction *IP, Loop *L); 510 void dump() const; 511 }; 512} 513 514void BasedUser::dump() const { 515 cerr << " Base=" << *Base; 516 cerr << " Imm=" << *Imm; 517 if (EmittedBase) 518 cerr << " EB=" << *EmittedBase; 519 520 cerr << " Inst: " << *Inst; 521} 522 523Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, 524 SCEVExpander &Rewriter, 525 Instruction *IP, Loop *L) { 526 // Figure out where we *really* want to insert this code. In particular, if 527 // the user is inside of a loop that is nested inside of L, we really don't 528 // want to insert this expression before the user, we'd rather pull it out as 529 // many loops as possible. 530 LoopInfo &LI = Rewriter.getLoopInfo(); 531 Instruction *BaseInsertPt = IP; 532 533 // Figure out the most-nested loop that IP is in. 534 Loop *InsertLoop = LI.getLoopFor(IP->getParent()); 535 536 // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out 537 // the preheader of the outer-most loop where NewBase is not loop invariant. 538 while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) { 539 BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); 540 InsertLoop = InsertLoop->getParentLoop(); 541 } 542 543 // If there is no immediate value, skip the next part. 
544 if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm)) 545 if (SC->getValue()->isNullValue()) 546 return Rewriter.expandCodeFor(NewBase, BaseInsertPt, 547 OperandValToReplace->getType()); 548 549 Value *Base = Rewriter.expandCodeFor(NewBase, BaseInsertPt); 550 551 // Always emit the immediate (if non-zero) into the same block as the user. 552 SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(Base), Imm); 553 return Rewriter.expandCodeFor(NewValSCEV, IP, 554 OperandValToReplace->getType()); 555} 556 557 558// Once we rewrite the code to insert the new IVs we want, update the 559// operands of Inst to use the new expression 'NewBase', with 'Imm' added 560// to it. 561void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, 562 SCEVExpander &Rewriter, 563 Loop *L, Pass *P) { 564 if (!isa<PHINode>(Inst)) { 565 Value *NewVal = InsertCodeForBaseAtPosition(NewBase, Rewriter, Inst, L); 566 // Replace the use of the operand Value with the new Phi we just created. 567 Inst->replaceUsesOfWith(OperandValToReplace, NewVal); 568 DOUT << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst; 569 return; 570 } 571 572 // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm 573 // expression into each operand block that uses it. Note that PHI nodes can 574 // have multiple entries for the same predecessor. We use a map to make sure 575 // that a PHI node only has a single Value* for each predecessor (which also 576 // prevents us from inserting duplicate code in some blocks). 577 std::map<BasicBlock*, Value*> InsertedCode; 578 PHINode *PN = cast<PHINode>(Inst); 579 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 580 if (PN->getIncomingValue(i) == OperandValToReplace) { 581 // If this is a critical edge, split the edge so that we do not insert the 582 // code on all predecessor/successor paths. 
We do this unless this is the 583 // canonical backedge for this loop, as this can make some inserted code 584 // be in an illegal position. 585 BasicBlock *PHIPred = PN->getIncomingBlock(i); 586 if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && 587 (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { 588 589 // First step, split the critical edge. 590 SplitCriticalEdge(PHIPred, PN->getParent(), P, true); 591 592 // Next step: move the basic block. In particular, if the PHI node 593 // is outside of the loop, and PredTI is in the loop, we want to 594 // move the block to be immediately before the PHI block, not 595 // immediately after PredTI. 596 if (L->contains(PHIPred) && !L->contains(PN->getParent())) { 597 BasicBlock *NewBB = PN->getIncomingBlock(i); 598 NewBB->moveBefore(PN->getParent()); 599 } 600 601 // Splitting the edge can reduce the number of PHI entries we have. 602 e = PN->getNumIncomingValues(); 603 } 604 605 Value *&Code = InsertedCode[PN->getIncomingBlock(i)]; 606 if (!Code) { 607 // Insert the code into the end of the predecessor block. 608 Instruction *InsertPt = PN->getIncomingBlock(i)->getTerminator(); 609 Code = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L); 610 } 611 612 // Replace the use of the operand Value with the new Phi we just created. 613 PN->setIncomingValue(i, Code); 614 Rewriter.clear(); 615 } 616 } 617 DOUT << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst; 618} 619 620 621/// isTargetConstant - Return true if the following can be referenced by the 622/// immediate field of a target instruction. 623static bool isTargetConstant(const SCEVHandle &V, const TargetLowering *TLI) { 624 if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) { 625 int64_t V = SC->getValue()->getSExtValue(); 626 if (TLI) 627 return TLI->isLegalAddressImmediate(V); 628 else 629 // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field. 
630 return (V > -(1 << 16) && V < (1 << 16)-1); 631 } 632 633 if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) 634 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(SU->getValue())) 635 if (CE->getOpcode() == Instruction::PtrToInt) { 636 Constant *Op0 = CE->getOperand(0); 637 if (isa<GlobalValue>(Op0) && TLI && 638 TLI->isLegalAddressImmediate(cast<GlobalValue>(Op0))) 639 return true; 640 } 641 return false; 642} 643 644/// MoveLoopVariantsToImediateField - Move any subexpressions from Val that are 645/// loop varying to the Imm operand. 646static void MoveLoopVariantsToImediateField(SCEVHandle &Val, SCEVHandle &Imm, 647 Loop *L) { 648 if (Val->isLoopInvariant(L)) return; // Nothing to do. 649 650 if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { 651 std::vector<SCEVHandle> NewOps; 652 NewOps.reserve(SAE->getNumOperands()); 653 654 for (unsigned i = 0; i != SAE->getNumOperands(); ++i) 655 if (!SAE->getOperand(i)->isLoopInvariant(L)) { 656 // If this is a loop-variant expression, it must stay in the immediate 657 // field of the expression. 658 Imm = SCEVAddExpr::get(Imm, SAE->getOperand(i)); 659 } else { 660 NewOps.push_back(SAE->getOperand(i)); 661 } 662 663 if (NewOps.empty()) 664 Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 665 else 666 Val = SCEVAddExpr::get(NewOps); 667 } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { 668 // Try to pull immediates out of the start value of nested addrec's. 669 SCEVHandle Start = SARE->getStart(); 670 MoveLoopVariantsToImediateField(Start, Imm, L); 671 672 std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); 673 Ops[0] = Start; 674 Val = SCEVAddRecExpr::get(Ops, SARE->getLoop()); 675 } else { 676 // Otherwise, all of Val is variant, move the whole thing over. 
677 Imm = SCEVAddExpr::get(Imm, Val); 678 Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 679 } 680} 681 682 683/// MoveImmediateValues - Look at Val, and pull out any additions of constants 684/// that can fit into the immediate field of instructions in the target. 685/// Accumulate these immediate values into the Imm value. 686static void MoveImmediateValues(const TargetLowering *TLI, 687 SCEVHandle &Val, SCEVHandle &Imm, 688 bool isAddress, Loop *L) { 689 if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { 690 std::vector<SCEVHandle> NewOps; 691 NewOps.reserve(SAE->getNumOperands()); 692 693 for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { 694 SCEVHandle NewOp = SAE->getOperand(i); 695 MoveImmediateValues(TLI, NewOp, Imm, isAddress, L); 696 697 if (!NewOp->isLoopInvariant(L)) { 698 // If this is a loop-variant expression, it must stay in the immediate 699 // field of the expression. 700 Imm = SCEVAddExpr::get(Imm, NewOp); 701 } else { 702 NewOps.push_back(NewOp); 703 } 704 } 705 706 if (NewOps.empty()) 707 Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 708 else 709 Val = SCEVAddExpr::get(NewOps); 710 return; 711 } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { 712 // Try to pull immediates out of the start value of nested addrec's. 713 SCEVHandle Start = SARE->getStart(); 714 MoveImmediateValues(TLI, Start, Imm, isAddress, L); 715 716 if (Start != SARE->getStart()) { 717 std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); 718 Ops[0] = Start; 719 Val = SCEVAddRecExpr::get(Ops, SARE->getLoop()); 720 } 721 return; 722 } else if (SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) { 723 // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field. 
    // (Tail of MoveImmediateValues.)  If the multiply's left operand is a
    // constant the target can fold into an addressing mode, and the whole
    // multiply is loop invariant, recurse into the right operand to pull
    // immediates out of it, then re-scale whatever was extracted.
    if (isAddress && isTargetConstant(SME->getOperand(0), TLI) &&
        SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {

      SCEVHandle SubImm = SCEVUnknown::getIntegerSCEV(0, Val->getType());
      SCEVHandle NewOp = SME->getOperand(1);
      MoveImmediateValues(TLI, NewOp, SubImm, isAddress, L);

      // If we extracted something out of the subexpressions, see if we can
      // simplify this!
      if (NewOp != SME->getOperand(1)) {
        // Scale SubImm up by "8".  If the result is a target constant, we are
        // good.
        SubImm = SCEVMulExpr::get(SubImm, SME->getOperand(0));
        if (isTargetConstant(SubImm, TLI)) {
          // Accumulate the immediate.
          Imm = SCEVAddExpr::get(Imm, SubImm);

          // Update what is left of 'Val'.
          Val = SCEVMulExpr::get(SME->getOperand(0), NewOp);
          return;
        }
      }
    }
  }

  // Loop-variant expressions must stay in the immediate field of the
  // expression.
  if ((isAddress && isTargetConstant(Val, TLI)) ||
      !Val->isLoopInvariant(L)) {
    Imm = SCEVAddExpr::get(Imm, Val);
    Val = SCEVUnknown::getIntegerSCEV(0, Val->getType());
    return;
  }

  // Otherwise, no immediates to move.
}


/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
/// added together.  This is used to reassociate common addition subexprs
/// together for maximal sharing when rewriting bases.
static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
                             SCEVHandle Expr) {
  if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
    // An add: recurse into every operand.
    for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
      SeparateSubExprs(SubExprs, AE->getOperand(j));
  } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) {
    SCEVHandle Zero = SCEVUnknown::getIntegerSCEV(0, Expr->getType());
    if (SARE->getOperand(0) == Zero) {
      // Zero-based addrec: it is already a single indivisible subexpr.
      SubExprs.push_back(Expr);
    } else {
      // Compute the addrec with zero as its base.
      std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
      Ops[0] = Zero;   // Start with zero base.
      SubExprs.push_back(SCEVAddRecExpr::get(Ops, SARE->getLoop()));

      // Then decompose the start value separately, so it can be shared with
      // other bases that contain the same start subexpressions.
      SeparateSubExprs(SubExprs, SARE->getOperand(0));
    }
  } else if (!isa<SCEVConstant>(Expr) ||
             !cast<SCEVConstant>(Expr)->getValue()->isNullValue()) {
    // Do not add zero.
    SubExprs.push_back(Expr);
  }
}


/// RemoveCommonExpressionsFromUseBases - Look through all of the uses in Bases,
/// removing any common subexpressions from it.  Anything truly common is
/// removed, accumulated, and returned.  This looks for things like (a+b+c) and
/// (a+c+d) -> (a+c).  The common expression is *removed* from the Bases.
static SCEVHandle
RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses) {
  unsigned NumUses = Uses.size();

  // Only one use?  Use its base, regardless of what it is!
  SCEVHandle Zero = SCEVUnknown::getIntegerSCEV(0, Uses[0].Base->getType());
  SCEVHandle Result = Zero;
  if (NumUses == 1) {
    // Swap leaves Uses[0].Base == Zero, and Result == the old base.
    std::swap(Result, Uses[0].Base);
    return Result;
  }

  // To find common subexpressions, count how many of Uses use each expression.
  // If any subexpressions are used Uses.size() times, they are common.
  std::map<SCEVHandle, unsigned> SubExpressionUseCounts;

  // UniqueSubExprs - Keep track of all of the subexpressions we see in the
  // order we see them.
  std::vector<SCEVHandle> UniqueSubExprs;

  std::vector<SCEVHandle> SubExprs;
  for (unsigned i = 0; i != NumUses; ++i) {
    // If the base is zero (which is common), return zero now, there are no
    // CSEs we can find.
    if (Uses[i].Base == Zero) return Zero;

    // Split the expression into subexprs.
    SeparateSubExprs(SubExprs, Uses[i].Base);
    // Add one to SubExpressionUseCounts for each subexpr present.
    for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
      if (++SubExpressionUseCounts[SubExprs[j]] == 1)
        UniqueSubExprs.push_back(SubExprs[j]);
    SubExprs.clear();
  }

  // Now that we know how many times each is used, build Result.  Iterate over
  // UniqueSubexprs so that we have a stable ordering.
  for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
    std::map<SCEVHandle, unsigned>::iterator I =
      SubExpressionUseCounts.find(UniqueSubExprs[i]);
    assert(I != SubExpressionUseCounts.end() && "Entry not found?");
    if (I->second == NumUses) {   // Found CSE!
      Result = SCEVAddExpr::get(Result, I->first);
    } else {
      // Remove non-cse's from SubExpressionUseCounts, so membership in the
      // map below means "is a CSE".
      SubExpressionUseCounts.erase(I);
    }
  }

  // If we found no CSE's, return now.
  if (Result == Zero) return Result;

  // Otherwise, remove all of the CSE's we found from each of the base values.
  for (unsigned i = 0; i != NumUses; ++i) {
    // Split the expression into subexprs.
    SeparateSubExprs(SubExprs, Uses[i].Base);

    // Remove any common subexpressions.  Note the manual index adjustment
    // after erase() to avoid skipping the element that shifted into slot j.
    for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
      if (SubExpressionUseCounts.count(SubExprs[j])) {
        SubExprs.erase(SubExprs.begin()+j);
        --j; --e;
      }

    // Finally, add the non-shared expressions together.
    if (SubExprs.empty())
      Uses[i].Base = Zero;
    else
      Uses[i].Base = SCEVAddExpr::get(SubExprs);
    SubExprs.clear();
  }

  return Result;
}

/// isZero - returns true if the scalar evolution expression is zero.
///
static bool isZero(SCEVHandle &V) {
  if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V))
    return SC->getValue()->getZExtValue() == 0;
  return false;
}


/// CheckForIVReuse - Returns the multiple if the stride is the multiple
/// of a previous stride and it is a legal value for the target addressing
/// mode scale component.
/// This allows the users of this stride to be rewritten
/// as prev iv * factor.  It returns 0 if no reuse is possible.
/// On success, IV is set to the reusable induction variable expression.
unsigned LoopStrengthReduce::CheckForIVReuse(const SCEVHandle &Stride,
                                             IVExpr &IV, const Type *Ty) {
  // Without target lowering info we cannot tell which scales are legal.
  if (!TLI) return 0;

  if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
    int64_t SInt = SC->getValue()->getSExtValue();
    if (SInt == 1) return 0;

    // For each scale the target addressing mode supports, see if a previously
    // inserted IV exists whose stride is this stride divided by that scale.
    for (TargetLowering::legal_am_scale_iterator
           I = TLI->legal_am_scale_begin(), E = TLI->legal_am_scale_end();
         I != E; ++I) {
      unsigned Scale = *I;
      if (unsigned(abs(SInt)) < Scale || (SInt % Scale) != 0)
        continue;
      std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
        IVsByStride.find(SCEVUnknown::getIntegerSCEV(SInt/Scale, Type::UIntTy));
      if (SI == IVsByStride.end())
        continue;
      for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
             IE = SI->second.IVs.end(); II != IE; ++II)
        // FIXME: Only handle base == 0 for now.
        // Only reuse previous IV if it would not require a type conversion.
        if (isZero(II->Base) &&
            II->Base->getType()->canLosslesslyBitCastTo(Ty)) {
          IV = *II;
          return Scale;
        }
    }
  }

  return 0;
}

/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
/// returns true if Val's isUseOfPostIncrementedValue is true.
static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
  return Val.isUseOfPostIncrementedValue;
}

/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
/// stride of IV.  All of the users may have different starting values, and this
/// may not be the only stride (we know it is if isOnlyStride is true).
void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
                                                      IVUsersOfOneStride &Uses,
                                                      Loop *L,
                                                      bool isOnlyStride) {
  // Transform our list of users and offsets to a bit more complex table.  In
  // this new vector, each 'BasedUser' contains 'Base' the base of the
  // strided access as well as the old information from Uses.  We progressively
  // move information from the Base field to the Imm field, until we eventually
  // have the full access expression to rewrite the use.
  std::vector<BasedUser> UsersToProcess;
  UsersToProcess.reserve(Uses.Users.size());
  for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) {
    UsersToProcess.push_back(Uses.Users[i]);

    // Move any loop-variant operands from the offset field to the immediate
    // field of the use, so that we don't try to use something before it is
    // computed.  (The assert below confirms Base is loop invariant after.)
    MoveLoopVariantsToImediateField(UsersToProcess.back().Base,
                                    UsersToProcess.back().Imm, L);
    assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
           "Base value is not loop invariant!");
  }

  // We now have a whole bunch of uses of like-strided induction variables, but
  // they might all have different bases.  We want to emit one PHI node for this
  // stride which we fold as many common expressions (between the IVs) into as
  // possible.  Start by identifying the common expressions in the base values
  // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
  // "A+B"), emit it to the preheader, then remove the expression from the
  // UsersToProcess base values.
  SCEVHandle CommonExprs =
    RemoveCommonExpressionsFromUseBases(UsersToProcess);

  // Check if it is possible to reuse a IV with stride that is factor of this
  // stride.  And the multiple is a number that can be encoded in the scale
  // field of the target addressing mode.
  PHINode *NewPHI = NULL;
  Value   *IncV   = NULL;
  IVExpr   ReuseIV;
  unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV,
                                           CommonExprs->getType());
  if (RewriteFactor != 0) {
    DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
         << " and BASE " << *ReuseIV.Base << " :\n";
    // Reuse the existing PHI and increment instead of making new ones below.
    NewPHI = ReuseIV.PHI;
    IncV   = ReuseIV.IncV;
  }

  // Next, figure out what we can represent in the immediate fields of
  // instructions.  If we can represent anything there, move it to the imm
  // fields of the BasedUsers.  We do this so that it increases the commonality
  // of the remaining uses.
  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
    // If the user is not in the current loop, this means it is using the exit
    // value of the IV.  Do not put anything in the base, make sure it's all in
    // the immediate field to allow as much factoring as possible.
    if (!L->contains(UsersToProcess[i].Inst->getParent())) {
      UsersToProcess[i].Imm = SCEVAddExpr::get(UsersToProcess[i].Imm,
                                               UsersToProcess[i].Base);
      UsersToProcess[i].Base =
        SCEVUnknown::getIntegerSCEV(0, UsersToProcess[i].Base->getType());
    } else {

      // Addressing modes can be folded into loads and stores.  Be careful that
      // the store is through the expression, not of the expression though.
      bool isAddress = isa<LoadInst>(UsersToProcess[i].Inst);
      if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].Inst))
        if (SI->getOperand(1) == UsersToProcess[i].OperandValToReplace)
          isAddress = true;

      MoveImmediateValues(TLI, UsersToProcess[i].Base, UsersToProcess[i].Imm,
                          isAddress, L);
    }
  }

  // Now that we know what we need to do, insert the PHI node itself.
  //
  DOUT << "INSERTING IV of STRIDE " << *Stride << " and BASE "
       << *CommonExprs << " :\n";

  SCEVExpander Rewriter(*SE, *LI);
  SCEVExpander PreheaderRewriter(*SE, *LI);

  BasicBlock  *Preheader = L->getLoopPreheader();
  Instruction *PreInsertPt = Preheader->getTerminator();
  Instruction *PhiInsertBefore = L->getHeader()->begin();

  BasicBlock *LatchBlock = L->getLoopLatch();

  const Type *ReplacedTy = CommonExprs->getType();

  // Emit the initial base value into the loop preheader.
  Value *CommonBaseV
    = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt,
                                      ReplacedTy);

  if (RewriteFactor == 0) {
    // Create a new Phi for this base, and stick it in the loop header.
    NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore);
    ++NumInserted;

    // Add common base to the new Phi node.
    NewPHI->addIncoming(CommonBaseV, Preheader);

    // Insert the stride into the preheader.
    Value *StrideV = PreheaderRewriter.expandCodeFor(Stride, PreInsertPt,
                                                     ReplacedTy);
    if (!isa<ConstantInt>(StrideV)) ++NumVariable;

    // Emit the increment of the base value before the terminator of the loop
    // latch block, and add it to the Phi node.
    SCEVHandle IncExp = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
                                         SCEVUnknown::get(StrideV));

    IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator(),
                                  ReplacedTy);
    IncV->setName(NewPHI->getName()+".inc");
    NewPHI->addIncoming(IncV, LatchBlock);

    // Remember this in case a later stride is multiple of this.
    IVsByStride[Stride].addIV(Stride, CommonExprs, NewPHI, IncV);
  } else {
    Constant *C = dyn_cast<Constant>(CommonBaseV);
    if (!C ||
        (!C->isNullValue() &&
         !isTargetConstant(SCEVUnknown::get(CommonBaseV), TLI)))
      // We want the common base emitted into the preheader!  This is just
      // using cast as a copy so BitCast (no-op cast) is appropriate
      CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
                                    "commonbase", PreInsertPt);
  }

  // We want to emit code for users inside the loop first.  To do this, we
  // rearrange BasedUser so that the entries at the end have
  // isUseOfPostIncrementedValue = false, because we pop off the end of the
  // vector (so we handle them first).
  std::partition(UsersToProcess.begin(), UsersToProcess.end(),
                 PartitionByIsUseOfPostIncrementedValue);

  // Sort this by base, so that things with the same base are handled
  // together.  By partitioning first and stable-sorting later, we are
  // guaranteed that within each base we will pop off users from within the
  // loop before users outside of the loop with a particular base.
  //
  // We would like to use stable_sort here, but we can't.  The problem is that
  // SCEVHandle's don't have a deterministic ordering w.r.t to each other, so
  // we don't have anything to do a '<' comparison on.  Because we think the
  // number of uses is small, do a horrible bubble sort which just relies on
  // ==.
  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
    // Get a base value.
    SCEVHandle Base = UsersToProcess[i].Base;

    // Compact everything with this base to be consecutive with this one.
    for (unsigned j = i+1; j != e; ++j) {
      if (UsersToProcess[j].Base == Base) {
        std::swap(UsersToProcess[i+1], UsersToProcess[j]);
        ++i;
      }
    }
  }

  // Process all the users now.  This outer loop handles all bases, the inner
  // loop handles all users of a particular base.
  while (!UsersToProcess.empty()) {
    SCEVHandle Base = UsersToProcess.back().Base;

    DOUT << "  INSERTING code for BASE = " << *Base << ":\n";

    // Emit the code for Base into the preheader.
    Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt,
                                                   ReplacedTy);

    // If BaseV is a constant other than 0, make sure that it gets inserted into
    // the preheader, instead of being forward substituted into the uses.  We do
    // this by forcing a BitCast (noop cast) to be inserted into the preheader
    // in this case.
    if (Constant *C = dyn_cast<Constant>(BaseV)) {
      if (!C->isNullValue() && !isTargetConstant(Base, TLI)) {
        // We want this constant emitted into the preheader!  This is just
        // using cast as a copy so BitCast (no-op cast) is appropriate
        BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
                                PreInsertPt);
      }
    }

    // Emit the code to add the immediate offset to the Phi value, just before
    // the instructions that we identified as using this stride and base.
    do {
      // FIXME: Use emitted users to emit other users.
      BasedUser &User = UsersToProcess.back();

      // If this instruction wants to use the post-incremented value, move it
      // after the post-inc and use its value instead of the PHI.
      Value *RewriteOp = NewPHI;
      if (User.isUseOfPostIncrementedValue) {
        RewriteOp = IncV;

        // If this user is in the loop, make sure it is the last thing in the
        // loop to ensure it is dominated by the increment.
        if (L->contains(User.Inst->getParent()))
          User.Inst->moveBefore(LatchBlock->getTerminator());
      }
      if (RewriteOp->getType() != ReplacedTy)
        RewriteOp = SCEVExpander::InsertCastOfTo(RewriteOp, ReplacedTy);

      SCEVHandle RewriteExpr = SCEVUnknown::get(RewriteOp);

      // Clear the SCEVExpander's expression map so that we are guaranteed
      // to have the code emitted where we expect it.
      Rewriter.clear();

      // If we are reusing the iv, then it must be multiplied by a constant
      // factor to take advantage of the addressing mode scale component.
      if (RewriteFactor != 0) {
        RewriteExpr =
          SCEVMulExpr::get(SCEVUnknown::getIntegerSCEV(RewriteFactor,
                                                       RewriteExpr->getType()),
                           RewriteExpr);

        // The common base is emitted in the loop preheader.  But since we
        // are reusing an IV, it has not been used to initialize the PHI node.
        // Add it to the expression used to rewrite the uses.
        if (!isa<ConstantInt>(CommonBaseV) ||
            !cast<ConstantInt>(CommonBaseV)->isNullValue())
          RewriteExpr = SCEVAddExpr::get(RewriteExpr,
                                         SCEVUnknown::get(CommonBaseV));
      }

      // Now that we know what we need to do, insert code before User for the
      // immediate and any loop-variant expressions.
      if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isNullValue())
        // Add BaseV to the PHI value if needed.
        RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(BaseV));

      User.RewriteInstructionToUseNewBase(RewriteExpr, Rewriter, L, this);

      // Mark old value we replaced as possibly dead, so that it is eliminated
      // if we just replaced the last use of that value.
      DeadInsts.insert(cast<Instruction>(User.OperandValToReplace));

      UsersToProcess.pop_back();
      ++NumReduced;

      // If there are any more users to process with the same base, process them
      // now.  We sorted by base above, so we just have to check the last elt.
    } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
    // TODO: Next, find out which base index is the most common, pull it out.
  }

  // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
  // different starting values, into different PHIs.
}

// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
// uses in the loop, look to see if we can eliminate some, in favor of using
// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
  // TODO: implement optzns here.

  // Finally, get the terminating condition for the loop if possible.  If we
  // can, we want to change it to use a post-incremented version of its
  // induction variable, to allow coalescing the live ranges for the IV into
  // one register value.
  PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin());
  BasicBlock  *Preheader = L->getLoopPreheader();
  // The header has exactly two predecessors (preheader and latch); pick the
  // incoming block of SomePHI that is NOT the preheader by indexing with the
  // boolean comparison result.
  BasicBlock *LatchBlock =
    SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader);
  BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator());
  if (!TermBr || TermBr->isUnconditional() ||
      !isa<SetCondInst>(TermBr->getCondition()))
    return;
  SetCondInst *Cond = cast<SetCondInst>(TermBr->getCondition());

  // Search IVUsesByStride to find Cond's IVUse if there is one.
  IVStrideUse *CondUse = 0;
  const SCEVHandle *CondStride = 0;

  for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse;
       ++Stride) {
    std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
      IVUsesByStride.find(StrideOrder[Stride]);
    assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");

    for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
           E = SI->second.Users.end(); UI != E; ++UI)
      if (UI->User == Cond) {
        CondUse = &*UI;
        CondStride = &SI->first;
        // NOTE: we could handle setcc instructions with multiple uses here, but
        // InstCombine does it as well for simple uses, it's not clear that it
        // occurs enough in real life to handle.
        break;
      }
  }
  if (!CondUse) return;  // setcc doesn't use the IV.

  // It's possible for the setcc instruction to be anywhere in the loop, and
  // possible for it to have multiple users.  If it is not immediately before
  // the latch block branch, move it.
  if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
    if (Cond->hasOneUse()) {   // Condition has a single use, just move it.
      Cond->moveBefore(TermBr);
    } else {
      // Otherwise, clone the terminating condition and insert into the loopend.
      Cond = cast<SetCondInst>(Cond->clone());
      Cond->setName(L->getHeader()->getName() + ".termcond");
      LatchBlock->getInstList().insert(TermBr, Cond);

      // Clone the IVUse, as the old use still exists!
      IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond,
                                          CondUse->OperandValToReplace);
      CondUse = &IVUsesByStride[*CondStride].Users.back();
    }
  }

  // If we get to here, we know that we can transform the setcc instruction to
  // use the post-incremented version of the IV, allowing us to coalesce the
  // live ranges for the IV correctly.
  CondUse->Offset = SCEV::getMinusSCEV(CondUse->Offset, *CondStride);
  CondUse->isUseOfPostIncrementedValue = true;
}

namespace {
  // Constant strides come first which in turns are sorted by their absolute
  // values.  If absolute values are the same, then positive strides comes
  // first.  e.g.
  // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
  struct StrideCompare {
    bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) {
      SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
      SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
      if (LHSC && RHSC) {
        int64_t  LV = LHSC->getValue()->getSExtValue();
        int64_t  RV = RHSC->getValue()->getSExtValue();
        uint64_t ALV = (LV < 0) ? -LV : LV;
        uint64_t ARV = (RV < 0) ? -RV : RV;
        if (ALV == ARV)
          return LV > RV;       // Same magnitude: positive sorts first.
        else
          return ALV < ARV;     // Otherwise order by absolute value.
      }
      // Constants sort before non-constants; two non-constants compare equal.
      return (LHSC && !RHSC);
    }
  };
}

void LoopStrengthReduce::runOnLoop(Loop *L) {
  // First step, transform all loops nesting inside of this loop.
  for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I)
    runOnLoop(*I);

  // Next, find all uses of induction variables in this loop, and categorize
  // them by stride.  Start by finding all of the PHI nodes in the header for
  // this loop.  If they are induction variables, inspect their uses.
  std::set<Instruction*> Processed;   // Don't reprocess instructions.
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
    AddUsersIfInteresting(I, L, Processed);

  // If we have nothing to do, return.
  if (IVUsesByStride.empty()) return;

  // Optimize induction variables.  Some indvar uses can be transformed to use
  // strides that will be needed for other purposes.  A common example of this
  // is the exit test for the loop, which can often be rewritten to use the
  // computation of some other indvar to decide when to terminate the loop.
  OptimizeIndvars(L);


  // FIXME: We can widen subreg IV's here for RISC targets.  e.g. instead of
  // doing computation in byte values, promote to 32-bit values if safe.

  // FIXME: Attempt to reuse values across multiple IV's.  In particular, we
  // could have something like "for(i) { foo(i*8); bar(i*16) }", which should be
  // codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.  Need
  // to be careful that IV's are all the same type.  Only works for intptr_t
  // indvars.

  // If we only have one stride, we can more aggressively eliminate some things.
  bool HasOneStride = IVUsesByStride.size() == 1;

#ifndef NDEBUG
  DOUT << "\nLSR on ";
  DEBUG(L->dump());
#endif

  // IVsByStride keeps IVs for one particular loop.
  IVsByStride.clear();

  // Sort the StrideOrder so we process larger strides first.
  std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare());

  // Note: this processes each stride/type pair individually.  All users passed
  // into StrengthReduceStridedIVUsers have the same type AND stride.  Also,
  // note that we iterate over IVUsesByStride indirectly by using StrideOrder.
  // This extra layer of indirection makes the ordering of strides deterministic
  // - not dependent on map order.
  for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
    std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
      IVUsesByStride.find(StrideOrder[Stride]);
    assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
    StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride);
  }

  // Clean up after ourselves
  if (!DeadInsts.empty()) {
    DeleteTriviallyDeadInstructions(DeadInsts);

    BasicBlock::iterator I = L->getHeader()->begin();
    PHINode *PN;
    while ((PN = dyn_cast<PHINode>(I))) {
      ++I;  // Preincrement iterator to avoid invalidating it when deleting PN.

      // At this point, we know that we have killed one or more GEP
      // instructions.  It is worth checking to see if the cann indvar is also
      // dead, so that we can remove it as well.  The requirements for the cann
      // indvar to be considered dead are:
      // 1. the cann indvar has one use
      // 2. the use is an add instruction
      // 3. the add has one use
      // 4. the add is used by the cann indvar
      // If all four cases above are true, then we can remove both the add and
      // the cann indvar.
      // FIXME: this needs to eliminate an induction variable even if it's being
      // compared against some value to decide loop termination.
      if (PN->hasOneUse()) {
        BinaryOperator *BO = dyn_cast<BinaryOperator>(*(PN->use_begin()));
        if (BO && BO->hasOneUse()) {
          if (PN == *(BO->use_begin())) {
            // PN and BO form a dead two-node cycle (PN -> BO -> PN).
            DeadInsts.insert(BO);
            // Break the cycle, then delete the PHI.
            PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
            SE->deleteInstructionFromRecords(PN);
            PN->eraseFromParent();
          }
        }
      }
    }
    DeleteTriviallyDeadInstructions(DeadInsts);
  }

  // Reset per-loop state for the next invocation of this pass object.
  CastedPointers.clear();
  IVUsesByStride.clear();
  StrideOrder.clear();
  return;
}