ScalarEvolutionExpander.cpp revision dd1f22f25d8496b10cfddedb63d674dd39897bb0
//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution expander,
// which is used to generate the code corresponding to a given scalar evolution
// expression.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"

using namespace llvm;

/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
/// reusing an existing cast if a suitable one exists, moving an existing
/// cast if a suitable one exists but isn't in the right place, or
/// creating a new one.
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
                                       Instruction::CastOps Op,
                                       BasicBlock::iterator IP) {
  // Check to see if there is already a cast!
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
       UI != E; ++UI) {
    User *U = *UI;
    if (U->getType() == Ty)
      if (CastInst *CI = dyn_cast<CastInst>(U))
        if (CI->getOpcode() == Op) {
          // If the cast isn't where we want it, fix it.
          if (BasicBlock::iterator(CI) != IP) {
            // Create a new cast, and leave the old cast in place in case
            // it is being used as an insert point. Clear its operand
            // so that it doesn't hold anything live.
            Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP);
            NewCI->takeName(CI);
            CI->replaceAllUsesWith(NewCI);
            CI->setOperand(0, UndefValue::get(V->getType()));
            rememberInstruction(NewCI);
            return NewCI;
          }
          rememberInstruction(CI);
          return CI;
        }
  }

  // Create a new cast.
  Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP);
  rememberInstruction(I);
  return I;
}

/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
  assert((Op == Instruction::BitCast ||
          Op == Instruction::PtrToInt ||
          Op == Instruction::IntToPtr) &&
         "InsertNoopCastOfTo cannot perform non-noop casts!");
  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
         "InsertNoopCastOfTo cannot change sizes!");

  // Short-circuit unnecessary bitcasts.
  if (Op == Instruction::BitCast) {
    if (V->getType() == Ty)
      return V;
    if (CastInst *CI = dyn_cast<CastInst>(V)) {
      if (CI->getOperand(0)->getType() == Ty)
        return CI->getOperand(0);
    }
  }
  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
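  // For example, ptrtoint (inttoptr X) or inttoptr (ptrtoint X) folds back
  // to X when the intermediate type has the same size, which is exactly what
  // the size checks below verify.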
  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
    if (CastInst *CI = dyn_cast<CastInst>(V))
      if ((CI->getOpcode() == Instruction::PtrToInt ||
           CI->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CI->getType()) ==
          SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
        return CI->getOperand(0);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      if ((CE->getOpcode() == Instruction::PtrToInt ||
           CE->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CE->getType()) ==
          SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
        return CE->getOperand(0);
  }

  // Fold a cast of a constant.
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getCast(Op, C, Ty);

  // Cast the argument at the beginning of the entry block, after
  // any bitcasts of other arguments.
  if (Argument *A = dyn_cast<Argument>(V)) {
    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
    while ((isa<BitCastInst>(IP) &&
            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
            cast<BitCastInst>(IP)->getOperand(0) != A) ||
           isa<DbgInfoIntrinsic>(IP) ||
           isa<LandingPadInst>(IP))
      ++IP;
    return ReuseOrCreateCast(A, Ty, Op, IP);
  }

  // Cast the instruction immediately after the instruction.
  Instruction *I = cast<Instruction>(V);
  BasicBlock::iterator IP = I; ++IP;
  if (InvokeInst *II = dyn_cast<InvokeInst>(I))
    IP = II->getNormalDest()->begin();
  while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP) ||
         isa<LandingPadInst>(IP))
    ++IP;
  return ReuseOrCreateCast(I, Ty, Op, IP);
}

/// InsertBinop - Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation.
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
                                 Value *LHS, Value *RHS) {
  // Fold a binop with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(LHS))
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      return ConstantExpr::get(Opcode, CLHS, CRHS);

  // Do a quick scan to see if we have this binop nearby. If so, reuse it.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;
      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
          IP->getOperand(1) == RHS)
        return IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();

  // Move the insertion point out of as many loops as we can.
  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
    if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
    BasicBlock *Preheader = L->getLoopPreheader();
    if (!Preheader) break;

    // Ok, move up a level.
    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
  }

  // If we haven't found this binop, insert it.
  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
  BO->setDebugLoc(SaveInsertPt->getDebugLoc());
  rememberInstruction(BO);

  // Restore the original insert point.
  if (SaveInsertBB)
    restoreInsertPoint(SaveInsertBB, SaveInsertPt);

  return BO;
}

/// FactorOutConstant - Test if S is divisible by Factor, using signed
/// division. If so, update S with Factor divided out and return true.
/// S need not be evenly divisible if a reasonable remainder can be
/// computed.
/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
/// check to see if the divide was folded.
static bool FactorOutConstant(const SCEV *&S,
                              const SCEV *&Remainder,
                              const SCEV *Factor,
                              ScalarEvolution &SE,
                              const TargetData *TD) {
  // Everything is divisible by one.
  if (Factor->isOne())
    return true;

  // x/x == 1.
  if (S == Factor) {
    S = SE.getConstant(S->getType(), 1);
    return true;
  }

  // For a Constant, check for a multiple of the given factor.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    // 0/x == 0.
    if (C->isZero())
      return true;
    // Check for divisibility.
    if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
      ConstantInt *CI =
        ConstantInt::get(SE.getContext(),
                         C->getValue()->getValue().sdiv(
                                                  FC->getValue()->getValue()));
      // If the quotient is zero and the remainder is non-zero, reject
      // the value at this scale. It will be considered for subsequent
      // smaller scales.
      if (!CI->isZero()) {
        const SCEV *Div = SE.getConstant(CI);
        S = Div;
        Remainder =
          SE.getAddExpr(Remainder,
                        SE.getConstant(C->getValue()->getValue().srem(
                                                 FC->getValue()->getValue())));
        return true;
      }
    }
  }

  // In a Mul, check if there is a constant operand which is a multiple
  // of the given factor.
  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    if (TD) {
      // With TargetData, the size is known. Check if there is a constant
      // operand which is a multiple of the given factor. If so, we can
      // factor it.
      const SCEVConstant *FC = cast<SCEVConstant>(Factor);
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
        if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
          NewMulOps[0] =
            SE.getConstant(C->getValue()->getValue().sdiv(
                                                  FC->getValue()->getValue()));
          S = SE.getMulExpr(NewMulOps);
          return true;
        }
    } else {
      // Without TargetData, check if Factor can be factored out of any of the
      // Mul's operands. If so, we can just remove it.
      for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
        const SCEV *SOp = M->getOperand(i);
        const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
        if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
            Remainder->isZero()) {
          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
          NewMulOps[i] = SOp;
          S = SE.getMulExpr(NewMulOps);
          return true;
        }
      }
    }
  }

  // In an AddRec, check if both start and step are divisible.
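  // For example, with Factor == 4, {8,+,12} factors to {2,+,3}, and
  // {10,+,12} factors to {2,+,3} with a remainder of 2.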
  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
    const SCEV *Step = A->getStepRecurrence(SE);
    const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
    if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
      return false;
    if (!StepRem->isZero())
      return false;
    const SCEV *Start = A->getStart();
    if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
      return false;
    // FIXME: can use A->getNoWrapFlags(FlagNW)
    S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap);
    return true;
  }

  return false;
}

/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
/// is the number of SCEVAddRecExprs present, which are kept at the end of
/// the list.
///
static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
                                Type *Ty,
                                ScalarEvolution &SE) {
  unsigned NumAddRecs = 0;
  for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
    ++NumAddRecs;
  // Group Ops into non-addrecs and addrecs.
  SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
  SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
  // Let ScalarEvolution sort and simplify the non-addrecs list.
  const SCEV *Sum = NoAddRecs.empty() ?
                    SE.getConstant(Ty, 0) :
                    SE.getAddExpr(NoAddRecs);
  // If it returned an add, use the operands. Otherwise it simplified
  // the sum into a single value, so just use that.
  Ops.clear();
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
    Ops.append(Add->op_begin(), Add->op_end());
  else if (!Sum->isZero())
    Ops.push_back(Sum);
  // Then append the addrecs.
  Ops.append(AddRecs.begin(), AddRecs.end());
}

/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
/// This helps expose more opportunities for folding parts of the expressions
/// into GEP indices.
///
static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
                         Type *Ty,
                         ScalarEvolution &SE) {
  // Find the addrecs.
  SmallVector<const SCEV *, 8> AddRecs;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
      const SCEV *Start = A->getStart();
      if (Start->isZero()) break;
      const SCEV *Zero = SE.getConstant(Ty, 0);
      AddRecs.push_back(SE.getAddRecExpr(Zero,
                                         A->getStepRecurrence(SE),
                                         A->getLoop(),
                                         // FIXME: A->getNoWrapFlags(FlagNW)
                                         SCEV::FlagAnyWrap));
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
        Ops[i] = Zero;
        Ops.append(Add->op_begin(), Add->op_end());
        e += Add->getNumOperands();
      } else {
        Ops[i] = Start;
      }
    }
  if (!AddRecs.empty()) {
    // Add the addrecs onto the end of the list.
    Ops.append(AddRecs.begin(), AddRecs.end());
    // Resort the operand list, moving any constants to the front.
    SimplifyAddOperands(Ops, Ty, SE);
  }
}

/// expandAddToGEP - Expand an addition expression with a pointer type into
/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
/// BasicAliasAnalysis and other passes analyze the result. See the rules
/// for getelementptr vs. inttoptr in
/// http://llvm.org/docs/LangRef.html#pointeraliasing
/// for details.
///
/// Design note: The correctness of using getelementptr here depends on
/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
/// they may introduce pointer arithmetic which may not be safely converted
/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware. If some of the indices are loop-invariant while others
/// aren't, it might seem desirable to emit multiple GEPs, keeping the
/// loop-invariant portions of the overall computation outside the loop.
/// However, there are a few reasons this is not done here. Hoisting simple
/// arithmetic is a low-level optimization that often isn't very
/// important until late in the optimization process. In fact, passes
/// like InstructionCombining will combine GEPs, even if it means
/// pushing loop-invariant computation down into loops, so even if the
/// GEPs were split here, the work would quickly be undone. The
/// LoopStrengthReduction pass, which is usually run quite late (and
/// after the last InstructionCombining pass), takes care of hoisting
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
                                    const SCEV *const *op_end,
                                    PointerType *PTy,
                                    Type *Ty,
                                    Value *V) {
  Type *ElTy = PTy->getElementType();
  SmallVector<Value *, 4> GepIndices;
  SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
  bool AnyNonZeroIndices = false;

  // Split AddRecs up into parts as either of the parts may be usable
  // without the other.
  SplitAddRecs(Ops, Ty, SE);

  // Descend down the pointer's type and attempt to convert the other
  // operands into GEP indices, at each level. The first index in a GEP
  // indexes into the array implied by the pointer operand; the rest of
  // the indices index into the element or field type selected by the
  // preceding index.
  for (;;) {
    // If the scale size is not 0, attempt to factor out a scale for
    // array indexing.
    SmallVector<const SCEV *, 8> ScaledOps;
    if (ElTy->isSized()) {
      const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
      if (!ElSize->isZero()) {
        SmallVector<const SCEV *, 8> NewOps;
        for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
          const SCEV *Op = Ops[i];
          const SCEV *Remainder = SE.getConstant(Ty, 0);
          if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
            // Op now has ElSize factored out.
            ScaledOps.push_back(Op);
            if (!Remainder->isZero())
              NewOps.push_back(Remainder);
            AnyNonZeroIndices = true;
          } else {
            // The operand was not divisible, so add it to the list of operands
            // we'll scan next iteration.
            NewOps.push_back(Ops[i]);
          }
        }
        // If we made any changes, update Ops.
        if (!ScaledOps.empty()) {
          Ops = NewOps;
          SimplifyAddOperands(Ops, Ty, SE);
        }
      }
    }

    // Record the scaled array index for this level of the type. If
    // we didn't find any operands that could be factored, tentatively
    // assume that element zero was selected (since the zero offset
    // would obviously be folded away).
    Value *Scaled = ScaledOps.empty() ?
                    Constant::getNullValue(Ty) :
                    expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
    GepIndices.push_back(Scaled);

    // Collect struct field index operands.
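    // With TargetData, a constant byte offset is mapped to the field
    // containing it; e.g. an offset of 4 into { i32, i32 } selects field 1
    // with no remaining offset (assuming a layout where i32 occupies 4
    // bytes).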
    while (StructType *STy = dyn_cast<StructType>(ElTy)) {
      bool FoundFieldNo = false;
      // An empty struct has no fields.
      if (STy->getNumElements() == 0) break;
      if (SE.TD) {
        // With TargetData, field offsets are known. See if a constant offset
        // falls within any of the struct fields.
        if (Ops.empty()) break;
        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
          if (SE.getTypeSizeInBits(C->getType()) <= 64) {
            const StructLayout &SL = *SE.TD->getStructLayout(STy);
            uint64_t FullOffset = C->getValue()->getZExtValue();
            if (FullOffset < SL.getSizeInBytes()) {
              unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
              GepIndices.push_back(
                  ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
              ElTy = STy->getTypeAtIndex(ElIdx);
              Ops[0] =
                SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
              AnyNonZeroIndices = true;
              FoundFieldNo = true;
            }
          }
      } else {
        // Without TargetData, just check for an offsetof expression of the
        // appropriate struct type.
        for (unsigned i = 0, e = Ops.size(); i != e; ++i)
          if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
            Type *CTy;
            Constant *FieldNo;
            if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
              GepIndices.push_back(FieldNo);
              ElTy =
                STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
              Ops[i] = SE.getConstant(Ty, 0);
              AnyNonZeroIndices = true;
              FoundFieldNo = true;
              break;
            }
          }
      }
      // If no struct field offsets were found, tentatively assume that
      // field zero was selected (since the zero offset would obviously
      // be folded away).
      if (!FoundFieldNo) {
        ElTy = STy->getTypeAtIndex(0u);
        GepIndices.push_back(
          Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
      }
    }

    if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
      ElTy = ATy->getElementType();
    else
      break;
  }

  // If none of the operands were convertible to proper GEP indices, cast
  // the base to i8* and do an ugly getelementptr with that. It's still
  // better than ptrtoint+arithmetic+inttoptr at least.
  if (!AnyNonZeroIndices) {
    // Cast the base to i8*.
    V = InsertNoopCastOfTo(V,
                Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));

    // Expand the operands for a plain byte offset.
    Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);

    // Fold a GEP with constant operands.
    if (Constant *CLHS = dyn_cast<Constant>(V))
      if (Constant *CRHS = dyn_cast<Constant>(Idx))
        return ConstantExpr::getGetElementPtr(CLHS, CRHS);

    // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
    unsigned ScanLimit = 6;
    BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
    // Scanning starts from the last instruction before the insertion point.
    BasicBlock::iterator IP = Builder.GetInsertPoint();
    if (IP != BlockBegin) {
      --IP;
      for (; ScanLimit; --IP, --ScanLimit) {
        // Don't count dbg.value against the ScanLimit, to avoid perturbing the
        // generated code.
        if (isa<DbgInfoIntrinsic>(IP))
          ScanLimit++;
        if (IP->getOpcode() == Instruction::GetElementPtr &&
            IP->getOperand(0) == V && IP->getOperand(1) == Idx)
          return IP;
        if (IP == BlockBegin) break;
      }
    }

    // Save the original insertion point so we can restore it when we're done.
    BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
    BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();

    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
    }

    // Emit a GEP.
    Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
    rememberInstruction(GEP);

    // Restore the original insert point.
    if (SaveInsertBB)
      restoreInsertPoint(SaveInsertBB, SaveInsertPt);

    return GEP;
  }

  // Save the original insertion point so we can restore it when we're done.
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();

  // Move the insertion point out of as many loops as we can.
  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
    if (!L->isLoopInvariant(V)) break;

    bool AnyIndexNotLoopInvariant = false;
    for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
         E = GepIndices.end(); I != E; ++I)
      if (!L->isLoopInvariant(*I)) {
        AnyIndexNotLoopInvariant = true;
        break;
      }
    if (AnyIndexNotLoopInvariant)
      break;

    BasicBlock *Preheader = L->getLoopPreheader();
    if (!Preheader) break;

    // Ok, move up a level.
    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
  }

  // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
  // because ScalarEvolution may have changed the address arithmetic to
  // compute a value which is beyond the end of the allocated object.
  Value *Casted = V;
  if (V->getType() != PTy)
    Casted = InsertNoopCastOfTo(Casted, PTy);
  Value *GEP = Builder.CreateGEP(Casted,
                                 GepIndices,
                                 "scevgep");
  Ops.push_back(SE.getUnknown(GEP));
  rememberInstruction(GEP);

  // Restore the original insert point.
  if (SaveInsertBB)
    restoreInsertPoint(SaveInsertBB, SaveInsertPt);

  return expand(SE.getAddExpr(Ops));
}

/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
/// SCEV expansion. If they are nested, this is the most nested. If they are
/// neighboring, pick the later.
static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
                                        DominatorTree &DT) {
  if (!A) return B;
  if (!B) return A;
  if (A->contains(B)) return B;
  if (B->contains(A)) return A;
  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
  return A; // Arbitrarily break the tie.
}

/// getRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  // Test whether we've already computed the most relevant loop for this SCEV.
  std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
    RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
  if (!Pair.second)
    return Pair.first->second;

  if (isa<SCEVConstant>(S))
    // A constant has no relevant loops.
    return 0;
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
      return Pair.first->second = SE.LI->getLoopFor(I->getParent());
    // A non-instruction has no relevant loops.
    return 0;
  }
  if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
    const Loop *L = 0;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
      L = AR->getLoop();
    for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
         I != E; ++I)
      L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
    return RelevantLoops[N] = L;
  }
  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
    const Loop *Result = getRelevantLoop(C->getOperand());
    return RelevantLoops[C] = Result;
  }
  if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    const Loop *Result =
      PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
                           getRelevantLoop(D->getRHS()),
                           *SE.DT);
    return RelevantLoops[D] = Result;
  }
  llvm_unreachable("Unexpected SCEV type!");
  return 0;
}

namespace {

/// LoopCompare - Compare loops by PickMostRelevantLoop.
class LoopCompare {
  DominatorTree &DT;
public:
  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}

  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
                  std::pair<const Loop *, const SCEV *> RHS) const {
    // Keep pointer operands sorted at the end.
    if (LHS.second->getType()->isPointerTy() !=
        RHS.second->getType()->isPointerTy())
      return LHS.second->getType()->isPointerTy();

    // Compare loops with PickMostRelevantLoop.
    if (LHS.first != RHS.first)
      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;

    // If one operand is a non-constant negative and the other is not,
    // put the non-constant negative on the right so that a sub can
    // be used instead of a negate and add.
    if (LHS.second->isNonConstantNegative()) {
      if (!RHS.second->isNonConstantNegative())
        return false;
    } else if (RHS.second->isNonConstantNegative())
      return true;

    // Otherwise they are equivalent according to this comparison.
    return false;
  }
};

}

Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  // Collect all the add operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal, and
  // so that pointer operands are inserted first, which the code below relies on
  // to form more involved GEPs.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
       E(S->op_begin()); I != E; ++I)
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));

  // Sort by loop. Use a stable sort so that constants follow non-constants and
  // pointer operands precede non-pointer operands.
  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));

  // Emit instructions to add all the operands. Hoist as much as possible
  // out of loops, and form meaningful getelementptrs where possible.
  Value *Sum = 0;
  for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
    const Loop *CurLoop = I->first;
    const SCEV *Op = I->second;
    if (!Sum) {
      // This is the first operand. Just expand it.
      Sum = expand(Op);
      ++I;
    } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
      // The running sum expression is a pointer. Try to form a getelementptr
      // at this level with that as the base.
      SmallVector<const SCEV *, 4> NewOps;
      for (; I != E && I->first == CurLoop; ++I) {
        // If the operand is SCEVUnknown and not an instruction, peek through
        // it, to enable more of it to be folded into the GEP.
        const SCEV *X = I->second;
        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
          if (!isa<Instruction>(U->getValue()))
            X = SE.getSCEV(U->getValue());
        NewOps.push_back(X);
      }
      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
    } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
      // The running sum is an integer, and there's a pointer at this level.
      // Try to form a getelementptr. If the running sum is instructions,
      // use a SCEVUnknown to avoid re-analyzing them.
      SmallVector<const SCEV *, 4> NewOps;
      NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
                                               SE.getSCEV(Sum));
      for (++I; I != E && I->first == CurLoop; ++I)
        NewOps.push_back(I->second);
      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
    } else if (Op->isNonConstantNegative()) {
      // Instead of doing a negate and add, just do a subtract.
      Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
      Sum = InsertNoopCastOfTo(Sum, Ty);
      Sum = InsertBinop(Instruction::Sub, Sum, W);
      ++I;
    } else {
      // A simple add.
      Value *W = expandCodeFor(Op, Ty);
      Sum = InsertNoopCastOfTo(Sum, Ty);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Sum)) std::swap(Sum, W);
      Sum = InsertBinop(Instruction::Add, Sum, W);
      ++I;
    }
  }

  return Sum;
}

Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  // Collect all the mul operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
       E(S->op_begin()); I != E; ++I)
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));

  // Sort by loop. Use a stable sort so that constants follow non-constants.
  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));

  // Emit instructions to mul all the operands. Hoist as much as possible
  // out of loops.
  Value *Prod = 0;
  for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
    const SCEV *Op = I->second;
    if (!Prod) {
      // This is the first operand. Just expand it.
      Prod = expand(Op);
      ++I;
    } else if (Op->isAllOnesValue()) {
      // Instead of doing a multiply by negative one, just do a negate.
      Prod = InsertNoopCastOfTo(Prod, Ty);
      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
      ++I;
    } else {
      // A simple mul.
      Value *W = expandCodeFor(Op, Ty);
      Prod = InsertNoopCastOfTo(Prod, Ty);
      // Canonicalize a constant to the RHS.
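      // (Keeping constants on the RHS is the canonical form for commutative
      // operators; a consistent operand order also makes InsertBinop's
      // nearby-reuse scan more likely to match an existing instruction.)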
      if (isa<Constant>(Prod)) std::swap(Prod, W);
      Prod = InsertBinop(Instruction::Mul, Prod, W);
      ++I;
    }
  }

  return Prod;
}

Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  Value *LHS = expandCodeFor(S->getLHS(), Ty);
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
    const APInt &RHS = SC->getValue()->getValue();
    if (RHS.isPowerOf2())
      return InsertBinop(Instruction::LShr, LHS,
                         ConstantInt::get(Ty, RHS.logBase2()));
  }

  Value *RHS = expandCodeFor(S->getRHS(), Ty);
  return InsertBinop(Instruction::UDiv, LHS, RHS);
}

/// Move parts of Base into Rest to leave Base with the minimal
/// expression that provides a pointer operand suitable for a
/// GEP expansion.
static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
                              ScalarEvolution &SE) {
  while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
    Base = A->getStart();
    Rest = SE.getAddExpr(Rest,
                         SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
                                          A->getStepRecurrence(SE),
                                          A->getLoop(),
                                          // FIXME: A->getNoWrapFlags(FlagNW)
                                          SCEV::FlagAnyWrap));
  }
  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
    Base = A->getOperand(A->getNumOperands()-1);
    SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
    NewAddOps.back() = Rest;
    Rest = SE.getAddExpr(NewAddOps);
    ExposePointerBase(Base, Rest, SE);
  }
}

/// Determine if this is a well-behaved chain of instructions leading back to
/// the PHI. If so, it may be reused by expanded expressions.
bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                         const Loop *L) {
  if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
      (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
    return false;
  // If any of the operands don't dominate the insert position, bail.
  // Addrec operands are always loop-invariant, so this can only happen
  // if there are instructions which haven't been hoisted.
  if (L == IVIncInsertLoop) {
    for (User::op_iterator OI = IncV->op_begin()+1,
         OE = IncV->op_end(); OI != OE; ++OI)
      if (Instruction *OInst = dyn_cast<Instruction>(OI))
        if (!SE.DT->dominates(OInst, IVIncInsertPos))
          return false;
  }
  // Advance to the next instruction.
  IncV = dyn_cast<Instruction>(IncV->getOperand(0));
  if (!IncV)
    return false;

  if (IncV->mayHaveSideEffects())
    return false;

  // Keep walking the chain until it closes back at the PHI.
  if (IncV != PN)
    return isNormalAddRecExprPHI(PN, IncV, L);

  return true;
}

/// Determine if this cyclic phi is in a form that would have been generated by
/// LSR. We don't care if the phi was actually expanded in this pass, as long
/// as it is in a low-cost form, for example, no implied multiplication. This
/// should match any patterns generated by getAddRecExprPHILiterally and
/// expandAddToGEP.
bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                           const Loop *L) {
  if (ChainedPhis.count(PN))
    return true;

  switch (IncV->getOpcode()) {
  // Check for a simple Add/Sub or GEP of a loop invariant step.
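  // For example (IR shapes only; names hypothetical):
  //   %iv.next = add i64 %iv, %step
  //   %iv.next = getelementptr i8* %iv, i64 %step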
  case Instruction::Add:
  case Instruction::Sub:
    return IncV->getOperand(0) == PN
      && L->isLoopInvariant(IncV->getOperand(1));
  case Instruction::BitCast:
    IncV = dyn_cast<GetElementPtrInst>(IncV->getOperand(0));
    if (!IncV)
      return false;
    // fall-thru to GEP handling
  case Instruction::GetElementPtr: {
    // This must be a pointer addition of constants (pretty) or some number of
    // address-size elements (ugly).
    for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
         I != E; ++I) {
      if (isa<Constant>(*I))
        continue;
      // ugly geps have 2 operands.
      // i1* is used by the expander to represent an address-size element.
      if (IncV->getNumOperands() != 2)
        return false;
      unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
      if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
          && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
        return false;
      // Ensure the operands dominate the insertion point. I don't know of a
      // case when this would not be true, so this is somewhat untested.
      if (L == IVIncInsertLoop) {
        for (User::op_iterator OI = IncV->op_begin()+1,
             OE = IncV->op_end(); OI != OE; ++OI)
          if (Instruction *OInst = dyn_cast<Instruction>(OI))
            if (!SE.DT->dominates(OInst, IVIncInsertPos))
              return false;
      }
      break;
    }
    IncV = dyn_cast<Instruction>(IncV->getOperand(0));
    if (IncV && IncV->getOpcode() == Instruction::BitCast)
      IncV = dyn_cast<Instruction>(IncV->getOperand(0));
    return IncV == PN;
  }
  default:
    return false;
  }
}

/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
/// need to materialize IV increments elsewhere to handle difficult situations.
Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
                                 Type *ExpandTy, Type *IntTy,
                                 bool useSubtract) {
  Value *IncV;
  // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
  if (ExpandTy->isPointerTy()) {
    PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
    // If the step isn't constant, don't use an implicitly scaled GEP, because
    // that would require a multiply inside the loop.
    if (!isa<ConstantInt>(StepV))
      GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
                                  GEPPtrTy->getAddressSpace());
    const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
    IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
    if (IncV->getType() != PN->getType()) {
      IncV = Builder.CreateBitCast(IncV, PN->getType());
      rememberInstruction(IncV);
    }
  } else {
    IncV = useSubtract ?
      Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
      Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
    rememberInstruction(IncV);
  }
  return IncV;
}

/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
PHINode *
SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                        const Loop *L,
                                        Type *ExpandTy,
                                        Type *IntTy) {
  assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");

  // Reuse a previously-inserted PHI, if present.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (LatchBlock) {
    for (BasicBlock::iterator I = L->getHeader()->begin();
         PHINode *PN = dyn_cast<PHINode>(I); ++I) {
      if (!SE.isSCEVable(PN->getType()) ||
          (SE.getEffectiveSCEVType(PN->getType()) !=
           SE.getEffectiveSCEVType(Normalized->getType())) ||
          SE.getSCEV(PN) != Normalized)
        continue;

      Instruction *IncV =
        cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));

      if (LSRMode) {
        if (!isExpandedAddRecExprPHI(PN, IncV, L))
          continue;
      } else {
        if (!isNormalAddRecExprPHI(PN, IncV, L))
          continue;
      }
      // Ok, the add recurrence looks usable.
      // Remember this PHI, even in post-inc mode.
      InsertedValues.insert(PN);
      // Remember the increment.
      rememberInstruction(IncV);
      if (L == IVIncInsertLoop)
        do {
          if (SE.DT->dominates(IncV, IVIncInsertPos))
            break;
          // Make sure the increment is where we want it. But don't move it
          // down past a potential existing post-inc user.
          IncV->moveBefore(IVIncInsertPos);
          IVIncInsertPos = IncV;
          IncV = cast<Instruction>(IncV->getOperand(0));
        } while (IncV != PN);
      return PN;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();

  // Another AddRec may need to be recursively expanded below. For example, if
  // this AddRec is quadratic, the StepV may itself be an AddRec in this
  // loop. Remove this loop from the PostIncLoops set before expanding such
  // AddRecs. Otherwise, we cannot find a valid position for the step
  // (i.e. StepV can never dominate its loop header). Ideally, we could do
  // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
  // so it's not worth implementing SmallPtrSet::swap.
  PostIncLoopSet SavedPostIncLoops = PostIncLoops;
  PostIncLoops.clear();

  // Expand code for the start value.
  Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
                                L->getHeader()->begin());

  // StartV must be hoisted into L's preheader to dominate the new phi.
  assert(!isa<Instruction>(StartV) ||
         SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
                                  L->getHeader()));

  // Expand code for the step value. Do this before creating the PHI so that
  // PHI reuse code doesn't see an incomplete PHI.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  // If the stride is negative, insert a sub instead of an add for the
  // increment (unless it's a constant, because subtracts of constants are
  // canonicalized to adds).
  bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  if (useSubtract)
    Step = SE.getNegativeSCEV(Step);
  // Expand the step somewhere that dominates the loop header.
  Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());

  // Create the PHI.
  BasicBlock *Header = L->getHeader();
  Builder.SetInsertPoint(Header, Header->begin());
  pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
                                  Twine(IVName) + ".iv");
  rememberInstruction(PN);

  // Create the step instructions and populate the PHI.
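  // Each non-loop predecessor contributes StartV; each loop predecessor
  // contributes the increment. For a typical single-latch loop the result
  // looks like (names hypothetical):
  //   %x.iv = phi i64 [ %start, %preheader ], [ %x.iv.next, %latch ]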
  for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
    BasicBlock *Pred = *HPI;

    // Add a start value.
    if (!L->contains(Pred)) {
      PN->addIncoming(StartV, Pred);
      continue;
    }

    // Create a step value and add it to the PHI.
    // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
    // instructions at IVIncInsertPos.
    Instruction *InsertPos = L == IVIncInsertLoop ?
      IVIncInsertPos : Pred->getTerminator();
    Builder.SetInsertPoint(InsertPos);
    Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);

    PN->addIncoming(IncV, Pred);
  }

  // Restore the original insert point.
  if (SaveInsertBB)
    restoreInsertPoint(SaveInsertBB, SaveInsertPt);

  // After expanding subexpressions, restore the PostIncLoops set so the caller
  // can ensure that IVIncrement dominates the current uses.
  PostIncLoops = SavedPostIncLoops;

  // Remember this PHI, even in post-inc mode.
  InsertedValues.insert(PN);

  return PN;
}

Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  Type *STy = S->getType();
  Type *IntTy = SE.getEffectiveSCEVType(STy);
  const Loop *L = S->getLoop();

  // Determine a normalized form of this expression, which is the expression
  // before any post-inc adjustment is made.
  const SCEVAddRecExpr *Normalized = S;
  if (PostIncLoops.count(L)) {
    PostIncLoopSet Loops;
    Loops.insert(L);
    Normalized =
      cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
                                                  Loops, SE, *SE.DT));
  }

  // Strip off any non-loop-dominating component from the addrec start.
  const SCEV *Start = Normalized->getStart();
  const SCEV *PostLoopOffset = 0;
  if (!SE.properlyDominates(Start, L->getHeader())) {
    PostLoopOffset = Start;
    Start = SE.getConstant(Normalized->getType(), 0);
    Normalized = cast<SCEVAddRecExpr>(
      SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
                       Normalized->getLoop(),
                       // FIXME: Normalized->getNoWrapFlags(FlagNW)
                       SCEV::FlagAnyWrap));
  }

  // Strip off any non-loop-dominating component from the addrec step.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  const SCEV *PostLoopScale = 0;
  if (!SE.dominates(Step, L->getHeader())) {
    PostLoopScale = Step;
    Step = SE.getConstant(Normalized->getType(), 1);
    Normalized =
      cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
                                            Normalized->getLoop(),
                                            // FIXME: Normalized
                                            // ->getNoWrapFlags(FlagNW)
                                            SCEV::FlagAnyWrap));
  }

  // Expand the core addrec. If we need post-loop scaling, force it to
  // expand to an integer type to avoid the need for additional casting.
  Type *ExpandTy = PostLoopScale ? IntTy : STy;
  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);

  // Accommodate post-inc mode, if necessary.
  Value *Result;
  if (!PostIncLoops.count(L))
    Result = PN;
  else {
    // In PostInc mode, use the post-incremented value.
    BasicBlock *LatchBlock = L->getLoopLatch();
    assert(LatchBlock && "PostInc mode requires a unique loop latch!");
    Result = PN->getIncomingValueForBlock(LatchBlock);

    // For an expansion to use the postinc form, the client must call
    // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
    // or dominated by IVIncInsertPos.
    if (isa<Instruction>(Result)
        && !SE.DT->dominates(cast<Instruction>(Result),
                             Builder.GetInsertPoint())) {
      // The induction variable's postinc expansion does not dominate this use.
      // IVUsers tries to prevent this case, so it is rare. However, it can
      // happen when an IVUser outside the loop is not dominated by the latch
      // block. Adjusting IVIncInsertPos before expansion begins cannot handle
      // all cases. Consider a phi outside the loop whose operand is replaced
      // during expansion with the value of the postinc user. Without
      // fundamentally changing the way postinc users are tracked, the only
      // remedy is inserting an extra IV increment. StepV might fold into
      // PostLoopOffset, but hopefully expandCodeFor handles that.
      bool useSubtract =
        !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
      if (useSubtract)
        Step = SE.getNegativeSCEV(Step);
      // Expand the step somewhere that dominates the loop header.
      BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
      BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
      Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
      // Restore the insertion point to the place where the caller has
      // determined dominates all uses.
      restoreInsertPoint(SaveInsertBB, SaveInsertPt);
      Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
    }
  }

  // Re-apply any non-loop-dominating scale.
  if (PostLoopScale) {
    Result = InsertNoopCastOfTo(Result, IntTy);
    Result = Builder.CreateMul(Result,
                               expandCodeFor(PostLoopScale, IntTy));
    rememberInstruction(Result);
  }

  // Re-apply any non-loop-dominating offset.
  if (PostLoopOffset) {
    if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
      const SCEV *const OffsetArray[1] = { PostLoopOffset };
      Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
    } else {
      Result = InsertNoopCastOfTo(Result, IntTy);
      Result = Builder.CreateAdd(Result,
                                 expandCodeFor(PostLoopOffset, IntTy));
      rememberInstruction(Result);
    }
  }

  return Result;
}

Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
  if (!CanonicalMode) return expandAddRecExprLiterally(S);

  Type *Ty = SE.getEffectiveSCEVType(S->getType());
  const Loop *L = S->getLoop();

  // First check for an existing canonical IV in a suitable type.
  PHINode *CanonicalIV = 0;
  if (PHINode *PN = L->getCanonicalInductionVariable())
    if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
      CanonicalIV = PN;

  // Rewrite an AddRec in terms of the canonical induction variable, if
  // its type is narrower.
  if (CanonicalIV &&
      SE.getTypeSizeInBits(CanonicalIV->getType()) >
      SE.getTypeSizeInBits(Ty)) {
    SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
    for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
      NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
    Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
                                       // FIXME: S->getNoWrapFlags(FlagNW)
                                       SCEV::FlagAnyWrap));
    BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
    BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
    BasicBlock::iterator NewInsertPt =
      llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
    while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
           isa<LandingPadInst>(NewInsertPt))
      ++NewInsertPt;
    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
                      NewInsertPt);
    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
    return V;
  }

  // {X,+,F} --> X + {0,+,F}
  if (!S->getStart()->isZero()) {
    SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
    NewOps[0] = SE.getConstant(Ty, 0);
    // FIXME: can use S->getNoWrapFlags()
    const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap);

    // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
    // comments on expandAddToGEP for details.
    const SCEV *Base = S->getStart();
    const SCEV *RestArray[1] = { Rest };
    // Dig into the expression to find the pointer base for a GEP.
    ExposePointerBase(Base, RestArray[0], SE);
    // If we found a pointer, expand the AddRec with a GEP.
    if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
      // Make sure the Base isn't something exotic, such as a multiplied
      // or divided pointer value. In those cases, the result type isn't
      // actually a pointer type.
      if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
        Value *StartV = expand(Base);
        assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
        return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
      }
    }

    // Just do a normal add. Pre-expand the operands to suppress folding.
    return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
                                SE.getUnknown(expand(Rest))));
  }

  // If we don't yet have a canonical IV, create one.
  if (!CanonicalIV) {
    // Create and insert the PHI node for the induction variable in the
    // specified loop.
    BasicBlock *Header = L->getHeader();
    pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
    CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
                                  Header->begin());
    rememberInstruction(CanonicalIV);

    Constant *One = ConstantInt::get(Ty, 1);
    for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
      BasicBlock *HP = *HPI;
      if (L->contains(HP)) {
        // Insert a unit add instruction right before the terminator
        // corresponding to the back-edge.
        Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
                                                     "indvar.next",
                                                     HP->getTerminator());
        Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
        rememberInstruction(Add);
        CanonicalIV->addIncoming(Add, HP);
      } else {
        CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
      }
    }
  }

  // {0,+,1} --> Insert a canonical induction variable into the loop!
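  // That is, if this addrec is itself {0,+,1} in this loop, it is exactly
  // the canonical induction variable found or created above, so just
  // return that PHI.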
  if (S->isAffine() && S->getOperand(1)->isOne()) {
    assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
           "IVs with types different from the canonical IV should "
           "already have been handled!");
    return CanonicalIV;
  }

  // {0,+,F} --> {0,+,1} * F

  // If this is a simple linear addrec, emit it now as a special case.
  if (S->isAffine())    // {0,+,F} --> i*F
    return
      expand(SE.getTruncateOrNoop(
        SE.getMulExpr(SE.getUnknown(CanonicalIV),
                      SE.getNoopOrAnyExtend(S->getOperand(1),
                                            CanonicalIV->getType())),
        Ty));

  // If this is a chain of recurrences, turn it into a closed form, using the
  // folders, then expandCodeFor the closed form. This allows the folders to
  // simplify the expression without having to build a bunch of special code
  // into this folder.
  const SCEV *IH = SE.getUnknown(CanonicalIV);   // Get I as a "symbolic" SCEV.

  // Promote S up to the canonical IV type, if the cast is foldable.
  const SCEV *NewS = S;
  const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
  if (isa<SCEVAddRecExpr>(Ext))
    NewS = Ext;

  const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
  //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";

  // Truncate the result down to the original type, if needed.
  const SCEV *T = SE.getTruncateOrNoop(V, Ty);
  return expand(T);
}

Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());
  Value *V = expandCodeFor(S->getOperand(),
                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
  Value *I = Builder.CreateTrunc(V, Ty);
  rememberInstruction(I);
  return I;
}

Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());
  Value *V = expandCodeFor(S->getOperand(),
                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
  Value *I = Builder.CreateZExt(V, Ty);
  rememberInstruction(I);
  return I;
}

Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());
  Value *V = expandCodeFor(S->getOperand(),
                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
  Value *I = Builder.CreateSExt(V, Ty);
  rememberInstruction(I);
  return I;
}

Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
  Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
  Type *Ty = LHS->getType();
  for (int i = S->getNumOperands()-2; i >= 0; --i) {
    // In the case of mixed integer and pointer types, do the
    // rest of the comparisons as integer.
    if (S->getOperand(i)->getType() != Ty) {
      Ty = SE.getEffectiveSCEVType(Ty);
      LHS = InsertNoopCastOfTo(LHS, Ty);
    }
    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
    Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
    rememberInstruction(ICmp);
    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
    rememberInstruction(Sel);
    LHS = Sel;
  }
  // In the case of mixed integer and pointer types, cast the
  // final result back to the pointer type.
  if (LHS->getType() != S->getType())
    LHS = InsertNoopCastOfTo(LHS, S->getType());
  return LHS;
}

Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
  Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
  Type *Ty = LHS->getType();
  for (int i = S->getNumOperands()-2; i >= 0; --i) {
    // In the case of mixed integer and pointer types, do the
    // rest of the comparisons as integer.
    if (S->getOperand(i)->getType() != Ty) {
      Ty = SE.getEffectiveSCEVType(Ty);
      LHS = InsertNoopCastOfTo(LHS, Ty);
    }
    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
    Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
    rememberInstruction(ICmp);
    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
    rememberInstruction(Sel);
    LHS = Sel;
  }
  // In the case of mixed integer and pointer types, cast the
  // final result back to the pointer type.
  if (LHS->getType() != S->getType())
    LHS = InsertNoopCastOfTo(LHS, S->getType());
  return LHS;
}

Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
                                   Instruction *I) {
  BasicBlock::iterator IP = I;
  while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP))
    ++IP;
  Builder.SetInsertPoint(IP->getParent(), IP);
  return expandCodeFor(SH, Ty);
}

Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
  // Expand the code for this SCEV.
  Value *V = expand(SH);
  if (Ty) {
    assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
           "non-trivial casts should be done with the SCEVs directly!");
    V = InsertNoopCastOfTo(V, Ty);
  }
  return V;
}

Value *SCEVExpander::expand(const SCEV *S) {
  // Compute an insertion point for this SCEV object. Hoist the instructions
  // as far out in the loop nest as possible.
  Instruction *InsertPt = Builder.GetInsertPoint();
  for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ;
       L = L->getParentLoop())
    if (SE.isLoopInvariant(S, L)) {
      if (!L) break;
      if (BasicBlock *Preheader = L->getLoopPreheader())
        InsertPt = Preheader->getTerminator();
      else {
        // LSR sets the insertion point for AddRec start/step values to the
        // block start to simplify value reuse, even though it's an invalid
        // position. SCEVExpander must correct for this in all cases.
        InsertPt = L->getHeader()->getFirstInsertionPt();
      }
    } else {
      // If the SCEV is computable at this level, insert it into the header
      // after the PHIs (and after any other instructions that we've inserted
      // there) so that it is guaranteed to dominate any user inside the loop.
      if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
        InsertPt = L->getHeader()->getFirstInsertionPt();
      while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
        InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
      break;
    }

  // Check to see if we already expanded this here.
  std::map<std::pair<const SCEV *, Instruction *>,
           AssertingVH<Value> >::iterator I =
    InsertedExpressions.find(std::make_pair(S, InsertPt));
  if (I != InsertedExpressions.end())
    return I->second;

  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
  Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);

  // Expand the expression into instructions.
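  // visit() dispatches on the SCEV kind via SCEVVisitor to the visit*
  // handlers (add, mul, udiv, addrec, the casts, smax, umax); constants
  // and unknowns are handled in the header.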
  Value *V = visit(S);

  // Remember the expanded value for this SCEV at this location.
  //
  // This is independent of PostIncLoops. The mapped value simply materializes
  // the expression at this insertion point. If the mapped value happened to be
  // a postinc expansion, it could be reused by a non-postinc user, but only if
  // its insertion point was already at the head of the loop.
  InsertedExpressions[std::make_pair(S, InsertPt)] = V;

  restoreInsertPoint(SaveInsertBB, SaveInsertPt);
  return V;
}

void SCEVExpander::rememberInstruction(Value *I) {
  if (!PostIncLoops.empty())
    InsertedPostIncValues.insert(I);
  else
    InsertedValues.insert(I);

  // If we just claimed an existing instruction and that instruction had
  // been the insert point, adjust the insert point forward so that
  // subsequently inserted code will be dominated.
  if (Builder.GetInsertPoint() == I) {
    BasicBlock::iterator It = cast<Instruction>(I);
    do { ++It; } while (isInsertedInstruction(It) ||
                        isa<DbgInfoIntrinsic>(It));
    Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
  }
}

void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
  // If we acquired more instructions since the old insert point was saved,
  // advance past them.
  while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I;

  Builder.SetInsertPoint(BB, I);
}

/// getOrInsertCanonicalInductionVariable - This method returns the
/// canonical induction variable of the specified type for the specified
/// loop (inserting one if there is none). A canonical induction variable
/// starts at zero and steps by one on each iteration.
PHINode *
SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
                                                    Type *Ty) {
  assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");

  // Build a SCEV for {0,+,1}<L>.
  // Conservatively use FlagAnyWrap for now.
  const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
                                   SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);

  // Emit code for it.
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
  PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
  if (SaveInsertBB)
    restoreInsertPoint(SaveInsertBB, SaveInsertPt);

  return V;
}

/// hoistStep - Attempt to hoist an IV increment above a potential use.
///
/// To successfully hoist, two criteria must be met:
/// - IncV operands dominate InsertPos, and
/// - InsertPos dominates IncV.
///
/// Meeting the second condition means that we don't need to check all of
/// IncV's existing uses (it's moving up in the domtree).
///
/// This does not yet recursively hoist the operands, although that would
/// not be difficult.
///
/// This does not require a SCEVExpander instance and could be replaced by a
/// general code-insertion helper.
bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos,
                             const DominatorTree *DT) {
  // PHI nodes are positionally special and don't follow the normal rules
  // for instruction dominance. Handle them immediately.
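  //
  // For illustration (placeholder IR, not from the original source): in
  //
  //   loop:
  //     %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]  ; phi section
  //     %use = add i64 %iv, 1                          ; first non-phi
  //
  // a phi may only be "moved before" another phi; moving one before %use
  // would leave a phi below a non-phi, which is invalid IR. Hence the
  // conservative phi checks below.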
  if (isa<PHINode>(InsertPos))
    return isa<PHINode>(IncV);
  if (isa<PHINode>(IncV))
    return false;

  if (DT->dominates(IncV, InsertPos))
    return true;

  if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
    return false;

  if (IncV->mayHaveSideEffects())
    return false;

  // Attempt to hoist IncV: all of its operands must already dominate
  // InsertPos.
  for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
       OI != OE; ++OI) {
    Instruction *OInst = dyn_cast<Instruction>(OI);
    if (OInst && (OInst == InsertPos || !DT->dominates(OInst, InsertPos)))
      return false;
  }
  IncV->moveBefore(InsertPos);
  return true;
}

/// Sort values by integer width for replaceCongruentIVs.
static bool width_descending(Value *lhs, Value *rhs) {
  // Put pointers at the back and make sure pointer < pointer = false.
  if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy())
    return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy();
  return rhs->getType()->getPrimitiveSizeInBits()
    < lhs->getType()->getPrimitiveSizeInBits();
}

/// replaceCongruentIVs - Check for congruent phis in this loop header and
/// replace them with their most canonical representative. Return the number of
/// phis eliminated.
///
/// This does not depend on any SCEVExpander state but should be used in
/// the same context that SCEVExpander is used.
unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
                                           SmallVectorImpl<WeakVH> &DeadInsts,
                                           const TargetLowering *TLI) {
  // Collect the integer phis; if a TLI is available, sort them in order of
  // decreasing width so that wide phis are processed first.
  SmallVector<PHINode*, 8> Phis;
  for (BasicBlock::iterator I = L->getHeader()->begin();
       PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
    Phis.push_back(Phi);
  }
  if (TLI)
    std::sort(Phis.begin(), Phis.end(), width_descending);

  unsigned NumElim = 0;
  DenseMap<const SCEV *, PHINode *> ExprToIVMap;
  // Process phis from wide to narrow, mapping wide phis to their truncation
  // so narrow phis can reuse them.
  for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(),
         PEnd = Phis.end(); PIter != PEnd; ++PIter) {
    PHINode *Phi = *PIter;

    if (!SE.isSCEVable(Phi->getType()))
      continue;

    PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
    if (!OrigPhiRef) {
      OrigPhiRef = Phi;
      if (Phi->getType()->isIntegerTy() && TLI
          && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
        // This phi can be freely truncated to the narrowest phi type. Map the
        // truncated expression to it so it will be reused for narrow types.
        const SCEV *TruncExpr =
          SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
        ExprToIVMap[TruncExpr] = Phi;
      }
      continue;
    }

    // Replacing a pointer phi with an integer phi or vice-versa doesn't make
    // sense.
    if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
      continue;

    if (BasicBlock *LatchBlock = L->getLoopLatch()) {
      Instruction *OrigInc =
        cast<Instruction>(OrigPhiRef->getIncomingValueForBlock(LatchBlock));
      Instruction *IsomorphicInc =
        cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));

      // If this phi has the same width but is more canonical, replace the
      // original with it.
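      //
      // "More canonical" means the phi/increment pair has the shape this
      // expander itself emits (per isExpandedAddRecExprPHI), which maximizes
      // the chance of later reuse. As an illustrative example with
      // placeholder names, congruent IVs such as
      //
      //   %iv1 = phi i64 [ 0, %ph ], [ %iv1.next, %loop ]
      //   %iv2 = phi i64 [ 0, %ph ], [ %iv2.next, %loop ]
      //
      // share the SCEV {0,+,1}<%loop>, and the less canonical phi (and,
      // below, its single increment) is replaced by the other pair.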
      if (OrigPhiRef->getType() == Phi->getType()
          && !isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)
          && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) {
        std::swap(OrigPhiRef, Phi);
        std::swap(OrigInc, IsomorphicInc);
      }
      // Replacing the congruent phi is sufficient because acyclic redundancy
      // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
      // that a phi is congruent, it's often the head of an IV user cycle that
      // is isomorphic with the original phi. It's worth eagerly cleaning up the
      // common case of a single IV increment so that DeleteDeadPHIs can remove
      // cycles that had postinc uses.
      const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc),
                                                   IsomorphicInc->getType());
      if (OrigInc != IsomorphicInc
          && TruncExpr == SE.getSCEV(IsomorphicInc)
          && hoistStep(OrigInc, IsomorphicInc, DT)) {
        DEBUG_WITH_TYPE(DebugType, dbgs()
                        << "INDVARS: Eliminated congruent iv.inc: "
                        << *IsomorphicInc << '\n');
        Value *NewInc = OrigInc;
        if (OrigInc->getType() != IsomorphicInc->getType()) {
          // If the increment is itself a phi, a cast cannot be inserted right
          // after it inside the phi section; use the header's first legal
          // insertion point instead.
          Instruction *IP = isa<PHINode>(OrigInc)
            ? (Instruction*)L->getHeader()->getFirstInsertionPt()
            : OrigInc->getNextNode();
          IRBuilder<> Builder(IP);
          Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
          NewInc = Builder.CreateTruncOrBitCast(OrigInc,
                                                IsomorphicInc->getType(),
                                                IVName);
        }
        IsomorphicInc->replaceAllUsesWith(NewInc);
        DeadInsts.push_back(IsomorphicInc);
      }
    }
    DEBUG_WITH_TYPE(DebugType, dbgs()
                    << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
    ++NumElim;
    Value *NewIV = OrigPhiRef;
    if (OrigPhiRef->getType() != Phi->getType()) {
      IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt());
      Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
      NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
    }
    Phi->replaceAllUsesWith(NewIV);
    DeadInsts.push_back(Phi);
  }
  return NumElim;
}
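
// Usage sketch (illustrative only; the pass boilerplate and variable names
// below are assumed, not defined in this file): a loop pass such as indvars
// would typically drive replaceCongruentIVs roughly as follows, then delete
// whatever became dead.
//
//   SCEVExpander Rewriter(SE, "indvars");
//   SmallVector<WeakVH, 16> DeadInsts;
//   Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TLI);
//   while (!DeadInsts.empty())
//     if (Instruction *Inst =
//           dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
//       RecursivelyDeleteTriviallyDeadInstructions(Inst);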