// ScalarEvolution.cpp revision 5e5dd68c7f5115c245745c496ab3e4cd338a181c
//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution analysis
// engine, which is used primarily to analyze expressions involving induction
// variables in loops.
//
// There are several aspects to this library.  First is the representation of
// scalar expressions, which are represented as subclasses of the SCEV class.
// These classes are used to represent certain types of subexpressions that we
// can handle. We only create one SCEV of a particular shape, so
// pointer-comparisons for equality are legal.
//
// One important aspect of the SCEV objects is that they are never cyclic, even
// if there is a cycle in the dataflow for an expression (ie, a PHI node).  If
// the PHI node is one of the idioms that we can represent (e.g., a polynomial
// recurrence) then we represent it directly as a recurrence node, otherwise we
// represent it as a SCEVUnknown node.
//
// In addition to being able to represent expressions of various types, we also
// have folders that are used to build the *canonical* representation for a
// particular expression.  These folders are capable of using a variety of
// rewrite rules to simplify the expressions.
//
// Once the folders are defined, we can implement the more interesting
// higher-level code, such as the code that recognizes PHI nodes of various
// types, computes the execution count of a loop, etc.
//
// TODO: We should use these routines and value representations to implement
// dependence analysis!
//
//===----------------------------------------------------------------------===//
//
// There are several good references for the techniques used in this analysis.
//
//  Chains of recurrences -- a method to expedite the evaluation
//  of closed-form functions
//  Olaf Bachmann, Paul S. Wang, Eugene V. Zima
//
//  On computational properties of chains of recurrences
//  Eugene V. Zima
//
//  Symbolic Evaluation of Chains of Recurrences for Loop Optimization
//  Robert A. van Engelen
//
//  Efficient Symbolic Analysis for Optimizing Compilers
//  Robert A. van Engelen
//
//  Using the chains of recurrences algebra for data dependence testing and
//  induction variable substitution
//  MS Thesis, Johnie Birch
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "scalar-evolution"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/GlobalAlias.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>
using namespace llvm;

STATISTIC(NumArrayLenItCounts,
          "Number of trip counts computed with array length");
STATISTIC(NumTripCountsComputed,
          "Number of loops with predictable loop counts");
STATISTIC(NumTripCountsNotComputed,
          "Number of loops without predictable loop counts");
STATISTIC(NumBruteForceTripCountsComputed,
          "Number of loops with trip counts computed by force");

// Cap on how many iterations SCEV will symbolically execute when
// brute-forcing a constant-derived trip count; guards against spending
// unbounded time on pathological loops.
static cl::opt<unsigned>
MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
                        cl::desc("Maximum number of iterations SCEV will "
                                 "symbolically execute a constant "
                                 "derived loop"),
                        cl::init(100));

static RegisterPass<ScalarEvolution>
R("scalar-evolution", "Scalar Evolution Analysis", false, true);
char ScalarEvolution::ID = 0;

//===----------------------------------------------------------------------===//
//                           SCEV class definitions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Implementation of the SCEV class.
116// 117 118SCEV::~SCEV() {} 119 120void SCEV::dump() const { 121 print(dbgs()); 122 dbgs() << '\n'; 123} 124 125bool SCEV::isZero() const { 126 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) 127 return SC->getValue()->isZero(); 128 return false; 129} 130 131bool SCEV::isOne() const { 132 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) 133 return SC->getValue()->isOne(); 134 return false; 135} 136 137bool SCEV::isAllOnesValue() const { 138 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) 139 return SC->getValue()->isAllOnesValue(); 140 return false; 141} 142 143SCEVCouldNotCompute::SCEVCouldNotCompute() : 144 SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} 145 146bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const { 147 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); 148 return false; 149} 150 151const Type *SCEVCouldNotCompute::getType() const { 152 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); 153 return 0; 154} 155 156bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const { 157 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); 158 return false; 159} 160 161bool SCEVCouldNotCompute::hasOperand(const SCEV *) const { 162 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); 163 return false; 164} 165 166void SCEVCouldNotCompute::print(raw_ostream &OS) const { 167 OS << "***COULDNOTCOMPUTE***"; 168} 169 170bool SCEVCouldNotCompute::classof(const SCEV *S) { 171 return S->getSCEVType() == scCouldNotCompute; 172} 173 174const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { 175 FoldingSetNodeID ID; 176 ID.AddInteger(scConstant); 177 ID.AddPointer(V); 178 void *IP = 0; 179 if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; 180 SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); 181 UniqueSCEVs.InsertNode(S, IP); 182 return S; 183} 184 185const SCEV *ScalarEvolution::getConstant(const APInt& Val) 
{ 186 return getConstant(ConstantInt::get(getContext(), Val)); 187} 188 189const SCEV * 190ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { 191 const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); 192 return getConstant(ConstantInt::get(ITy, V, isSigned)); 193} 194 195const Type *SCEVConstant::getType() const { return V->getType(); } 196 197void SCEVConstant::print(raw_ostream &OS) const { 198 WriteAsOperand(OS, V, false); 199} 200 201SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, 202 unsigned SCEVTy, const SCEV *op, const Type *ty) 203 : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} 204 205bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { 206 return Op->dominates(BB, DT); 207} 208 209bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { 210 return Op->properlyDominates(BB, DT); 211} 212 213SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, 214 const SCEV *op, const Type *ty) 215 : SCEVCastExpr(ID, scTruncate, op, ty) { 216 assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && 217 (Ty->isIntegerTy() || Ty->isPointerTy()) && 218 "Cannot truncate non-integer value!"); 219} 220 221void SCEVTruncateExpr::print(raw_ostream &OS) const { 222 OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; 223} 224 225SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, 226 const SCEV *op, const Type *ty) 227 : SCEVCastExpr(ID, scZeroExtend, op, ty) { 228 assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && 229 (Ty->isIntegerTy() || Ty->isPointerTy()) && 230 "Cannot zero extend non-integer value!"); 231} 232 233void SCEVZeroExtendExpr::print(raw_ostream &OS) const { 234 OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; 235} 236 237SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, 238 const SCEV *op, const Type *ty) 239 : SCEVCastExpr(ID, scSignExtend, op, ty) { 240 
assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && 241 (Ty->isIntegerTy() || Ty->isPointerTy()) && 242 "Cannot sign extend non-integer value!"); 243} 244 245void SCEVSignExtendExpr::print(raw_ostream &OS) const { 246 OS << "(sext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; 247} 248 249void SCEVCommutativeExpr::print(raw_ostream &OS) const { 250 const char *OpStr = getOperationStr(); 251 OS << "("; 252 for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) { 253 OS << **I; 254 if (next(I) != E) 255 OS << OpStr; 256 } 257 OS << ")"; 258} 259 260bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { 261 for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { 262 if (!getOperand(i)->dominates(BB, DT)) 263 return false; 264 } 265 return true; 266} 267 268bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { 269 for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { 270 if (!getOperand(i)->properlyDominates(BB, DT)) 271 return false; 272 } 273 return true; 274} 275 276bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { 277 return LHS->dominates(BB, DT) && RHS->dominates(BB, DT); 278} 279 280bool SCEVUDivExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { 281 return LHS->properlyDominates(BB, DT) && RHS->properlyDominates(BB, DT); 282} 283 284void SCEVUDivExpr::print(raw_ostream &OS) const { 285 OS << "(" << *LHS << " /u " << *RHS << ")"; 286} 287 288const Type *SCEVUDivExpr::getType() const { 289 // In most cases the types of LHS and RHS will be the same, but in some 290 // crazy cases one or the other may be a pointer. ScalarEvolution doesn't 291 // depend on the type for correctness, but handling types carefully can 292 // avoid extra casts in the SCEVExpander. The LHS is more likely to be 293 // a pointer type than the RHS, so use the RHS' type here. 
294 return RHS->getType(); 295} 296 297bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const { 298 // Add recurrences are never invariant in the function-body (null loop). 299 if (!QueryLoop) 300 return false; 301 302 // This recurrence is variant w.r.t. QueryLoop if QueryLoop contains L. 303 if (QueryLoop->contains(L)) 304 return false; 305 306 // This recurrence is variant w.r.t. QueryLoop if any of its operands 307 // are variant. 308 for (unsigned i = 0, e = getNumOperands(); i != e; ++i) 309 if (!getOperand(i)->isLoopInvariant(QueryLoop)) 310 return false; 311 312 // Otherwise it's loop-invariant. 313 return true; 314} 315 316bool 317SCEVAddRecExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { 318 return DT->dominates(L->getHeader(), BB) && 319 SCEVNAryExpr::dominates(BB, DT); 320} 321 322bool 323SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { 324 // This uses a "dominates" query instead of "properly dominates" query because 325 // the instruction which produces the addrec's value is a PHI, and a PHI 326 // effectively properly dominates its entire containing block. 327 return DT->dominates(L->getHeader(), BB) && 328 SCEVNAryExpr::properlyDominates(BB, DT); 329} 330 331void SCEVAddRecExpr::print(raw_ostream &OS) const { 332 OS << "{" << *Operands[0]; 333 for (unsigned i = 1, e = NumOperands; i != e; ++i) 334 OS << ",+," << *Operands[i]; 335 OS << "}<"; 336 WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); 337 OS << ">"; 338} 339 340bool SCEVUnknown::isLoopInvariant(const Loop *L) const { 341 // All non-instruction values are loop invariant. All instructions are loop 342 // invariant if they are not contained in the specified loop. 343 // Instructions are never considered invariant in the function body 344 // (null loop) because they are defined within the "loop". 
345 if (Instruction *I = dyn_cast<Instruction>(V)) 346 return L && !L->contains(I); 347 return true; 348} 349 350bool SCEVUnknown::dominates(BasicBlock *BB, DominatorTree *DT) const { 351 if (Instruction *I = dyn_cast<Instruction>(getValue())) 352 return DT->dominates(I->getParent(), BB); 353 return true; 354} 355 356bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { 357 if (Instruction *I = dyn_cast<Instruction>(getValue())) 358 return DT->properlyDominates(I->getParent(), BB); 359 return true; 360} 361 362const Type *SCEVUnknown::getType() const { 363 return V->getType(); 364} 365 366bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const { 367 if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V)) 368 if (VCE->getOpcode() == Instruction::PtrToInt) 369 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) 370 if (CE->getOpcode() == Instruction::GetElementPtr && 371 CE->getOperand(0)->isNullValue() && 372 CE->getNumOperands() == 2) 373 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1))) 374 if (CI->isOne()) { 375 AllocTy = cast<PointerType>(CE->getOperand(0)->getType()) 376 ->getElementType(); 377 return true; 378 } 379 380 return false; 381} 382 383bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const { 384 if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V)) 385 if (VCE->getOpcode() == Instruction::PtrToInt) 386 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) 387 if (CE->getOpcode() == Instruction::GetElementPtr && 388 CE->getOperand(0)->isNullValue()) { 389 const Type *Ty = 390 cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); 391 if (const StructType *STy = dyn_cast<StructType>(Ty)) 392 if (!STy->isPacked() && 393 CE->getNumOperands() == 3 && 394 CE->getOperand(1)->isNullValue()) { 395 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2))) 396 if (CI->isOne() && 397 STy->getNumElements() == 2 && 398 STy->getElementType(0)->isIntegerTy(1)) { 399 AllocTy = 
STy->getElementType(1); 400 return true; 401 } 402 } 403 } 404 405 return false; 406} 407 408bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const { 409 if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V)) 410 if (VCE->getOpcode() == Instruction::PtrToInt) 411 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) 412 if (CE->getOpcode() == Instruction::GetElementPtr && 413 CE->getNumOperands() == 3 && 414 CE->getOperand(0)->isNullValue() && 415 CE->getOperand(1)->isNullValue()) { 416 const Type *Ty = 417 cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); 418 // Ignore vector types here so that ScalarEvolutionExpander doesn't 419 // emit getelementptrs that index into vectors. 420 if (Ty->isStructTy() || Ty->isArrayTy()) { 421 CTy = Ty; 422 FieldNo = CE->getOperand(2); 423 return true; 424 } 425 } 426 427 return false; 428} 429 430void SCEVUnknown::print(raw_ostream &OS) const { 431 const Type *AllocTy; 432 if (isSizeOf(AllocTy)) { 433 OS << "sizeof(" << *AllocTy << ")"; 434 return; 435 } 436 if (isAlignOf(AllocTy)) { 437 OS << "alignof(" << *AllocTy << ")"; 438 return; 439 } 440 441 const Type *CTy; 442 Constant *FieldNo; 443 if (isOffsetOf(CTy, FieldNo)) { 444 OS << "offsetof(" << *CTy << ", "; 445 WriteAsOperand(OS, FieldNo, false); 446 OS << ")"; 447 return; 448 } 449 450 // Otherwise just print it normally. 
451 WriteAsOperand(OS, V, false); 452} 453 454//===----------------------------------------------------------------------===// 455// SCEV Utilities 456//===----------------------------------------------------------------------===// 457 458static bool CompareTypes(const Type *A, const Type *B) { 459 if (A->getTypeID() != B->getTypeID()) 460 return A->getTypeID() < B->getTypeID(); 461 if (const IntegerType *AI = dyn_cast<IntegerType>(A)) { 462 const IntegerType *BI = cast<IntegerType>(B); 463 return AI->getBitWidth() < BI->getBitWidth(); 464 } 465 if (const PointerType *AI = dyn_cast<PointerType>(A)) { 466 const PointerType *BI = cast<PointerType>(B); 467 return CompareTypes(AI->getElementType(), BI->getElementType()); 468 } 469 if (const ArrayType *AI = dyn_cast<ArrayType>(A)) { 470 const ArrayType *BI = cast<ArrayType>(B); 471 if (AI->getNumElements() != BI->getNumElements()) 472 return AI->getNumElements() < BI->getNumElements(); 473 return CompareTypes(AI->getElementType(), BI->getElementType()); 474 } 475 if (const VectorType *AI = dyn_cast<VectorType>(A)) { 476 const VectorType *BI = cast<VectorType>(B); 477 if (AI->getNumElements() != BI->getNumElements()) 478 return AI->getNumElements() < BI->getNumElements(); 479 return CompareTypes(AI->getElementType(), BI->getElementType()); 480 } 481 if (const StructType *AI = dyn_cast<StructType>(A)) { 482 const StructType *BI = cast<StructType>(B); 483 if (AI->getNumElements() != BI->getNumElements()) 484 return AI->getNumElements() < BI->getNumElements(); 485 for (unsigned i = 0, e = AI->getNumElements(); i != e; ++i) 486 if (CompareTypes(AI->getElementType(i), BI->getElementType(i)) || 487 CompareTypes(BI->getElementType(i), AI->getElementType(i))) 488 return CompareTypes(AI->getElementType(i), BI->getElementType(i)); 489 } 490 return false; 491} 492 493namespace { 494 /// SCEVComplexityCompare - Return true if the complexity of the LHS is less 495 /// than the complexity of the RHS. 
This comparator is used to canonicalize 496 /// expressions. 497 class SCEVComplexityCompare { 498 LoopInfo *LI; 499 public: 500 explicit SCEVComplexityCompare(LoopInfo *li) : LI(li) {} 501 502 bool operator()(const SCEV *LHS, const SCEV *RHS) const { 503 // Fast-path: SCEVs are uniqued so we can do a quick equality check. 504 if (LHS == RHS) 505 return false; 506 507 // Primarily, sort the SCEVs by their getSCEVType(). 508 unsigned LST = LHS->getSCEVType(); 509 unsigned RST = RHS->getSCEVType(); 510 if (LST != RST) 511 return LST < RST; 512 513 // Then, pick an arbitrary sort. Use the profiling data for speed. 514 const FoldingSetNodeIDRef &L = LHS->getProfile(); 515 const FoldingSetNodeIDRef &R = RHS->getProfile(); 516 size_t LSize = L.getSize(); 517 size_t RSize = R.getSize(); 518 if (LSize != RSize) 519 return LSize < RSize; 520 return memcmp(L.getData(), R.getData(), 521 LSize * sizeof(*L.getData())) < 0; 522 } 523 }; 524} 525 526/// GroupByComplexity - Given a list of SCEV objects, order them by their 527/// complexity, and group objects of the same complexity together by value. 528/// When this routine is finished, we know that any duplicates in the vector are 529/// consecutive and that complexity is monotonically increasing. 530/// 531/// Note that we go take special precautions to ensure that we get deterministic 532/// results from this routine. In other words, we don't want the results of 533/// this to depend on where the addresses of various SCEV objects happened to 534/// land in memory. 535/// 536static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, 537 LoopInfo *LI) { 538 if (Ops.size() < 2) return; // Noop 539 540 SCEVComplexityCompare Comp(LI); 541 542 if (Ops.size() == 2) { 543 // This is the common case, which also happens to be trivially simple. 544 // Special case it. 
    if (Comp(Ops[1], Ops[0]))
      std::swap(Ops[0], Ops[1]);
    return;
  }

  // General case: a deterministic stable sort keeps equal-complexity
  // (duplicate) entries adjacent.
  std::stable_sort(Ops.begin(), Ops.end(), Comp);
}



//===----------------------------------------------------------------------===//
//                      Simple SCEV method implementations
//===----------------------------------------------------------------------===//

/// BinomialCoefficient - Compute BC(It, K).  The result has width W.
/// Assume, K > 0.
static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
                                       ScalarEvolution &SE,
                                       const Type* ResultTy) {
  // Handle the simplest case efficiently.
  if (K == 1)
    return SE.getTruncateOrZeroExtend(It, ResultTy);

  // We are using the following formula for BC(It, K):
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
  //
  // Suppose, W is the bitwidth of the return value.  We must be prepared for
  // overflow.  Hence, we must assure that the result of our computation is
  // equal to the accurate one modulo 2^W.  Unfortunately, division isn't
  // safe in modular arithmetic.
  //
  // However, this code doesn't use exactly that formula; the formula it uses
  // is something like the following, where T is the number of factors of 2 in
  // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
  // exponentiation:
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
  //
  // This formula is trivially equivalent to the previous formula.  However,
  // this formula can be implemented much more efficiently.  The trick is that
  // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
  // arithmetic.  To do exact division in modular arithmetic, all we have
  // to do is multiply by the inverse.  Therefore, this step can be done at
  // width W.
  //
  // The next issue is how to safely do the division by 2^T.  The way this
  // is done is by doing the multiplication step at a width of at least W + T
  // bits.  This way, the bottom W+T bits of the product are accurate.  Then,
  // when we perform the division by 2^T (which is equivalent to a right shift
  // by T), the bottom W bits are accurate.  Extra bits are okay; they'll get
  // truncated out after the division by 2^T.
  //
  // In comparison to just directly using the first formula, this technique
  // is much more efficient; using the first formula requires W * K bits,
  // but this formula less than W + K bits.  Also, the first formula requires
  // a division step, whereas this formula only requires multiplies and shifts.
  //
  // It doesn't matter whether the subtraction step is done in the calculation
  // width or the input iteration count's width; if the subtraction overflows,
  // the result must be zero anyway.  We prefer here to do it in the width of
  // the induction variable because it helps a lot for certain cases; CodeGen
  // isn't smart enough to ignore the overflow, which leads to much less
  // efficient code if the width of the subtraction is wider than the native
  // register width.
  //
  // (It's possible to not widen at all by pulling out factors of 2 before
  // the multiplication; for example, K=2 can be calculated as
  // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
  // extra arithmetic, so it's not an obvious win, and it gets
  // much more complicated for K > 3.)

  // Protection from insane SCEVs; this bound is conservative,
  // but it probably doesn't matter.
  if (K > 1000)
    return SE.getCouldNotCompute();

  unsigned W = SE.getTypeSizeInBits(ResultTy);

  // Calculate K! / 2^T and T; we divide out the factors of two before
  // multiplying for calculating K! / 2^T to avoid overflow.
  // Other overflow doesn't matter because we only care about the bottom
  // W bits of the result.
  APInt OddFactorial(W, 1);
  unsigned T = 1;
  for (unsigned i = 3; i <= K; ++i) {
    APInt Mult(W, i);
    unsigned TwoFactors = Mult.countTrailingZeros();
    T += TwoFactors;
    Mult = Mult.lshr(TwoFactors);
    OddFactorial *= Mult;
  }

  // We need at least W + T bits for the multiplication step
  unsigned CalculationBits = W + T;

  // Calculate 2^T, at width T+W.
  APInt DivFactor = APInt(CalculationBits, 1).shl(T);

  // Calculate the multiplicative inverse of K! / 2^T;
  // this multiplication factor will perform the exact division by
  // K! / 2^T.
  APInt Mod = APInt::getSignedMinValue(W+1);
  APInt MultiplyFactor = OddFactorial.zext(W+1);
  MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
  MultiplyFactor = MultiplyFactor.trunc(W);

  // Calculate the product, at width T+W
  const IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
                                                      CalculationBits);
  const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
  for (unsigned i = 1; i != K; ++i) {
    // (It - i), computed in the iteration count's width; see the overflow
    // note in the big comment above.
    const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
    Dividend = SE.getMulExpr(Dividend,
                             SE.getTruncateOrZeroExtend(S, CalculationTy));
  }

  // Divide by 2^T
  const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));

  // Truncate the result, and divide by K! / 2^T.

  return SE.getMulExpr(SE.getConstant(MultiplyFactor),
                       SE.getTruncateOrZeroExtend(DivResult, ResultTy));
}

/// evaluateAtIteration - Return the value of this chain of recurrences at
/// the specified iteration number.  We can evaluate this recurrence by
/// multiplying each element in the chain by the binomial coefficient
/// corresponding to it.  In other words, we can evaluate {A,+,B,+,C,+,D} as:
///
///   A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
///
/// where BC(It, k) stands for binomial coefficient.
///
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
                                                ScalarEvolution &SE) const {
  const SCEV *Result = getStart();
  for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
    // The computation is correct in the face of overflow provided that the
    // multiplication is performed _after_ the evaluation of the binomial
    // coefficient.
    const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
    if (isa<SCEVCouldNotCompute>(Coeff))
      return Coeff;

    Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
  }
  return Result;
}

//===----------------------------------------------------------------------===//
//                    SCEV Expression folder implementations
//===----------------------------------------------------------------------===//

/// getTruncateExpr - Return a SCEV for Op truncated to Ty, folding where
/// possible and otherwise creating (and uniquing) a SCEVTruncateExpr node.
const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
                                             const Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
         "This is not a truncating conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Check the uniquing table first.
  FoldingSetNodeID ID;
  ID.AddInteger(scTruncate);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
      cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));

  // trunc(trunc(x)) --> trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
    return getTruncateExpr(ST->getOperand(), Ty);

  // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
  if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    return getTruncateOrSignExtend(SS->getOperand(), Ty);

  // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getTruncateOrZeroExtend(SZ->getOperand(), Ty);

  // If the input value is a chrec scev, truncate the chrec's operands.
  // Truncation distributes over the recurrence.
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
    SmallVector<const SCEV *, 4> Operands;
    for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
      Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
    return getAddRecExpr(Operands, AddRec->getLoop());
  }

  // The cast wasn't folded; create an explicit cast node.
  // Recompute the insert position, as it may have been invalidated
  // by the recursive calls above inserting other nodes.
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
                                                 Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

/// getZeroExtendExpr - Return a SCEV for Op zero-extended to Ty, folding
/// where possible; in particular, an affine addrec proved not to wrap
/// unsigned can have the extension pushed into its operands.
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
                                               const Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Fold if the operand is constant.
758 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) { 759 const Type *IntTy = getEffectiveSCEVType(Ty); 760 Constant *C = ConstantExpr::getZExt(SC->getValue(), IntTy); 761 if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty); 762 return getConstant(cast<ConstantInt>(C)); 763 } 764 765 // zext(zext(x)) --> zext(x) 766 if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) 767 return getZeroExtendExpr(SZ->getOperand(), Ty); 768 769 // Before doing any expensive analysis, check to see if we've already 770 // computed a SCEV for this Op and Ty. 771 FoldingSetNodeID ID; 772 ID.AddInteger(scZeroExtend); 773 ID.AddPointer(Op); 774 ID.AddPointer(Ty); 775 void *IP = 0; 776 if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; 777 778 // If the input value is a chrec scev, and we can prove that the value 779 // did not overflow the old, smaller, value, we can zero extend all of the 780 // operands (often constants). This allows analysis of something like 781 // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } 782 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) 783 if (AR->isAffine()) { 784 const SCEV *Start = AR->getStart(); 785 const SCEV *Step = AR->getStepRecurrence(*this); 786 unsigned BitWidth = getTypeSizeInBits(AR->getType()); 787 const Loop *L = AR->getLoop(); 788 789 // If we have special knowledge that this addrec won't overflow, 790 // we don't need to do any further analysis. 791 if (AR->hasNoUnsignedWrap()) 792 return getAddRecExpr(getZeroExtendExpr(Start, Ty), 793 getZeroExtendExpr(Step, Ty), 794 L); 795 796 // Check whether the backedge-taken count is SCEVCouldNotCompute. 797 // Note that this serves two purposes: It filters out loops that are 798 // simply not analyzable, and it covers the case where this code is 799 // being called from within backedge-taken count analysis, such that 800 // attempting to ask for the backedge-taken count would likely result 801 // in infinite recursion. 
In the later case, the analysis code will 802 // cope with a conservative value, and it will take care to purge 803 // that value once it has finished. 804 const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); 805 if (!isa<SCEVCouldNotCompute>(MaxBECount)) { 806 // Manually compute the final value for AR, checking for 807 // overflow. 808 809 // Check whether the backedge-taken count can be losslessly casted to 810 // the addrec's type. The count is always unsigned. 811 const SCEV *CastedMaxBECount = 812 getTruncateOrZeroExtend(MaxBECount, Start->getType()); 813 const SCEV *RecastedMaxBECount = 814 getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); 815 if (MaxBECount == RecastedMaxBECount) { 816 const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); 817 // Check whether Start+Step*MaxBECount has no unsigned overflow. 818 const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); 819 const SCEV *Add = getAddExpr(Start, ZMul); 820 const SCEV *OperandExtendedAdd = 821 getAddExpr(getZeroExtendExpr(Start, WideTy), 822 getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), 823 getZeroExtendExpr(Step, WideTy))); 824 if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) 825 // Return the expression with the addrec on the outside. 826 return getAddRecExpr(getZeroExtendExpr(Start, Ty), 827 getZeroExtendExpr(Step, Ty), 828 L); 829 830 // Similar to above, only this time treat the step value as signed. 831 // This covers loops that count down. 832 const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); 833 Add = getAddExpr(Start, SMul); 834 OperandExtendedAdd = 835 getAddExpr(getZeroExtendExpr(Start, WideTy), 836 getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), 837 getSignExtendExpr(Step, WideTy))); 838 if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) 839 // Return the expression with the addrec on the outside. 
840 return getAddRecExpr(getZeroExtendExpr(Start, Ty), 841 getSignExtendExpr(Step, Ty), 842 L); 843 } 844 845 // If the backedge is guarded by a comparison with the pre-inc value 846 // the addrec is safe. Also, if the entry is guarded by a comparison 847 // with the start value and the backedge is guarded by a comparison 848 // with the post-inc value, the addrec is safe. 849 if (isKnownPositive(Step)) { 850 const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - 851 getUnsignedRange(Step).getUnsignedMax()); 852 if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || 853 (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && 854 isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, 855 AR->getPostIncExpr(*this), N))) 856 // Return the expression with the addrec on the outside. 857 return getAddRecExpr(getZeroExtendExpr(Start, Ty), 858 getZeroExtendExpr(Step, Ty), 859 L); 860 } else if (isKnownNegative(Step)) { 861 const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - 862 getSignedRange(Step).getSignedMin()); 863 if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || 864 (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && 865 isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, 866 AR->getPostIncExpr(*this), N))) 867 // Return the expression with the addrec on the outside. 868 return getAddRecExpr(getZeroExtendExpr(Start, Ty), 869 getSignExtendExpr(Step, Ty), 870 L); 871 } 872 } 873 } 874 875 // The cast wasn't folded; create an explicit cast node. 876 // Recompute the insert position, as it may have been invalidated. 
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
                                                   Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

/// getSignExtendExpr - Return a canonical sign-extension of Op to type Ty.
/// The cast is folded away entirely when that can be done safely: constants,
/// nested sext, and affine addrecs whose value provably did not overflow the
/// narrower type. Otherwise an explicit SCEVSignExtendExpr node is created
/// (uniqued, so pointer equality implies structural equality).
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
                                               const Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) {
    const Type *IntTy = getEffectiveSCEVType(Ty);
    Constant *C = ConstantExpr::getSExt(SC->getValue(), IntTy);
    // If Ty was a pointer type, convert the extended integer back to it.
    if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty);
    return getConstant(cast<ConstantInt>(C));
  }

  // sext(sext(x)) --> sext(x)
  if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    return getSignExtendExpr(SS->getOperand(), Ty);

  // Before doing any expensive analysis, check to see if we've already
  // computed a SCEV for this Op and Ty.
  FoldingSetNodeID ID;
  ID.AddInteger(scSignExtend);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // If the input value is a chrec scev, and we can prove that the value
  // did not overflow the old, smaller, value, we can sign extend all of the
  // operands (often constants).  This allows analysis of something like
  // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
    if (AR->isAffine()) {
      const SCEV *Start = AR->getStart();
      const SCEV *Step = AR->getStepRecurrence(*this);
      unsigned BitWidth = getTypeSizeInBits(AR->getType());
      const Loop *L = AR->getLoop();

      // If we have special knowledge that this addrec won't overflow,
      // we don't need to do any further analysis.
      if (AR->hasNoSignedWrap())
        return getAddRecExpr(getSignExtendExpr(Start, Ty),
                             getSignExtendExpr(Step, Ty),
                             L);

      // Check whether the backedge-taken count is SCEVCouldNotCompute.
      // Note that this serves two purposes: It filters out loops that are
      // simply not analyzable, and it covers the case where this code is
      // being called from within backedge-taken count analysis, such that
      // attempting to ask for the backedge-taken count would likely result
      // in infinite recursion. In the latter case, the analysis code will
      // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
      if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
        // Manually compute the final value for AR, checking for
        // overflow.

        // Check whether the backedge-taken count can be losslessly casted to
        // the addrec's type. The count is always unsigned.
        const SCEV *CastedMaxBECount =
          getTruncateOrZeroExtend(MaxBECount, Start->getType());
        const SCEV *RecastedMaxBECount =
          getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
        if (MaxBECount == RecastedMaxBECount) {
          const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
          // Check whether Start+Step*MaxBECount has no signed overflow.
          // The check evaluates the add in a type twice as wide and compares
          // it against the same add with each operand extended individually;
          // the two agree exactly when no overflow occurred.
          const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
          const SCEV *Add = getAddExpr(Start, SMul);
          const SCEV *OperandExtendedAdd =
            getAddExpr(getSignExtendExpr(Start, WideTy),
                       getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
                                  getSignExtendExpr(Step, WideTy)));
          if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd)
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getSignExtendExpr(Start, Ty),
                                 getSignExtendExpr(Step, Ty),
                                 L);

          // Similar to above, only this time treat the step value as unsigned.
          // This covers loops that count up with an unsigned step.
          const SCEV *UMul = getMulExpr(CastedMaxBECount, Step);
          Add = getAddExpr(Start, UMul);
          OperandExtendedAdd =
            getAddExpr(getSignExtendExpr(Start, WideTy),
                       getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
                                  getZeroExtendExpr(Step, WideTy)));
          if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd)
            // Return the expression with the addrec on the outside.
            // Note the step is zero-extended here, matching the unsigned
            // treatment of Step in the check just above.
            return getAddRecExpr(getSignExtendExpr(Start, Ty),
                                 getZeroExtendExpr(Step, Ty),
                                 L);
        }

        // If the backedge is guarded by a comparison with the pre-inc value
        // the addrec is safe. Also, if the entry is guarded by a comparison
        // with the start value and the backedge is guarded by a comparison
        // with the post-inc value, the addrec is safe.
        if (isKnownPositive(Step)) {
          // N is the largest value the addrec may reach without signed
          // overflow, given the known maximum of the step.
          const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
                                      getSignedRange(Step).getSignedMax());
          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
              (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
                                           AR->getPostIncExpr(*this), N)))
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getSignExtendExpr(Start, Ty),
                                 getSignExtendExpr(Step, Ty),
                                 L);
        } else if (isKnownNegative(Step)) {
          const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
                                      getSignedRange(Step).getSignedMin());
          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
              (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
                                           AR->getPostIncExpr(*this), N)))
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(getSignExtendExpr(Start, Ty),
                                 getSignExtendExpr(Step, Ty),
                                 L);
        }
      }
    }

  // The cast wasn't folded; create an explicit cast node.
  // Recompute the insert position, as it may have been invalidated by the
  // recursive getAddExpr/getMulExpr calls above inserting into UniqueSCEVs.
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
                                                   Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

/// getAnyExtendExpr - Return a SCEV for the given operand extended with
/// unspecified bits out to the given type.
///
const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
                                              const Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Sign-extend negative constants.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    if (SC->getValue()->getValue().isNegative())
      return getSignExtendExpr(Op, Ty);

  // Peel off a truncate cast.
  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
    const SCEV *NewOp = T->getOperand();
    // anyext(trunc(x)) --> anyext(x) if x is still narrower than Ty,
    // otherwise just truncate (or keep) x directly.
    if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
      return getAnyExtendExpr(NewOp, Ty);
    return getTruncateOrNoop(NewOp, Ty);
  }

  // Next try a zext cast. If the cast is folded, use it.
  const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
  if (!isa<SCEVZeroExtendExpr>(ZExt))
    return ZExt;

  // Next try a sext cast. If the cast is folded, use it.
  const SCEV *SExt = getSignExtendExpr(Op, Ty);
  if (!isa<SCEVSignExtendExpr>(SExt))
    return SExt;

  // Force the cast to be folded into the operands of an addrec.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
    SmallVector<const SCEV *, 4> Ops;
    for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
         I != E; ++I)
      Ops.push_back(getAnyExtendExpr(*I, Ty));
    return getAddRecExpr(Ops, AR->getLoop());
  }

  // If the expression is obviously signed, use the sext cast value.
  if (isa<SCEVSMaxExpr>(Op))
    return SExt;

  // Absent any other information, use the zext cast value.
  return ZExt;
}

/// CollectAddOperandsWithScales - Process the given Ops list, which is
/// a list of operands to be added under the given scale, update the given
/// map. This is a helper function for getAddRecExpr. As an example of
/// what it does, given a sequence of operands that would form an add
/// expression like this:
///
///    m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
///
/// where A and B are constants, update the map with these values:
///
///    (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
///
/// and add 13 + A*B*29 to AccumulatedConstant.
/// This will allow getAddRecExpr to produce this:
///
///    13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
///
/// This form often exposes folding opportunities that are hidden in
/// the original operand list.
///
/// Return true iff it appears that any interesting folding opportunities
/// may be exposed. This helps getAddRecExpr short-circuit extra work in
/// the common case where no interesting opportunities are present, and
/// is also used as a check to avoid infinite recursion.
///
static bool
CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
                             SmallVector<const SCEV *, 8> &NewOps,
                             APInt &AccumulatedConstant,
                             const SCEV *const *Ops, size_t NumOperands,
                             const APInt &Scale,
                             ScalarEvolution &SE) {
  bool Interesting = false;

  // Iterate over the add operands. They are sorted, with constants first.
  // NOTE(review): this loop has no i != NumOperands bound; it relies on the
  // operand list containing at least one non-constant operand (getAddExpr
  // folds multiple constants into one before calling here) — confirm at any
  // new call site.
  unsigned i = 0;
  while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
    ++i;
    // Pull a buried constant out to the outside.
    if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
      Interesting = true;
    AccumulatedConstant += Scale * C->getValue()->getValue();
  }

  // Next comes everything else. We're especially interested in multiplies
  // here, but they're in the middle, so just visit the rest with one loop.
  for (; i != NumOperands; ++i) {
    const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
    if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
      APInt NewScale =
        Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
      if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
        // A multiplication of a constant with another add; recurse.
        const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
        Interesting |=
          CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
                                       Add->op_begin(), Add->getNumOperands(),
                                       NewScale, SE);
      } else {
        // A multiplication of a constant with some other value. Update
        // the map.
        SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
        const SCEV *Key = SE.getMulExpr(MulOps);
        std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
          M.insert(std::make_pair(Key, NewScale));
        if (Pair.second) {
          NewOps.push_back(Pair.first->first);
        } else {
          Pair.first->second += NewScale;
          // The map already had an entry for this value, which may indicate
          // a folding opportunity.
          Interesting = true;
        }
      }
    } else {
      // An ordinary operand. Update the map.
      std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
        M.insert(std::make_pair(Ops[i], Scale));
      if (Pair.second) {
        NewOps.push_back(Pair.first->first);
      } else {
        Pair.first->second += Scale;
        // The map already had an entry for this value, which may indicate
        // a folding opportunity.
        Interesting = true;
      }
    }
  }

  return Interesting;
}

namespace {
  // Strict-weak-ordering comparator so APInt can be used as a std::map key
  // (compares as unsigned).
  struct APIntCompare {
    bool operator()(const APInt &LHS, const APInt &RHS) const {
      return LHS.ult(RHS);
    }
  };
}

/// getAddExpr - Get a canonical add expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
                                        bool HasNUW, bool HasNSW) {
  // HasNUW/HasNSW are no-unsigned-wrap / no-signed-wrap flags the caller
  // asserts for this addition; they are recorded on the resulting node.
  // Note: Ops is mutated in place throughout this function.
  assert(!Ops.empty() && "Cannot get empty add!");
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
           "SCEVAddExpr operand types don't match!");
#endif

  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
  if (!HasNUW && HasNSW) {
    bool All = true;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (!isKnownNonNegative(Ops[i])) {
        All = false;
        break;
      }
    if (All) HasNUW = true;
  }

  // Sort by complexity, this groups all similar expression types together.
  GroupByComplexity(Ops, LI);

  // If there are any constants, fold them together.
  unsigned Idx = 0;
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
    ++Idx;
    assert(Idx < Ops.size());
    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
      // We found two constants, fold them together!
      Ops[0] = getConstant(LHSC->getValue()->getValue() +
                           RHSC->getValue()->getValue());
      if (Ops.size() == 2) return Ops[0];
      Ops.erase(Ops.begin()+1);  // Erase the folded element
      LHSC = cast<SCEVConstant>(Ops[0]);
    }

    // If we are left with a constant zero being added, strip it off.
    if (LHSC->getValue()->isZero()) {
      Ops.erase(Ops.begin());
      --Idx;
    }

    if (Ops.size() == 1) return Ops[0];
  }

  // Okay, check to see if the same value occurs in the operand list twice.  If
  // so, merge them together into an multiply expression.  Since we sorted the
  // list, these values are required to be adjacent.
  const Type *Ty = Ops[0]->getType();
  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
    if (Ops[i] == Ops[i+1]) {      //  X + Y + Y  -->  X + Y*2
      // Found a match, merge the two values into a multiply, and add any
      // remaining values to the result.
      const SCEV *Two = getConstant(Ty, 2);
      const SCEV *Mul = getMulExpr(Ops[i], Two);
      if (Ops.size() == 2)
        return Mul;
      Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
      Ops.push_back(Mul);
      return getAddExpr(Ops, HasNUW, HasNSW);
    }

  // Check for truncates. If all the operands are truncated from the same
  // type, see if factoring out the truncate would permit the result to be
  // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
  // if the contents of the resulting outer trunc fold to something simple.
  for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
    const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
    const Type *DstType = Trunc->getType();
    const Type *SrcType = Trunc->getOperand()->getType();
    SmallVector<const SCEV *, 8> LargeOps;
    bool Ok = true;
    // Check all the operands to see if they can be represented in the
    // source type of the truncate.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
      if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
        if (T->getOperand()->getType() != SrcType) {
          Ok = false;
          break;
        }
        LargeOps.push_back(T->getOperand());
      } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
        LargeOps.push_back(getAnyExtendExpr(C, SrcType));
      } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
        SmallVector<const SCEV *, 8> LargeMulOps;
        for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
          if (const SCEVTruncateExpr *T =
                dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
            if (T->getOperand()->getType() != SrcType) {
              Ok = false;
              break;
            }
            LargeMulOps.push_back(T->getOperand());
          } else if (const SCEVConstant *C =
                       dyn_cast<SCEVConstant>(M->getOperand(j))) {
            LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
          } else {
            Ok = false;
            break;
          }
        }
        if (Ok)
          LargeOps.push_back(getMulExpr(LargeMulOps));
      } else {
        Ok = false;
        break;
      }
    }
    if (Ok) {
      // Evaluate the expression in the larger type.
      const SCEV *Fold = getAddExpr(LargeOps, HasNUW, HasNSW);
      // If it folds to something simple, use it. Otherwise, don't.
      if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
        return getTruncateExpr(Fold, DstType);
    }
  }

  // Skip past any other cast SCEVs.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
    ++Idx;

  // If there are add operands they would be next.
  if (Idx < Ops.size()) {
    bool DeletedAdd = false;
    while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
      // If we have an add, expand the add operands onto the end of the operands
      // list.
      Ops.insert(Ops.end(), Add->op_begin(), Add->op_end());
      Ops.erase(Ops.begin()+Idx);
      DeletedAdd = true;
    }

    // If we deleted at least one add, we added operands to the end of the list,
    // and they are not necessarily sorted.  Recurse to resort and resimplify
    // any operands we just acquired.  Note that the recursion deliberately
    // drops the HasNUW/HasNSW flags.
    if (DeletedAdd)
      return getAddExpr(Ops);
  }

  // Skip over the add expression until we get to a multiply.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
    ++Idx;

  // Check to see if there are any folding opportunities present with
  // operands multiplied by constant values.
  if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
    uint64_t BitWidth = getTypeSizeInBits(Ty);
    DenseMap<const SCEV *, APInt> M;
    SmallVector<const SCEV *, 8> NewOps;
    APInt AccumulatedConstant(BitWidth, 0);
    if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
                                     Ops.data(), Ops.size(),
                                     APInt(BitWidth, 1), *this)) {
      // Some interesting folding opportunity is present, so its worthwhile to
      // re-generate the operands list. Group the operands by constant scale,
      // to avoid multiplying by the same constant scale multiple times.
      std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
      for (SmallVector<const SCEV *, 8>::iterator I = NewOps.begin(),
           E = NewOps.end(); I != E; ++I)
        MulOpLists[M.find(*I)->second].push_back(*I);
      // Re-generate the operands list.  Terms with a zero scale cancelled
      // out entirely and are dropped.
      Ops.clear();
      if (AccumulatedConstant != 0)
        Ops.push_back(getConstant(AccumulatedConstant));
      for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
           I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
        if (I->first != 0)
          Ops.push_back(getMulExpr(getConstant(I->first),
                                   getAddExpr(I->second)));
      if (Ops.empty())
        return getConstant(Ty, 0);
      if (Ops.size() == 1)
        return Ops[0];
      return getAddExpr(Ops);
    }
  }

  // If we are adding something to a multiply expression, make sure the
  // something is not already an operand of the multiply.  If so, merge it into
  // the multiply.
  for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
    const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
    for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
      const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
      for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
        if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(Ops[AddOp])) {
          // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
          // (MulOp == 0) is 1 when MulOp is 0 and 0 otherwise — i.e. it
          // selects the *other* operand of a two-operand multiply.
          const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
          if (Mul->getNumOperands() != 2) {
            // If the multiply has more than two operands, we must get the
            // Y*Z term.
            SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), Mul->op_end());
            MulOps.erase(MulOps.begin()+MulOp);
            InnerMul = getMulExpr(MulOps);
          }
          const SCEV *One = getConstant(Ty, 1);
          const SCEV *AddOne = getAddExpr(InnerMul, One);
          const SCEV *OuterMul = getMulExpr(AddOne, Ops[AddOp]);
          if (Ops.size() == 2) return OuterMul;
          // Erase the two folded operands, accounting for index shift.
          if (AddOp < Idx) {
            Ops.erase(Ops.begin()+AddOp);
            Ops.erase(Ops.begin()+Idx-1);
          } else {
            Ops.erase(Ops.begin()+Idx);
            Ops.erase(Ops.begin()+AddOp-1);
          }
          Ops.push_back(OuterMul);
          return getAddExpr(Ops);
        }

      // Check this multiply against other multiplies being added together.
      for (unsigned OtherMulIdx = Idx+1;
           OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
           ++OtherMulIdx) {
        const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
        // If MulOp occurs in OtherMul, we can fold the two multiplies
        // together.
        for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
             OMulOp != e; ++OMulOp)
          if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
            // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
            const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
            if (Mul->getNumOperands() != 2) {
              SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
                                                  Mul->op_end());
              MulOps.erase(MulOps.begin()+MulOp);
              InnerMul1 = getMulExpr(MulOps);
            }
            const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
            if (OtherMul->getNumOperands() != 2) {
              SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
                                                  OtherMul->op_end());
              MulOps.erase(MulOps.begin()+OMulOp);
              InnerMul2 = getMulExpr(MulOps);
            }
            const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
            const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
            if (Ops.size() == 2) return OuterMul;
            Ops.erase(Ops.begin()+Idx);
            Ops.erase(Ops.begin()+OtherMulIdx-1);
            Ops.push_back(OuterMul);
            return getAddExpr(Ops);
          }
      }
    }
  }

  // If there are any add recurrences in the operands list, see if any other
  // added values are loop invariant.  If so, we can fold them into the
  // recurrence.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
    ++Idx;

  // Scan over all recurrences, trying to fold loop invariants into them.
  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
    // Scan all of the other operands to this add and add them to the vector if
    // they are loop invariant w.r.t. the recurrence.
    SmallVector<const SCEV *, 8> LIOps;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
    const Loop *AddRecLoop = AddRec->getLoop();
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (Ops[i]->isLoopInvariant(AddRecLoop)) {
        LIOps.push_back(Ops[i]);
        Ops.erase(Ops.begin()+i);
        --i; --e;
      }

    // If we found some loop invariants, fold them into the recurrence.
    if (!LIOps.empty()) {
      //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
      LIOps.push_back(AddRec->getStart());

      SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
                                             AddRec->op_end());
      AddRecOps[0] = getAddExpr(LIOps);

      // It's tempting to propagate NUW/NSW flags here, but nuw/nsw addition
      // is not associative so this isn't necessarily safe.
      const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop);

      // If all of the other operands were loop invariant, we are done.
      if (Ops.size() == 1) return NewRec;

      // Otherwise, add the folded AddRec by the non-liv parts.
      for (unsigned i = 0;; ++i)
        if (Ops[i] == AddRec) {
          Ops[i] = NewRec;
          break;
        }
      return getAddExpr(Ops);
    }

    // Okay, if there weren't any loop invariants to be folded, check to see if
    // there are multiple AddRec's with the same loop induction variable being
    // added together.  If so, we can fold them.
    for (unsigned OtherIdx = Idx+1;
         OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx)
      if (OtherIdx != Idx) {
        const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
        if (AddRecLoop == OtherAddRec->getLoop()) {
          // Other + {A,+,B} + {C,+,D}  -->  Other + {A+C,+,B+D}
          // The two addrecs may have different numbers of operands; extra
          // operands of the longer one are carried over unchanged.
          SmallVector<const SCEV *, 4> NewOps(AddRec->op_begin(),
                                              AddRec->op_end());
          for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) {
            if (i >= NewOps.size()) {
              NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i,
                            OtherAddRec->op_end());
              break;
            }
            NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i));
          }
          const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRecLoop);

          if (Ops.size() == 2) return NewAddRec;

          Ops.erase(Ops.begin()+Idx);
          Ops.erase(Ops.begin()+OtherIdx-1);
          Ops.push_back(NewAddRec);
          return getAddExpr(Ops);
        }
      }

    // Otherwise couldn't fold anything into this recurrence.  Move onto the
    // next one.
  }

  // Okay, it looks like we really DO need an add expr.  Check to see if we
  // already have one, otherwise create a new one.
  FoldingSetNodeID ID;
  ID.AddInteger(scAddExpr);
  ID.AddInteger(Ops.size());
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  void *IP = 0;
  SCEVAddExpr *S =
    static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    // Copy the operands into allocator-owned storage so the node outlives
    // the caller's SmallVector.
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
                                        O, Ops.size());
    UniqueSCEVs.InsertNode(S, IP);
  }
  // Flags are set even on a pre-existing node: wrap flags only accumulate.
  if (HasNUW) S->setHasNoUnsignedWrap(true);
  if (HasNSW) S->setHasNoSignedWrap(true);
  return S;
}

/// getMulExpr - Get a canonical multiply expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                                        bool HasNUW, bool HasNSW) {
  // Note: Ops is mutated in place throughout this function.
  assert(!Ops.empty() && "Cannot get empty mul!");
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Ops[i]->getType()) ==
           getEffectiveSCEVType(Ops[0]->getType()) &&
           "SCEVMulExpr operand types don't match!");
#endif

  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
  if (!HasNUW && HasNSW) {
    bool All = true;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (!isKnownNonNegative(Ops[i])) {
        All = false;
        break;
      }
    if (All) HasNUW = true;
  }

  // Sort by complexity, this groups all similar expression types together.
  GroupByComplexity(Ops, LI);

  // If there are any constants, fold them together.
  unsigned Idx = 0;
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {

    // C1*(C2+V) -> C1*C2 + C1*V
    if (Ops.size() == 2)
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
        if (Add->getNumOperands() == 2 &&
            isa<SCEVConstant>(Add->getOperand(0)))
          return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
                            getMulExpr(LHSC, Add->getOperand(1)));

    ++Idx;
    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
      // We found two constants, fold them together!
      ConstantInt *Fold = ConstantInt::get(getContext(),
                                           LHSC->getValue()->getValue() *
                                           RHSC->getValue()->getValue());
      Ops[0] = getConstant(Fold);
      Ops.erase(Ops.begin()+1);  // Erase the folded element
      if (Ops.size() == 1) return Ops[0];
      LHSC = cast<SCEVConstant>(Ops[0]);
    }

    // If we are left with a constant one being multiplied, strip it off.
    if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
      Ops.erase(Ops.begin());
      --Idx;
    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
      // If we have a multiply of zero, it will always be zero.
      return Ops[0];
    } else if (Ops[0]->isAllOnesValue()) {
      // If we have a mul by -1 of an add, try distributing the -1 among the
      // add operands.
      if (Ops.size() == 2)
        if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
          SmallVector<const SCEV *, 4> NewOps;
          bool AnyFolded = false;
          // NOTE(review): SCEVAddRecExpr::op_iterator is used to walk a
          // SCEVAddExpr here; presumably both are the same inherited n-ary
          // iterator type — confirm against ScalarEvolutionExpressions.h.
          for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
               I != E; ++I) {
            const SCEV *Mul = getMulExpr(Ops[0], *I);
            if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
            NewOps.push_back(Mul);
          }
          // Only distribute if at least one term actually folded; otherwise
          // we would just grow the expression.
          if (AnyFolded)
            return getAddExpr(NewOps);
        }
    }

    if (Ops.size() == 1)
      return Ops[0];
  }

  // Skip over the add expression until we get to a multiply.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
    ++Idx;

  // If there are mul operands inline them all into this expression.
  if (Idx < Ops.size()) {
    bool DeletedMul = false;
    while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
      // If we have an mul, expand the mul operands onto the end of the operands
      // list.
      Ops.insert(Ops.end(), Mul->op_begin(), Mul->op_end());
      Ops.erase(Ops.begin()+Idx);
      DeletedMul = true;
    }

    // If we deleted at least one mul, we added operands to the end of the list,
    // and they are not necessarily sorted.  Recurse to resort and resimplify
    // any operands we just acquired.
    if (DeletedMul)
      return getMulExpr(Ops);
  }

  // If there are any add recurrences in the operands list, see if any other
  // added values are loop invariant.  If so, we can fold them into the
  // recurrence.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
    ++Idx;

  // Scan over all recurrences, trying to fold loop invariants into them.
  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
    // Scan all of the other operands to this mul and add them to the vector if
    // they are loop invariant w.r.t. the recurrence.
    SmallVector<const SCEV *, 8> LIOps;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
        LIOps.push_back(Ops[i]);
        Ops.erase(Ops.begin()+i);
        --i; --e;
      }

    // If we found some loop invariants, fold them into the recurrence.
    if (!LIOps.empty()) {
      //  NLI * LI * {Start,+,Step}  -->  NLI * {LI*Start,+,LI*Step}
      SmallVector<const SCEV *, 4> NewOps;
      NewOps.reserve(AddRec->getNumOperands());
      if (LIOps.size() == 1) {
        // Common case: a single invariant scale — avoid building a vector
        // per addrec operand.
        const SCEV *Scale = LIOps[0];
        for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
          NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
      } else {
        for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
          SmallVector<const SCEV *, 4> MulOps(LIOps.begin(), LIOps.end());
          MulOps.push_back(AddRec->getOperand(i));
          NewOps.push_back(getMulExpr(MulOps));
        }
      }

      // It's tempting to propagate the NSW flag here, but nsw multiplication
      // is not associative so this isn't necessarily safe.
      const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(),
                                         HasNUW && AddRec->hasNoUnsignedWrap(),
                                         /*HasNSW=*/false);

      // If all of the other operands were loop invariant, we are done.
      if (Ops.size() == 1) return NewRec;

      // Otherwise, multiply the folded AddRec by the non-liv parts.
      for (unsigned i = 0;; ++i)
        if (Ops[i] == AddRec) {
          Ops[i] = NewRec;
          break;
        }
      return getMulExpr(Ops);
    }

    // Okay, if there weren't any loop invariants to be folded, check to see if
    // there are multiple AddRec's with the same loop induction variable being
    // multiplied together.  If so, we can fold them.
    for (unsigned OtherIdx = Idx+1;
         OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx)
      if (OtherIdx != Idx) {
        const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
        if (AddRec->getLoop() == OtherAddRec->getLoop()) {
          // F * G  -->  {A,+,B} * {C,+,D}  -->  {A*C,+,F*D + G*B + B*D}
          // Note F and G (the whole addrecs) appear in the step by design;
          // this is the product rule for chains of recurrences.
          const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
          const SCEV *NewStart = getMulExpr(F->getStart(),
                                            G->getStart());
          const SCEV *B = F->getStepRecurrence(*this);
          const SCEV *D = G->getStepRecurrence(*this);
          const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
                                           getMulExpr(G, B),
                                           getMulExpr(B, D));
          const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
                                                F->getLoop());
          if (Ops.size() == 2) return NewAddRec;

          Ops.erase(Ops.begin()+Idx);
          Ops.erase(Ops.begin()+OtherIdx-1);
          Ops.push_back(NewAddRec);
          return getMulExpr(Ops);
        }
      }

    // Otherwise couldn't fold anything into this recurrence.  Move onto the
    // next one.
  }

  // Okay, it looks like we really DO need an mul expr.  Check to see if we
  // already have one, otherwise create a new one.
  FoldingSetNodeID ID;
  ID.AddInteger(scMulExpr);
  ID.AddInteger(Ops.size());
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  void *IP = 0;
  SCEVMulExpr *S =
    static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
                                        O, Ops.size());
    UniqueSCEVs.InsertNode(S, IP);
  }
  if (HasNUW) S->setHasNoUnsignedWrap(true);
  if (HasNSW) S->setHasNoSignedWrap(true);
  return S;
}

/// getUDivExpr - Get a canonical unsigned division expression, or something
/// simpler if possible.
1735const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, 1736 const SCEV *RHS) { 1737 assert(getEffectiveSCEVType(LHS->getType()) == 1738 getEffectiveSCEVType(RHS->getType()) && 1739 "SCEVUDivExpr operand types don't match!"); 1740 1741 if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { 1742 if (RHSC->getValue()->equalsInt(1)) 1743 return LHS; // X udiv 1 --> x 1744 // If the denominator is zero, the result of the udiv is undefined. Don't 1745 // try to analyze it, because the resolution chosen here may differ from 1746 // the resolution chosen in other parts of the compiler. 1747 if (!RHSC->getValue()->isZero()) { 1748 // Determine if the division can be folded into the operands of 1749 // its operands. 1750 // TODO: Generalize this to non-constants by using known-bits information. 1751 const Type *Ty = LHS->getType(); 1752 unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); 1753 unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ; 1754 // For non-power-of-two values, effectively round the value up to the 1755 // nearest power of two. 1756 if (!RHSC->getValue()->getValue().isPowerOf2()) 1757 ++MaxShiftAmt; 1758 const IntegerType *ExtTy = 1759 IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); 1760 // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. 
1761 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) 1762 if (const SCEVConstant *Step = 1763 dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) 1764 if (!Step->getValue()->getValue() 1765 .urem(RHSC->getValue()->getValue()) && 1766 getZeroExtendExpr(AR, ExtTy) == 1767 getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), 1768 getZeroExtendExpr(Step, ExtTy), 1769 AR->getLoop())) { 1770 SmallVector<const SCEV *, 4> Operands; 1771 for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) 1772 Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); 1773 return getAddRecExpr(Operands, AR->getLoop()); 1774 } 1775 // (A*B)/C --> A*(B/C) if safe and B/C can be folded. 1776 if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { 1777 SmallVector<const SCEV *, 4> Operands; 1778 for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) 1779 Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); 1780 if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) 1781 // Find an operand that's safely divisible. 1782 for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { 1783 const SCEV *Op = M->getOperand(i); 1784 const SCEV *Div = getUDivExpr(Op, RHSC); 1785 if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { 1786 Operands = SmallVector<const SCEV *, 4>(M->op_begin(), 1787 M->op_end()); 1788 Operands[i] = Div; 1789 return getMulExpr(Operands); 1790 } 1791 } 1792 } 1793 // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. 
1794 if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { 1795 SmallVector<const SCEV *, 4> Operands; 1796 for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) 1797 Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); 1798 if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { 1799 Operands.clear(); 1800 for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { 1801 const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); 1802 if (isa<SCEVUDivExpr>(Op) || 1803 getMulExpr(Op, RHS) != A->getOperand(i)) 1804 break; 1805 Operands.push_back(Op); 1806 } 1807 if (Operands.size() == A->getNumOperands()) 1808 return getAddExpr(Operands); 1809 } 1810 } 1811 1812 // Fold if both operands are constant. 1813 if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { 1814 Constant *LHSCV = LHSC->getValue(); 1815 Constant *RHSCV = RHSC->getValue(); 1816 return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV, 1817 RHSCV))); 1818 } 1819 } 1820 } 1821 1822 FoldingSetNodeID ID; 1823 ID.AddInteger(scUDivExpr); 1824 ID.AddPointer(LHS); 1825 ID.AddPointer(RHS); 1826 void *IP = 0; 1827 if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; 1828 SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), 1829 LHS, RHS); 1830 UniqueSCEVs.InsertNode(S, IP); 1831 return S; 1832} 1833 1834 1835/// getAddRecExpr - Get an add recurrence expression for the specified loop. 1836/// Simplify the expression as much as possible. 
const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start,
                                           const SCEV *Step, const Loop *L,
                                           bool HasNUW, bool HasNSW) {
  SmallVector<const SCEV *, 4> Operands;
  Operands.push_back(Start);
  // If the step is itself an addrec for the same loop, splice its operands
  // in directly: {S,+,{A,+,B}} for loop L becomes {S,+,A,+,B}.
  if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
    if (StepChrec->getLoop() == L) {
      Operands.insert(Operands.end(), StepChrec->op_begin(),
                      StepChrec->op_end());
      return getAddRecExpr(Operands, L);
    }

  Operands.push_back(Step);
  return getAddRecExpr(Operands, L, HasNUW, HasNSW);
}

/// getAddRecExpr - Get an add recurrence expression for the specified loop.
/// Simplify the expression as much as possible.
const SCEV *
ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
                               const Loop *L,
                               bool HasNUW, bool HasNSW) {
  if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
  for (unsigned i = 1, e = Operands.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Operands[i]->getType()) ==
           getEffectiveSCEVType(Operands[0]->getType()) &&
           "SCEVAddRecExpr operand types don't match!");
#endif

  if (Operands.back()->isZero()) {
    Operands.pop_back();
    return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0}  -->  X
  }

  // It's tempting to want to call getMaxBackedgeTakenCount count here and
  // use that information to infer NUW and NSW flags. However, computing a
  // BE count requires calling getAddRecExpr, so we may not yet have a
  // meaningful BE count at this point (and if we don't, we'd be stuck
  // with a SCEVCouldNotCompute as the cached BE count).

  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
  if (!HasNUW && HasNSW) {
    bool All = true;
    for (unsigned i = 0, e = Operands.size(); i != e; ++i)
      if (!isKnownNonNegative(Operands[i])) {
        All = false;
        break;
      }
    if (All) HasNUW = true;
  }

  // Canonicalize nested AddRecs in by nesting them in order of loop depth.
  if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
    const Loop *NestedLoop = NestedAR->getLoop();
    // Swap the nesting only when the nested loop is the deeper one (or is
    // unrelated but dominated by L's header), so outer loops end up outermost.
    if (L->contains(NestedLoop->getHeader()) ?
        (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
        (!NestedLoop->contains(L->getHeader()) &&
         DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
      SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
                                                  NestedAR->op_end());
      Operands[0] = NestedAR->getStart();
      // AddRecs require their operands be loop-invariant with respect to their
      // loops. Don't perform this transformation if it would break this
      // requirement.
      bool AllInvariant = true;
      for (unsigned i = 0, e = Operands.size(); i != e; ++i)
        if (!Operands[i]->isLoopInvariant(L)) {
          AllInvariant = false;
          break;
        }
      if (AllInvariant) {
        NestedOperands[0] = getAddRecExpr(Operands, L);
        AllInvariant = true;
        for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
          if (!NestedOperands[i]->isLoopInvariant(NestedLoop)) {
            AllInvariant = false;
            break;
          }
        if (AllInvariant)
          // Ok, both add recurrences are valid after the transformation.
          return getAddRecExpr(NestedOperands, NestedLoop, HasNUW, HasNSW);
      }
      // Reset Operands to its original state.
      Operands[0] = NestedAR;
    }
  }

  // Okay, it looks like we really DO need an addrec expr. Check to see if we
  // already have one, otherwise create a new one.
  FoldingSetNodeID ID;
  ID.AddInteger(scAddRecExpr);
  ID.AddInteger(Operands.size());
  for (unsigned i = 0, e = Operands.size(); i != e; ++i)
    ID.AddPointer(Operands[i]);
  ID.AddPointer(L);
  void *IP = 0;
  SCEVAddRecExpr *S =
    static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
    std::uninitialized_copy(Operands.begin(), Operands.end(), O);
    S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
                                           O, Operands.size(), L);
    UniqueSCEVs.InsertNode(S, IP);
  }
  // Wrap flags may be set on an existing node; flags only ever get added.
  if (HasNUW) S->setHasNoUnsignedWrap(true);
  if (HasNSW) S->setHasNoSignedWrap(true);
  return S;
}

/// getSMaxExpr - Get a canonical signed-maximum of the two operands.
const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
                                         const SCEV *RHS) {
  SmallVector<const SCEV *, 2> Ops;
  Ops.push_back(LHS);
  Ops.push_back(RHS);
  return getSMaxExpr(Ops);
}

/// getSMaxExpr - Get a canonical signed-maximum expression for the operand
/// list, or something simpler if possible.
const SCEV *
ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
  assert(!Ops.empty() && "Cannot get empty smax!");
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Ops[i]->getType()) ==
           getEffectiveSCEVType(Ops[0]->getType()) &&
           "SCEVSMaxExpr operand types don't match!");
#endif

  // Sort by complexity, this groups all similar expression types together.
  GroupByComplexity(Ops, LI);

  // If there are any constants, fold them together.
  unsigned Idx = 0;
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
    ++Idx;
    assert(Idx < Ops.size());
    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
      // We found two constants, fold them together!
      ConstantInt *Fold = ConstantInt::get(getContext(),
                              APIntOps::smax(LHSC->getValue()->getValue(),
                                             RHSC->getValue()->getValue()));
      Ops[0] = getConstant(Fold);
      Ops.erase(Ops.begin()+1);  // Erase the folded element
      if (Ops.size() == 1) return Ops[0];
      LHSC = cast<SCEVConstant>(Ops[0]);
    }

    // If we are left with a constant minimum-int, strip it off.
    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
      Ops.erase(Ops.begin());
      --Idx;
    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
      // If we have an smax with a constant maximum-int, it will always be
      // maximum-int.
      return Ops[0];
    }

    if (Ops.size() == 1) return Ops[0];
  }

  // Find the first SMax
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
    ++Idx;

  // Check to see if one of the operands is an SMax. If so, expand its operands
  // onto our operand list, and recurse to simplify.
  if (Idx < Ops.size()) {
    bool DeletedSMax = false;
    while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
      Ops.insert(Ops.end(), SMax->op_begin(), SMax->op_end());
      Ops.erase(Ops.begin()+Idx);
      DeletedSMax = true;
    }

    if (DeletedSMax)
      return getSMaxExpr(Ops);
  }

  // Okay, check to see if the same value occurs in the operand list twice. If
  // so, delete one. Since we sorted the list, these values are required to
  // be adjacent.
  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
    //  X smax Y smax Y  -->  X smax Y
    //  X smax Y         -->  X, if X is always greater than Y
    if (Ops[i] == Ops[i+1] ||
        isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
      --i; --e;
    } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
      Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
      --i; --e;
    }

  if (Ops.size() == 1) return Ops[0];

  assert(!Ops.empty() && "Reduced smax down to nothing!");

  // Okay, it looks like we really DO need an smax expr. Check to see if we
  // already have one, otherwise create a new one.
  FoldingSetNodeID ID;
  ID.AddInteger(scSMaxExpr);
  ID.AddInteger(Ops.size());
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
                                             O, Ops.size());
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

/// getUMaxExpr - Get a canonical unsigned-maximum of the two operands.
const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
                                         const SCEV *RHS) {
  SmallVector<const SCEV *, 2> Ops;
  Ops.push_back(LHS);
  Ops.push_back(RHS);
  return getUMaxExpr(Ops);
}

/// getUMaxExpr - Get a canonical unsigned-maximum expression for the operand
/// list, or something simpler if possible.
const SCEV *
ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
  assert(!Ops.empty() && "Cannot get empty umax!");
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Ops[i]->getType()) ==
           getEffectiveSCEVType(Ops[0]->getType()) &&
           "SCEVUMaxExpr operand types don't match!");
#endif

  // Sort by complexity, this groups all similar expression types together.
  GroupByComplexity(Ops, LI);

  // If there are any constants, fold them together.
  unsigned Idx = 0;
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
    ++Idx;
    assert(Idx < Ops.size());
    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
      // We found two constants, fold them together!
      ConstantInt *Fold = ConstantInt::get(getContext(),
                              APIntOps::umax(LHSC->getValue()->getValue(),
                                             RHSC->getValue()->getValue()));
      Ops[0] = getConstant(Fold);
      Ops.erase(Ops.begin()+1);  // Erase the folded element
      if (Ops.size() == 1) return Ops[0];
      LHSC = cast<SCEVConstant>(Ops[0]);
    }

    // If we are left with a constant minimum-int, strip it off.
    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
      Ops.erase(Ops.begin());
      --Idx;
    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
      // If we have an umax with a constant maximum-int, it will always be
      // maximum-int.
      return Ops[0];
    }

    if (Ops.size() == 1) return Ops[0];
  }

  // Find the first UMax
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
    ++Idx;

  // Check to see if one of the operands is a UMax. If so, expand its operands
  // onto our operand list, and recurse to simplify.
  if (Idx < Ops.size()) {
    bool DeletedUMax = false;
    while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
      Ops.insert(Ops.end(), UMax->op_begin(), UMax->op_end());
      Ops.erase(Ops.begin()+Idx);
      DeletedUMax = true;
    }

    if (DeletedUMax)
      return getUMaxExpr(Ops);
  }

  // Okay, check to see if the same value occurs in the operand list twice. If
  // so, delete one. Since we sorted the list, these values are required to
  // be adjacent.
  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
    //  X umax Y umax Y  -->  X umax Y
    //  X umax Y         -->  X, if X is always greater than Y
    if (Ops[i] == Ops[i+1] ||
        isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
      --i; --e;
    } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
      Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
      --i; --e;
    }

  if (Ops.size() == 1) return Ops[0];

  assert(!Ops.empty() && "Reduced umax down to nothing!");

  // Okay, it looks like we really DO need a umax expr. Check to see if we
  // already have one, otherwise create a new one.
  FoldingSetNodeID ID;
  ID.AddInteger(scUMaxExpr);
  ID.AddInteger(Ops.size());
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
                                             O, Ops.size());
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
                                         const SCEV *RHS) {
  // ~smax(~x, ~y) == smin(x, y).
  return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}

const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
                                         const SCEV *RHS) {
  // ~umax(~x, ~y) == umin(x, y)
  return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}

/// getSizeOfExpr - Return a SCEV for the allocation size of the given type.
const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
  // If we have TargetData, we can bypass creating a target-independent
  // constant expression and then folding it back into a ConstantInt.
  // This is just a compile-time optimization.
  if (TD)
    return getConstant(TD->getIntPtrType(getContext()),
                       TD->getTypeAllocSize(AllocTy));

  // Without TargetData, build a target-independent sizeof constant
  // expression and fold it as far as possible.
  Constant *C = ConstantExpr::getSizeOf(AllocTy);
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
      C = Folded;
  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
  return getTruncateOrZeroExtend(getSCEV(C), Ty);
}

/// getAlignOfExpr - Return a SCEV for the alignment of the given type.
const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
  Constant *C = ConstantExpr::getAlignOf(AllocTy);
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
      C = Folded;
  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
  return getTruncateOrZeroExtend(getSCEV(C), Ty);
}

/// getOffsetOfExpr - Return a SCEV for the byte offset of the given struct
/// field.
const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
                                             unsigned FieldNo) {
  // If we have TargetData, we can bypass creating a target-independent
  // constant expression and then folding it back into a ConstantInt.
  // This is just a compile-time optimization.
  if (TD)
    return getConstant(TD->getIntPtrType(getContext()),
                       TD->getStructLayout(STy)->getElementOffset(FieldNo));

  Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
      C = Folded;
  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
  return getTruncateOrZeroExtend(getSCEV(C), Ty);
}

/// getOffsetOfExpr - Return a SCEV for the offset denoted by a constant
/// field index into the given type.
const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
                                             Constant *FieldNo) {
  Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
      C = Folded;
  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
  return getTruncateOrZeroExtend(getSCEV(C), Ty);
}

const SCEV *ScalarEvolution::getUnknown(Value *V) {
  // Don't attempt to do anything other than create a SCEVUnknown object
  // here. createSCEV only calls getUnknown after checking for all other
  // interesting possibilities, and any other code that calls getUnknown
  // is doing so in order to hide a value from SCEV canonicalization.

  FoldingSetNodeID ID;
  ID.AddInteger(scUnknown);
  ID.AddPointer(V);
  void *IP = 0;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

//===----------------------------------------------------------------------===//
//            Basic SCEV Analysis and PHI Idiom Recognition Code
//

/// isSCEVable - Test if values of the given type are analyzable within
This primarily includes integer types, and it 2244/// can optionally include pointer types if the ScalarEvolution class 2245/// has access to target-specific information. 2246bool ScalarEvolution::isSCEVable(const Type *Ty) const { 2247 // Integers and pointers are always SCEVable. 2248 return Ty->isIntegerTy() || Ty->isPointerTy(); 2249} 2250 2251/// getTypeSizeInBits - Return the size in bits of the specified type, 2252/// for which isSCEVable must return true. 2253uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const { 2254 assert(isSCEVable(Ty) && "Type is not SCEVable!"); 2255 2256 // If we have a TargetData, use it! 2257 if (TD) 2258 return TD->getTypeSizeInBits(Ty); 2259 2260 // Integer types have fixed sizes. 2261 if (Ty->isIntegerTy()) 2262 return Ty->getPrimitiveSizeInBits(); 2263 2264 // The only other support type is pointer. Without TargetData, conservatively 2265 // assume pointers are 64-bit. 2266 assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!"); 2267 return 64; 2268} 2269 2270/// getEffectiveSCEVType - Return a type with the same bitwidth as 2271/// the given type and which represents how SCEV will treat the given 2272/// type, for which isSCEVable must return true. For pointer types, 2273/// this is the pointer-sized integer type. 2274const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const { 2275 assert(isSCEVable(Ty) && "Type is not SCEVable!"); 2276 2277 if (Ty->isIntegerTy()) 2278 return Ty; 2279 2280 // The only other support type is pointer. 2281 assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); 2282 if (TD) return TD->getIntPtrType(getContext()); 2283 2284 // Without TargetData, conservatively assume pointers are 64-bit. 
2285 return Type::getInt64Ty(getContext()); 2286} 2287 2288const SCEV *ScalarEvolution::getCouldNotCompute() { 2289 return &CouldNotCompute; 2290} 2291 2292/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the 2293/// expression and create a new one. 2294const SCEV *ScalarEvolution::getSCEV(Value *V) { 2295 assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); 2296 2297 std::map<SCEVCallbackVH, const SCEV *>::iterator I = Scalars.find(V); 2298 if (I != Scalars.end()) return I->second; 2299 const SCEV *S = createSCEV(V); 2300 Scalars.insert(std::make_pair(SCEVCallbackVH(V, this), S)); 2301 return S; 2302} 2303 2304/// getIntegerSCEV - Given a SCEVable type, create a constant for the 2305/// specified signed integer value and return a SCEV for the constant. 2306const SCEV *ScalarEvolution::getIntegerSCEV(int64_t Val, const Type *Ty) { 2307 const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); 2308 return getConstant(ConstantInt::get(ITy, Val)); 2309} 2310 2311/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V 2312/// 2313const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) { 2314 if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) 2315 return getConstant( 2316 cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue()))); 2317 2318 const Type *Ty = V->getType(); 2319 Ty = getEffectiveSCEVType(Ty); 2320 return getMulExpr(V, 2321 getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)))); 2322} 2323 2324/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V 2325const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { 2326 if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) 2327 return getConstant( 2328 cast<ConstantInt>(ConstantExpr::getNot(VC->getValue()))); 2329 2330 const Type *Ty = V->getType(); 2331 Ty = getEffectiveSCEVType(Ty); 2332 const SCEV *AllOnes = 2333 getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))); 2334 return getMinusSCEV(AllOnes, V); 2335} 2336 2337/// getMinusSCEV 
- Return a SCEV corresponding to LHS - RHS. 2338/// 2339const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, 2340 const SCEV *RHS) { 2341 // X - Y --> X + -Y 2342 return getAddExpr(LHS, getNegativeSCEV(RHS)); 2343} 2344 2345/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the 2346/// input value to the specified type. If the type must be extended, it is zero 2347/// extended. 2348const SCEV * 2349ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, 2350 const Type *Ty) { 2351 const Type *SrcTy = V->getType(); 2352 assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && 2353 (Ty->isIntegerTy() || Ty->isPointerTy()) && 2354 "Cannot truncate or zero extend with non-integer arguments!"); 2355 if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) 2356 return V; // No conversion 2357 if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) 2358 return getTruncateExpr(V, Ty); 2359 return getZeroExtendExpr(V, Ty); 2360} 2361 2362/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the 2363/// input value to the specified type. If the type must be extended, it is sign 2364/// extended. 2365const SCEV * 2366ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, 2367 const Type *Ty) { 2368 const Type *SrcTy = V->getType(); 2369 assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && 2370 (Ty->isIntegerTy() || Ty->isPointerTy()) && 2371 "Cannot truncate or zero extend with non-integer arguments!"); 2372 if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) 2373 return V; // No conversion 2374 if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) 2375 return getTruncateExpr(V, Ty); 2376 return getSignExtendExpr(V, Ty); 2377} 2378 2379/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the 2380/// input value to the specified type. If the type must be extended, it is zero 2381/// extended. The conversion must not be narrowing. 
2382const SCEV * 2383ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { 2384 const Type *SrcTy = V->getType(); 2385 assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && 2386 (Ty->isIntegerTy() || Ty->isPointerTy()) && 2387 "Cannot noop or zero extend with non-integer arguments!"); 2388 assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && 2389 "getNoopOrZeroExtend cannot truncate!"); 2390 if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) 2391 return V; // No conversion 2392 return getZeroExtendExpr(V, Ty); 2393} 2394 2395/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the 2396/// input value to the specified type. If the type must be extended, it is sign 2397/// extended. The conversion must not be narrowing. 2398const SCEV * 2399ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { 2400 const Type *SrcTy = V->getType(); 2401 assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && 2402 (Ty->isIntegerTy() || Ty->isPointerTy()) && 2403 "Cannot noop or sign extend with non-integer arguments!"); 2404 assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && 2405 "getNoopOrSignExtend cannot truncate!"); 2406 if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) 2407 return V; // No conversion 2408 return getSignExtendExpr(V, Ty); 2409} 2410 2411/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of 2412/// the input value to the specified type. If the type must be extended, 2413/// it is extended with unspecified bits. The conversion must not be 2414/// narrowing. 
2415const SCEV * 2416ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) { 2417 const Type *SrcTy = V->getType(); 2418 assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && 2419 (Ty->isIntegerTy() || Ty->isPointerTy()) && 2420 "Cannot noop or any extend with non-integer arguments!"); 2421 assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && 2422 "getNoopOrAnyExtend cannot truncate!"); 2423 if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) 2424 return V; // No conversion 2425 return getAnyExtendExpr(V, Ty); 2426} 2427 2428/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the 2429/// input value to the specified type. The conversion must not be widening. 2430const SCEV * 2431ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) { 2432 const Type *SrcTy = V->getType(); 2433 assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && 2434 (Ty->isIntegerTy() || Ty->isPointerTy()) && 2435 "Cannot truncate or noop with non-integer arguments!"); 2436 assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && 2437 "getTruncateOrNoop cannot extend!"); 2438 if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) 2439 return V; // No conversion 2440 return getTruncateExpr(V, Ty); 2441} 2442 2443/// getUMaxFromMismatchedTypes - Promote the operands to the wider of 2444/// the types using zero-extension, and then perform a umax operation 2445/// with them. 
2446const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS, 2447 const SCEV *RHS) { 2448 const SCEV *PromotedLHS = LHS; 2449 const SCEV *PromotedRHS = RHS; 2450 2451 if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) 2452 PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); 2453 else 2454 PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); 2455 2456 return getUMaxExpr(PromotedLHS, PromotedRHS); 2457} 2458 2459/// getUMinFromMismatchedTypes - Promote the operands to the wider of 2460/// the types using zero-extension, and then perform a umin operation 2461/// with them. 2462const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, 2463 const SCEV *RHS) { 2464 const SCEV *PromotedLHS = LHS; 2465 const SCEV *PromotedRHS = RHS; 2466 2467 if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) 2468 PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); 2469 else 2470 PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); 2471 2472 return getUMinExpr(PromotedLHS, PromotedRHS); 2473} 2474 2475/// PushDefUseChildren - Push users of the given Instruction 2476/// onto the given Worklist. 2477static void 2478PushDefUseChildren(Instruction *I, 2479 SmallVectorImpl<Instruction *> &Worklist) { 2480 // Push the def-use children onto the Worklist stack. 2481 for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); 2482 UI != UE; ++UI) 2483 Worklist.push_back(cast<Instruction>(UI)); 2484} 2485 2486/// ForgetSymbolicValue - This looks up computed SCEV values for all 2487/// instructions that depend on the given instruction and removes them from 2488/// the Scalars map if they reference SymName. This is used during PHI 2489/// resolution. 
void
ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
  SmallVector<Instruction *, 16> Worklist;
  PushDefUseChildren(PN, Worklist);

  // Track visited instructions so cyclic def-use chains (through PHIs)
  // terminate.
  SmallPtrSet<Instruction *, 8> Visited;
  Visited.insert(PN);
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    // insert() returns false if I was already visited; skip it then.
    if (!Visited.insert(I)) continue;

    std::map<SCEVCallbackVH, const SCEV *>::iterator It =
      Scalars.find(static_cast<Value *>(I));
    if (It != Scalars.end()) {
      // Short-circuit the def-use traversal if the symbolic name
      // ceases to appear in expressions.
      if (It->second != SymName && !It->second->hasOperand(SymName))
        continue;

      // SCEVUnknown for a PHI either means that it has an unrecognized
      // structure, it's a PHI that's in the progress of being computed
      // by createNodeForPHI, or it's a single-value PHI. In the first case,
      // additional loop trip count information isn't going to change anything.
      // In the second case, createNodeForPHI will perform the necessary
      // updates on its own when it gets to that point. In the third, we do
      // want to forget the SCEVUnknown.
      if (!isa<PHINode>(I) ||
          !isa<SCEVUnknown>(It->second) ||
          (I != PN && It->second == SymName)) {
        ValuesAtScopes.erase(It->second);
        Scalars.erase(It);
      }
    }

    PushDefUseChildren(I, Worklist);
  }
}

/// createNodeForPHI - PHI nodes have two cases.  Either the PHI node exists
/// in a loop header, making it a potential recurrence, or it doesn't.
///
const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
  if (const Loop *L = LI->getLoopFor(PN->getParent()))
    if (L->getHeader() == PN->getParent()) {
      // The loop may have multiple entrances or multiple exits; we can analyze
      // this phi as an addrec if it has a unique entry value and a unique
      // backedge value.
      Value *BEValueV = 0, *StartValueV = 0;
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
        Value *V = PN->getIncomingValue(i);
        if (L->contains(PN->getIncomingBlock(i))) {
          // Incoming edge from inside the loop: candidate backedge value.
          if (!BEValueV) {
            BEValueV = V;
          } else if (BEValueV != V) {
            // Multiple distinct backedge values; give up on this PHI.
            BEValueV = 0;
            break;
          }
        } else if (!StartValueV) {
          // Incoming edge from outside the loop: candidate start value.
          StartValueV = V;
        } else if (StartValueV != V) {
          // Multiple distinct entry values; give up on this PHI.
          StartValueV = 0;
          break;
        }
      }
      if (BEValueV && StartValueV) {
        // While we are analyzing this PHI node, handle its value symbolically.
        const SCEV *SymbolicName = getUnknown(PN);
        assert(Scalars.find(PN) == Scalars.end() &&
               "PHI node already processed?");
        Scalars.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));

        // Using this symbolic name for the PHI, analyze the value coming around
        // the back-edge.
        const SCEV *BEValue = getSCEV(BEValueV);

        // NOTE: If BEValue is loop invariant, we know that the PHI node just
        // has a special value for the first iteration of the loop.

        // If the value coming around the backedge is an add with the symbolic
        // value we just inserted, then we found a simple induction variable!
        if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
          // If there is a single occurrence of the symbolic value, replace it
          // with a recurrence.  FoundIndex == e is the "not found yet"
          // sentinel.
          unsigned FoundIndex = Add->getNumOperands();
          for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
            if (Add->getOperand(i) == SymbolicName)
              if (FoundIndex == e) {
                FoundIndex = i;
                break;
              }

          if (FoundIndex != Add->getNumOperands()) {
            // Create an add with everything but the specified operand.
            SmallVector<const SCEV *, 8> Ops;
            for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
              if (i != FoundIndex)
                Ops.push_back(Add->getOperand(i));
            const SCEV *Accum = getAddExpr(Ops);

            // This is not a valid addrec if the step amount is varying each
            // loop iteration, but is not itself an addrec in this loop.
            if (Accum->isLoopInvariant(L) ||
                (isa<SCEVAddRecExpr>(Accum) &&
                 cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
              bool HasNUW = false;
              bool HasNSW = false;

              // If the increment doesn't overflow, then neither the addrec nor
              // the post-increment will overflow.
              if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
                if (OBO->hasNoUnsignedWrap())
                  HasNUW = true;
                if (OBO->hasNoSignedWrap())
                  HasNSW = true;
              }

              const SCEV *StartVal = getSCEV(StartValueV);
              const SCEV *PHISCEV =
                getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW);

              // Since the no-wrap flags are on the increment, they apply to the
              // post-incremented value as well.
              if (Accum->isLoopInvariant(L))
                (void)getAddRecExpr(getAddExpr(StartVal, Accum),
                                    Accum, L, HasNUW, HasNSW);

              // Okay, for the entire analysis of this edge we assumed the PHI
              // to be symbolic.  We now need to go back and purge all of the
              // entries for the scalars that use the symbolic expression.
              ForgetSymbolicName(PN, SymbolicName);
              Scalars[SCEVCallbackVH(PN, this)] = PHISCEV;
              return PHISCEV;
            }
          }
        } else if (const SCEVAddRecExpr *AddRec =
                     dyn_cast<SCEVAddRecExpr>(BEValue)) {
          // Otherwise, this could be a loop like this:
          //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
          // In this case, j = {1,+,1}  and BEValue is j.
          // Because the other in-value of i (0) fits the evolution of BEValue
          // i really is an addrec evolution.
          if (AddRec->getLoop() == L && AddRec->isAffine()) {
            const SCEV *StartVal = getSCEV(StartValueV);

            // If StartVal = j.start - j.stride, we can use StartVal as the
            // initial step of the addrec evolution.
            if (StartVal == getMinusSCEV(AddRec->getOperand(0),
                                         AddRec->getOperand(1))) {
              const SCEV *PHISCEV =
                getAddRecExpr(StartVal, AddRec->getOperand(1), L);

              // Okay, for the entire analysis of this edge we assumed the PHI
              // to be symbolic.  We now need to go back and purge all of the
              // entries for the scalars that use the symbolic expression.
              ForgetSymbolicName(PN, SymbolicName);
              Scalars[SCEVCallbackVH(PN, this)] = PHISCEV;
              return PHISCEV;
            }
          }
        }
      }
    }

  // If the PHI has a single incoming value, follow that value, unless the
  // PHI's incoming blocks are in a different loop, in which case doing so
  // risks breaking LCSSA form. Instcombine would normally zap these, but
  // it doesn't have DominatorTree information, so it may miss cases.
  if (Value *V = PN->hasConstantValue(DT)) {
    bool AllSameLoop = true;
    Loop *PNLoop = LI->getLoopFor(PN->getParent());
    for (size_t i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (LI->getLoopFor(PN->getIncomingBlock(i)) != PNLoop) {
        AllSameLoop = false;
        break;
      }
    if (AllSameLoop)
      return getSCEV(V);
  }

  // If it's not a loop phi, we can't handle it yet.
  return getUnknown(PN);
}

/// createNodeForGEP - Expand GEP instructions into add and multiply
/// operations. This allows them to be analyzed by regular SCEV code.
///
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {

  bool InBounds = GEP->isInBounds();
  const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
  Value *Base = GEP->getOperand(0);
  // Don't attempt to analyze GEPs over unsized objects.
  if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
    return getUnknown(GEP);
  const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
  gep_type_iterator GTI = gep_type_begin(GEP);
  // Walk the index operands (everything after the base pointer), summing up
  // the byte offset each one contributes.
  for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()),
                                      E = GEP->op_end();
       I != E; ++I) {
    Value *Index = *I;
    // Compute the (potentially symbolic) offset in bytes for this index.
    if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
      // For a struct, add the member offset.
      unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
      TotalOffset = getAddExpr(TotalOffset,
                               getOffsetOfExpr(STy, FieldNo),
                               /*HasNUW=*/false, /*HasNSW=*/InBounds);
    } else {
      // For an array, add the element offset, explicitly scaled.
      const SCEV *LocalOffset = getSCEV(Index);
      // Getelementptr indices are signed.
      LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy);
      // Lower "inbounds" GEPs to NSW arithmetic.
      LocalOffset = getMulExpr(LocalOffset, getSizeOfExpr(*GTI),
                               /*HasNUW=*/false, /*HasNSW=*/InBounds);
      TotalOffset = getAddExpr(TotalOffset, LocalOffset,
                               /*HasNUW=*/false, /*HasNSW=*/InBounds);
    }
  }
  return getAddExpr(getSCEV(Base), TotalOffset,
                    /*HasNUW=*/false, /*HasNSW=*/InBounds);
}

/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
/// guaranteed to end in (at every loop iteration).  It is, at the same time,
/// the minimum number of times S is divisible by 2.  For example, given {4,+,8}
/// it returns 2.  If S is guaranteed to be 0, it returns the bitwidth of S.
uint32_t
ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
    return C->getValue()->getValue().countTrailingZeros();

  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
    // Truncation can only reduce the count to the narrower width.
    return std::min(GetMinTrailingZeros(T->getOperand()),
                    (uint32_t)getTypeSizeInBits(T->getType()));

  if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
    uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
    // If the operand is all-zero, the zext result is all-zero too.
    return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
             getTypeSizeInBits(E->getType()) : OpRes;
  }

  if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
    uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
    // If the operand is all-zero, the sext result is all-zero too.
    return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
             getTypeSizeInBits(E->getType()) : OpRes;
  }

  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
    for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    // The result is the sum of all operands results.
    uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
    uint32_t BitWidth = getTypeSizeInBits(M->getType());
    for (unsigned i = 1, e = M->getNumOperands();
         SumOpRes != BitWidth && i != e; ++i)
      SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
                          BitWidth);
    return SumOpRes;
  }

  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
    for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
    for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
    // The result is the min of all operands results.
    uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
    for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
    return MinOpRes;
  }

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // For a SCEVUnknown, ask ValueTracking.
    unsigned BitWidth = getTypeSizeInBits(U->getType());
    APInt Mask = APInt::getAllOnesValue(BitWidth);
    APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
    ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones);
    return Zeros.countTrailingOnes();
  }

  // SCEVUDivExpr
  return 0;
}

/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
///
ConstantRange
ScalarEvolution::getUnsignedRange(const SCEV *S) {

  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
    return ConstantRange(C->getValue()->getValue());

  unsigned BitWidth = getTypeSizeInBits(S->getType());
  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);

  // If the value has known zeros, the maximum unsigned value will have those
  // known zeros as well.
  uint32_t TZ = GetMinTrailingZeros(S);
  if (TZ != 0)
    ConservativeResult =
      ConstantRange(APInt::getMinValue(BitWidth),
                    APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    ConstantRange X = getUnsignedRange(Add->getOperand(0));
    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
      X = X.add(getUnsignedRange(Add->getOperand(i)));
    return ConservativeResult.intersectWith(X);
  }

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    ConstantRange X = getUnsignedRange(Mul->getOperand(0));
    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
      X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
    return ConservativeResult.intersectWith(X);
  }

  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
    ConstantRange X = getUnsignedRange(SMax->getOperand(0));
    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
      X = X.smax(getUnsignedRange(SMax->getOperand(i)));
    return ConservativeResult.intersectWith(X);
  }

  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
    ConstantRange X = getUnsignedRange(UMax->getOperand(0));
    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
      X = X.umax(getUnsignedRange(UMax->getOperand(i)));
    return ConservativeResult.intersectWith(X);
  }

  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
    ConstantRange X = getUnsignedRange(UDiv->getLHS());
    ConstantRange Y = getUnsignedRange(UDiv->getRHS());
    return ConservativeResult.intersectWith(X.udiv(Y));
  }

  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
    ConstantRange X = getUnsignedRange(ZExt->getOperand());
    return ConservativeResult.intersectWith(X.zeroExtend(BitWidth));
  }

  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
    ConstantRange X = getUnsignedRange(SExt->getOperand());
    return ConservativeResult.intersectWith(X.signExtend(BitWidth));
  }

  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
    ConstantRange X = getUnsignedRange(Trunc->getOperand());
    return ConservativeResult.intersectWith(X.truncate(BitWidth));
  }

  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
    // If there's no unsigned wrap, the value will never be less than its
    // initial value.
    if (AddRec->hasNoUnsignedWrap())
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
        if (!C->getValue()->isZero())
          ConservativeResult =
            ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0));

    // TODO: non-affine addrec
    if (AddRec->isAffine()) {
      const Type *Ty = AddRec->getType();
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
        MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);

        const SCEV *Start = AddRec->getStart();
        const SCEV *Step = AddRec->getStepRecurrence(*this);

        ConstantRange StartRange = getUnsignedRange(Start);
        ConstantRange StepRange = getSignedRange(Step);
        ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
        ConstantRange EndRange =
          StartRange.add(MaxBECountRange.multiply(StepRange));

        // Check for overflow.  This must be done with ConstantRange arithmetic
        // because we could be called from within the ScalarEvolution overflow
        // checking code.  Redo start + count*step at 2*BitWidth+1 bits, which
        // cannot wrap; a mismatch with EndRange means the narrow computation
        // overflowed.
        ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
        ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
        ConstantRange ExtMaxBECountRange =
          MaxBECountRange.zextOrTrunc(BitWidth*2+1);
        ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
        if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
            ExtEndRange)
          return ConservativeResult;

        APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
                                   EndRange.getUnsignedMin());
        APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
                                   EndRange.getUnsignedMax());
        if (Min.isMinValue() && Max.isMaxValue())
          return ConservativeResult;
        return ConservativeResult.intersectWith(ConstantRange(Min, Max+1));
      }
    }

    return ConservativeResult;
  }

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // For a SCEVUnknown, ask ValueTracking.
    APInt Mask = APInt::getAllOnesValue(BitWidth);
    APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
    ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
    // [Ones, ~Zeros+1) would be a full (useless) range; bail out then.
    if (Ones == ~Zeros + 1)
      return ConservativeResult;
    return ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
  }

  return ConservativeResult;
}

/// getSignedRange - Determine the signed range for a particular SCEV.
///
ConstantRange
ScalarEvolution::getSignedRange(const SCEV *S) {

  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
    return ConstantRange(C->getValue()->getValue());

  unsigned BitWidth = getTypeSizeInBits(S->getType());
  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);

  // If the value has known zeros, the maximum signed value will have those
  // known zeros as well.
  uint32_t TZ = GetMinTrailingZeros(S);
  if (TZ != 0)
    ConservativeResult =
      ConstantRange(APInt::getSignedMinValue(BitWidth),
                    APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    ConstantRange X = getSignedRange(Add->getOperand(0));
    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
      X = X.add(getSignedRange(Add->getOperand(i)));
    return ConservativeResult.intersectWith(X);
  }

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    ConstantRange X = getSignedRange(Mul->getOperand(0));
    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
      X = X.multiply(getSignedRange(Mul->getOperand(i)));
    return ConservativeResult.intersectWith(X);
  }

  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
    ConstantRange X = getSignedRange(SMax->getOperand(0));
    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
      X = X.smax(getSignedRange(SMax->getOperand(i)));
    return ConservativeResult.intersectWith(X);
  }

  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
    ConstantRange X = getSignedRange(UMax->getOperand(0));
    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
      X = X.umax(getSignedRange(UMax->getOperand(i)));
    return ConservativeResult.intersectWith(X);
  }

  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
    ConstantRange X = getSignedRange(UDiv->getLHS());
    ConstantRange Y = getSignedRange(UDiv->getRHS());
    return ConservativeResult.intersectWith(X.udiv(Y));
  }

  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
    ConstantRange X = getSignedRange(ZExt->getOperand());
    return ConservativeResult.intersectWith(X.zeroExtend(BitWidth));
  }

  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
    ConstantRange X = getSignedRange(SExt->getOperand());
    return ConservativeResult.intersectWith(X.signExtend(BitWidth));
  }

  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
    ConstantRange X = getSignedRange(Trunc->getOperand());
    return ConservativeResult.intersectWith(X.truncate(BitWidth));
  }

  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
    // If there's no signed wrap, and all the operands have the same sign or
    // zero, the value won't ever change sign.
    if (AddRec->hasNoSignedWrap()) {
      bool AllNonNeg = true;
      bool AllNonPos = true;
      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
        if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
        if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
      }
      if (AllNonNeg)
        ConservativeResult = ConservativeResult.intersectWith(
          ConstantRange(APInt(BitWidth, 0),
                        APInt::getSignedMinValue(BitWidth)));
      else if (AllNonPos)
        ConservativeResult = ConservativeResult.intersectWith(
          ConstantRange(APInt::getSignedMinValue(BitWidth),
                        APInt(BitWidth, 1)));
    }

    // TODO: non-affine addrec
    if (AddRec->isAffine()) {
      const Type *Ty = AddRec->getType();
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
        MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);

        const SCEV *Start = AddRec->getStart();
        const SCEV *Step = AddRec->getStepRecurrence(*this);

        ConstantRange StartRange = getSignedRange(Start);
        ConstantRange StepRange = getSignedRange(Step);
        ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
        ConstantRange EndRange =
          StartRange.add(MaxBECountRange.multiply(StepRange));

        // Check for overflow.  This must be done with ConstantRange arithmetic
        // because we could be called from within the ScalarEvolution overflow
        // checking code.  Redo start + count*step at 2*BitWidth+1 bits, which
        // cannot wrap; a mismatch with EndRange means the narrow computation
        // overflowed.
        ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
        ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
        ConstantRange ExtMaxBECountRange =
          MaxBECountRange.zextOrTrunc(BitWidth*2+1);
        ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
        if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
            ExtEndRange)
          return ConservativeResult;

        APInt Min = APIntOps::smin(StartRange.getSignedMin(),
                                   EndRange.getSignedMin());
        APInt Max = APIntOps::smax(StartRange.getSignedMax(),
                                   EndRange.getSignedMax());
        if (Min.isMinSignedValue() && Max.isMaxSignedValue())
          return ConservativeResult;
        return ConservativeResult.intersectWith(ConstantRange(Min, Max+1));
      }
    }

    return ConservativeResult;
  }

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // For a SCEVUnknown, ask ValueTracking.
    if (!U->getValue()->getType()->isIntegerTy() && !TD)
      return ConservativeResult;
    unsigned NS = ComputeNumSignBits(U->getValue(), TD);
    if (NS == 1)
      return ConservativeResult;
    return ConservativeResult.intersectWith(
      ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
                    APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1));
  }

  return ConservativeResult;
}

/// createSCEV - We know that there is no SCEV for the specified value.
/// Analyze the expression.
///
const SCEV *ScalarEvolution::createSCEV(Value *V) {
  if (!isSCEVable(V->getType()))
    return getUnknown(V);

  unsigned Opcode = Instruction::UserOp1;
  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();

    // Don't attempt to analyze instructions in blocks that aren't
    // reachable.
Such instructions don't matter, and they aren't required 3084 // to obey basic rules for definitions dominating uses which this 3085 // analysis depends on. 3086 if (!DT->isReachableFromEntry(I->getParent())) 3087 return getUnknown(V); 3088 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) 3089 Opcode = CE->getOpcode(); 3090 else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) 3091 return getConstant(CI); 3092 else if (isa<ConstantPointerNull>(V)) 3093 return getConstant(V->getType(), 0); 3094 else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) 3095 return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee()); 3096 else 3097 return getUnknown(V); 3098 3099 Operator *U = cast<Operator>(V); 3100 switch (Opcode) { 3101 case Instruction::Add: 3102 // Don't transfer the NSW and NUW bits from the Add instruction to the 3103 // Add expression, because the Instruction may be guarded by control 3104 // flow and the no-overflow bits may not be valid for the expression in 3105 // any context. 3106 return getAddExpr(getSCEV(U->getOperand(0)), 3107 getSCEV(U->getOperand(1))); 3108 case Instruction::Mul: 3109 // Don't transfer the NSW and NUW bits from the Mul instruction to the 3110 // Mul expression, as with Add. 3111 return getMulExpr(getSCEV(U->getOperand(0)), 3112 getSCEV(U->getOperand(1))); 3113 case Instruction::UDiv: 3114 return getUDivExpr(getSCEV(U->getOperand(0)), 3115 getSCEV(U->getOperand(1))); 3116 case Instruction::Sub: 3117 return getMinusSCEV(getSCEV(U->getOperand(0)), 3118 getSCEV(U->getOperand(1))); 3119 case Instruction::And: 3120 // For an expression like x&255 that merely masks off the high bits, 3121 // use zext(trunc(x)) as the SCEV expression. 
3122 if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { 3123 if (CI->isNullValue()) 3124 return getSCEV(U->getOperand(1)); 3125 if (CI->isAllOnesValue()) 3126 return getSCEV(U->getOperand(0)); 3127 const APInt &A = CI->getValue(); 3128 3129 // Instcombine's ShrinkDemandedConstant may strip bits out of 3130 // constants, obscuring what would otherwise be a low-bits mask. 3131 // Use ComputeMaskedBits to compute what ShrinkDemandedConstant 3132 // knew about to reconstruct a low-bits mask value. 3133 unsigned LZ = A.countLeadingZeros(); 3134 unsigned BitWidth = A.getBitWidth(); 3135 APInt AllOnes = APInt::getAllOnesValue(BitWidth); 3136 APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); 3137 ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD); 3138 3139 APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ); 3140 3141 if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask)) 3142 return 3143 getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)), 3144 IntegerType::get(getContext(), BitWidth - LZ)), 3145 U->getType()); 3146 } 3147 break; 3148 3149 case Instruction::Or: 3150 // If the RHS of the Or is a constant, we may have something like: 3151 // X*4+1 which got turned into X*4|1. Handle this as an Add so loop 3152 // optimizations will transparently handle this case. 3153 // 3154 // In order for this transformation to be safe, the LHS must be of the 3155 // form X*(2^n) and the Or constant must be less than 2^n. 3156 if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { 3157 const SCEV *LHS = getSCEV(U->getOperand(0)); 3158 const APInt &CIVal = CI->getValue(); 3159 if (GetMinTrailingZeros(LHS) >= 3160 (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { 3161 // Build a plain add SCEV. 3162 const SCEV *S = getAddExpr(LHS, getSCEV(CI)); 3163 // If the LHS of the add was an addrec and it has no-wrap flags, 3164 // transfer the no-wrap flags, since an or won't introduce a wrap. 
3165 if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) { 3166 const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS); 3167 if (OldAR->hasNoUnsignedWrap()) 3168 const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoUnsignedWrap(true); 3169 if (OldAR->hasNoSignedWrap()) 3170 const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoSignedWrap(true); 3171 } 3172 return S; 3173 } 3174 } 3175 break; 3176 case Instruction::Xor: 3177 if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { 3178 // If the RHS of the xor is a signbit, then this is just an add. 3179 // Instcombine turns add of signbit into xor as a strength reduction step. 3180 if (CI->getValue().isSignBit()) 3181 return getAddExpr(getSCEV(U->getOperand(0)), 3182 getSCEV(U->getOperand(1))); 3183 3184 // If the RHS of xor is -1, then this is a not operation. 3185 if (CI->isAllOnesValue()) 3186 return getNotSCEV(getSCEV(U->getOperand(0))); 3187 3188 // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. 3189 // This is a variant of the check for xor with -1, and it handles 3190 // the case where instcombine has trimmed non-demanded bits out 3191 // of an xor with -1. 3192 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) 3193 if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1))) 3194 if (BO->getOpcode() == Instruction::And && 3195 LCI->getValue() == CI->getValue()) 3196 if (const SCEVZeroExtendExpr *Z = 3197 dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) { 3198 const Type *UTy = U->getType(); 3199 const SCEV *Z0 = Z->getOperand(); 3200 const Type *Z0Ty = Z0->getType(); 3201 unsigned Z0TySize = getTypeSizeInBits(Z0Ty); 3202 3203 // If C is a low-bits mask, the zero extend is serving to 3204 // mask off the high bits. Complement the operand and 3205 // re-apply the zext. 
3206 if (APIntOps::isMask(Z0TySize, CI->getValue())) 3207 return getZeroExtendExpr(getNotSCEV(Z0), UTy); 3208 3209 // If C is a single bit, it may be in the sign-bit position 3210 // before the zero-extend. In this case, represent the xor 3211 // using an add, which is equivalent, and re-apply the zext. 3212 APInt Trunc = APInt(CI->getValue()).trunc(Z0TySize); 3213 if (APInt(Trunc).zext(getTypeSizeInBits(UTy)) == CI->getValue() && 3214 Trunc.isSignBit()) 3215 return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), 3216 UTy); 3217 } 3218 } 3219 break; 3220 3221 case Instruction::Shl: 3222 // Turn shift left of a constant amount into a multiply. 3223 if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { 3224 uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); 3225 3226 // If the shift count is not less than the bitwidth, the result of 3227 // the shift is undefined. Don't try to analyze it, because the 3228 // resolution chosen here may differ from the resolution chosen in 3229 // other parts of the compiler. 3230 if (SA->getValue().uge(BitWidth)) 3231 break; 3232 3233 Constant *X = ConstantInt::get(getContext(), 3234 APInt(BitWidth, 1).shl(SA->getZExtValue())); 3235 return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); 3236 } 3237 break; 3238 3239 case Instruction::LShr: 3240 // Turn logical shift right of a constant into a unsigned divide. 3241 if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { 3242 uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); 3243 3244 // If the shift count is not less than the bitwidth, the result of 3245 // the shift is undefined. Don't try to analyze it, because the 3246 // resolution chosen here may differ from the resolution chosen in 3247 // other parts of the compiler. 
3248 if (SA->getValue().uge(BitWidth)) 3249 break; 3250 3251 Constant *X = ConstantInt::get(getContext(), 3252 APInt(BitWidth, 1).shl(SA->getZExtValue())); 3253 return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X)); 3254 } 3255 break; 3256 3257 case Instruction::AShr: 3258 // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression. 3259 if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) 3260 if (Operator *L = dyn_cast<Operator>(U->getOperand(0))) 3261 if (L->getOpcode() == Instruction::Shl && 3262 L->getOperand(1) == U->getOperand(1)) { 3263 uint64_t BitWidth = getTypeSizeInBits(U->getType()); 3264 3265 // If the shift count is not less than the bitwidth, the result of 3266 // the shift is undefined. Don't try to analyze it, because the 3267 // resolution chosen here may differ from the resolution chosen in 3268 // other parts of the compiler. 3269 if (CI->getValue().uge(BitWidth)) 3270 break; 3271 3272 uint64_t Amt = BitWidth - CI->getZExtValue(); 3273 if (Amt == BitWidth) 3274 return getSCEV(L->getOperand(0)); // shift by zero --> noop 3275 return 3276 getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)), 3277 IntegerType::get(getContext(), 3278 Amt)), 3279 U->getType()); 3280 } 3281 break; 3282 3283 case Instruction::Trunc: 3284 return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType()); 3285 3286 case Instruction::ZExt: 3287 return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType()); 3288 3289 case Instruction::SExt: 3290 return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); 3291 3292 case Instruction::BitCast: 3293 // BitCasts are no-op casts so we just eliminate the cast. 
3294 if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) 3295 return getSCEV(U->getOperand(0)); 3296 break; 3297 3298 // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can 3299 // lead to pointer expressions which cannot safely be expanded to GEPs, 3300 // because ScalarEvolution doesn't respect the GEP aliasing rules when 3301 // simplifying integer expressions. 3302 3303 case Instruction::GetElementPtr: 3304 return createNodeForGEP(cast<GEPOperator>(U)); 3305 3306 case Instruction::PHI: 3307 return createNodeForPHI(cast<PHINode>(U)); 3308 3309 case Instruction::Select: 3310 // This could be a smax or umax that was lowered earlier. 3311 // Try to recover it. 3312 if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) { 3313 Value *LHS = ICI->getOperand(0); 3314 Value *RHS = ICI->getOperand(1); 3315 switch (ICI->getPredicate()) { 3316 case ICmpInst::ICMP_SLT: 3317 case ICmpInst::ICMP_SLE: 3318 std::swap(LHS, RHS); 3319 // fall through 3320 case ICmpInst::ICMP_SGT: 3321 case ICmpInst::ICMP_SGE: 3322 // a >s b ? a+x : b+x -> smax(a, b)+x 3323 // a >s b ? b+x : a+x -> smin(a, b)+x 3324 if (LHS->getType() == U->getType()) { 3325 const SCEV *LS = getSCEV(LHS); 3326 const SCEV *RS = getSCEV(RHS); 3327 const SCEV *LA = getSCEV(U->getOperand(1)); 3328 const SCEV *RA = getSCEV(U->getOperand(2)); 3329 const SCEV *LDiff = getMinusSCEV(LA, LS); 3330 const SCEV *RDiff = getMinusSCEV(RA, RS); 3331 if (LDiff == RDiff) 3332 return getAddExpr(getSMaxExpr(LS, RS), LDiff); 3333 LDiff = getMinusSCEV(LA, RS); 3334 RDiff = getMinusSCEV(RA, LS); 3335 if (LDiff == RDiff) 3336 return getAddExpr(getSMinExpr(LS, RS), LDiff); 3337 } 3338 break; 3339 case ICmpInst::ICMP_ULT: 3340 case ICmpInst::ICMP_ULE: 3341 std::swap(LHS, RHS); 3342 // fall through 3343 case ICmpInst::ICMP_UGT: 3344 case ICmpInst::ICMP_UGE: 3345 // a >u b ? a+x : b+x -> umax(a, b)+x 3346 // a >u b ? 
b+x : a+x -> umin(a, b)+x 3347 if (LHS->getType() == U->getType()) { 3348 const SCEV *LS = getSCEV(LHS); 3349 const SCEV *RS = getSCEV(RHS); 3350 const SCEV *LA = getSCEV(U->getOperand(1)); 3351 const SCEV *RA = getSCEV(U->getOperand(2)); 3352 const SCEV *LDiff = getMinusSCEV(LA, LS); 3353 const SCEV *RDiff = getMinusSCEV(RA, RS); 3354 if (LDiff == RDiff) 3355 return getAddExpr(getUMaxExpr(LS, RS), LDiff); 3356 LDiff = getMinusSCEV(LA, RS); 3357 RDiff = getMinusSCEV(RA, LS); 3358 if (LDiff == RDiff) 3359 return getAddExpr(getUMinExpr(LS, RS), LDiff); 3360 } 3361 break; 3362 case ICmpInst::ICMP_NE: 3363 // n != 0 ? n+x : 1+x -> umax(n, 1)+x 3364 if (LHS->getType() == U->getType() && 3365 isa<ConstantInt>(RHS) && 3366 cast<ConstantInt>(RHS)->isZero()) { 3367 const SCEV *One = getConstant(LHS->getType(), 1); 3368 const SCEV *LS = getSCEV(LHS); 3369 const SCEV *LA = getSCEV(U->getOperand(1)); 3370 const SCEV *RA = getSCEV(U->getOperand(2)); 3371 const SCEV *LDiff = getMinusSCEV(LA, LS); 3372 const SCEV *RDiff = getMinusSCEV(RA, One); 3373 if (LDiff == RDiff) 3374 return getAddExpr(getUMaxExpr(LS, One), LDiff); 3375 } 3376 break; 3377 case ICmpInst::ICMP_EQ: 3378 // n == 0 ? 1+x : n+x -> umax(n, 1)+x 3379 if (LHS->getType() == U->getType() && 3380 isa<ConstantInt>(RHS) && 3381 cast<ConstantInt>(RHS)->isZero()) { 3382 const SCEV *One = getConstant(LHS->getType(), 1); 3383 const SCEV *LS = getSCEV(LHS); 3384 const SCEV *LA = getSCEV(U->getOperand(1)); 3385 const SCEV *RA = getSCEV(U->getOperand(2)); 3386 const SCEV *LDiff = getMinusSCEV(LA, One); 3387 const SCEV *RDiff = getMinusSCEV(RA, LS); 3388 if (LDiff == RDiff) 3389 return getAddExpr(getUMaxExpr(LS, One), LDiff); 3390 } 3391 break; 3392 default: 3393 break; 3394 } 3395 } 3396 3397 default: // We cannot analyze this expression. 
    break;
  }

  return getUnknown(V);
}



//===----------------------------------------------------------------------===//
//                   Iteration Count Computation Code
//

/// getBackedgeTakenCount - If the specified loop has a predictable
/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
/// object. The backedge-taken count is the number of times the loop header
/// will be branched to from within the loop. This is one less than the
/// trip count of the loop, since it doesn't count the first iteration,
/// when the header is branched to from outside the loop.
///
/// Note that it is not valid to call this method on a loop without a
/// loop-invariant backedge-taken count (see
/// hasLoopInvariantBackedgeTakenCount).
///
const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
  return getBackedgeTakenInfo(L).Exact;
}

/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
/// return the least SCEV value that is known never to be less than the
/// actual backedge taken count.
const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
  return getBackedgeTakenInfo(L).Max;
}

/// PushLoopPHIs - Push PHI nodes in the header of the given loop
/// onto the given Worklist.
static void
PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
  BasicBlock *Header = L->getHeader();

  // Push all Loop-header PHIs onto the Worklist stack. PHIs are always
  // grouped at the top of a block, so stop at the first non-PHI.
  for (BasicBlock::iterator I = Header->begin();
       PHINode *PN = dyn_cast<PHINode>(I); ++I)
    Worklist.push_back(PN);
}

/// getBackedgeTakenInfo - Return the (possibly cached) backedge-taken
/// information for the given loop, computing it on first request.
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
  // Initially insert a CouldNotCompute for this loop. If the insertion
  // succeeds, proceed to actually compute a backedge-taken count and
  // update the value. The temporary CouldNotCompute value tells SCEV
  // code elsewhere that it shouldn't attempt to request a new
  // backedge-taken count, which could result in infinite recursion.
  std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
    BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
  if (Pair.second) {
    BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L);
    if (BECount.Exact != getCouldNotCompute()) {
      assert(BECount.Exact->isLoopInvariant(L) &&
             BECount.Max->isLoopInvariant(L) &&
             "Computed backedge-taken count isn't loop invariant for loop!");
      ++NumTripCountsComputed;

      // Update the value in the map.
      Pair.first->second = BECount;
    } else {
      // Even without an exact count, a meaningful Max bound is worth caching.
      if (BECount.Max != getCouldNotCompute())
        // Update the value in the map.
        Pair.first->second = BECount;
      if (isa<PHINode>(L->getHeader()->begin()))
        // Only count loops that have phi nodes as not being computable.
        ++NumTripCountsNotComputed;
    }

    // Now that we know more about the trip count for this loop, forget any
    // existing SCEV values for PHI nodes in this loop since they are only
    // conservative estimates made without the benefit of trip count
    // information. This is similar to the code in forgetLoop, except that
    // it handles SCEVUnknown PHI nodes specially.
    if (BECount.hasAnyInfo()) {
      SmallVector<Instruction *, 16> Worklist;
      PushLoopPHIs(L, Worklist);

      // Visited guards against revisiting instructions when def-use chains
      // form cycles (e.g. through the loop PHIs themselves).
      SmallPtrSet<Instruction *, 8> Visited;
      while (!Worklist.empty()) {
        Instruction *I = Worklist.pop_back_val();
        if (!Visited.insert(I)) continue;

        std::map<SCEVCallbackVH, const SCEV *>::iterator It =
          Scalars.find(static_cast<Value *>(I));
        if (It != Scalars.end()) {
          // SCEVUnknown for a PHI either means that it has an unrecognized
          // structure, or it's a PHI that's in the progress of being computed
          // by createNodeForPHI. In the former case, additional loop trip
          // count information isn't going to change anything. In the later
          // case, createNodeForPHI will perform the necessary updates on its
          // own when it gets to that point.
          if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
            ValuesAtScopes.erase(It->second);
            Scalars.erase(It);
          }
          if (PHINode *PN = dyn_cast<PHINode>(I))
            ConstantEvolutionLoopExitValue.erase(PN);
        }

        PushDefUseChildren(I, Worklist);
      }
    }
  }
  return Pair.first->second;
}

/// forgetLoop - This method should be called by the client when it has
/// changed a loop in a way that may effect ScalarEvolution's ability to
/// compute a trip count, or if the loop is deleted.
void ScalarEvolution::forgetLoop(const Loop *L) {
  // Drop any stored trip count value.
  BackedgeTakenCounts.erase(L);

  // Drop information about expressions based on loop-header PHIs.
  SmallVector<Instruction *, 16> Worklist;
  PushLoopPHIs(L, Worklist);

  SmallPtrSet<Instruction *, 8> Visited;
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    if (!Visited.insert(I)) continue;

    std::map<SCEVCallbackVH, const SCEV *>::iterator It =
      Scalars.find(static_cast<Value *>(I));
    if (It != Scalars.end()) {
      ValuesAtScopes.erase(It->second);
      Scalars.erase(It);
      if (PHINode *PN = dyn_cast<PHINode>(I))
        ConstantEvolutionLoopExitValue.erase(PN);
    }

    PushDefUseChildren(I, Worklist);
  }
}

/// forgetValue - This method should be called by the client when it has
/// changed a value in a way that may effect its value, or which may
/// disconnect it from a def-use chain linking it to a loop.
void ScalarEvolution::forgetValue(Value *V) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return;

  // Drop information about expressions based on loop-header PHIs.
  // Like forgetLoop, but seeded from a single instruction rather than
  // all loop-header PHIs.
  SmallVector<Instruction *, 16> Worklist;
  Worklist.push_back(I);

  SmallPtrSet<Instruction *, 8> Visited;
  while (!Worklist.empty()) {
    I = Worklist.pop_back_val();
    if (!Visited.insert(I)) continue;

    std::map<SCEVCallbackVH, const SCEV *>::iterator It =
      Scalars.find(static_cast<Value *>(I));
    if (It != Scalars.end()) {
      ValuesAtScopes.erase(It->second);
      Scalars.erase(It);
      if (PHINode *PN = dyn_cast<PHINode>(I))
        ConstantEvolutionLoopExitValue.erase(PN);
    }

    PushDefUseChildren(I, Worklist);
  }
}

/// ComputeBackedgeTakenCount - Compute the number of times the backedge
/// of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Examine all exits and pick the most conservative values.
  const SCEV *BECount = getCouldNotCompute();
  const SCEV *MaxBECount = getCouldNotCompute();
  bool CouldNotComputeBECount = false;
  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
    BackedgeTakenInfo NewBTI =
      ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);

    if (NewBTI.Exact == getCouldNotCompute()) {
      // We couldn't compute an exact value for this exit, so
      // we won't be able to compute an exact value for the loop.
      CouldNotComputeBECount = true;
      BECount = getCouldNotCompute();
    } else if (!CouldNotComputeBECount) {
      // The loop exits at the first exit taken, so the exact count is the
      // minimum over the exits seen so far.
      if (BECount == getCouldNotCompute())
        BECount = NewBTI.Exact;
      else
        BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
    }
    // Max can still be refined even when an exact count is unavailable.
    if (MaxBECount == getCouldNotCompute())
      MaxBECount = NewBTI.Max;
    else if (NewBTI.Max != getCouldNotCompute())
      MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
  }

  return BackedgeTakenInfo(BECount, MaxBECount);
}

/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge
/// of the specified loop will execute if it exits via the specified block.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
                                                   BasicBlock *ExitingBlock) {

  // Okay, we've chosen an exiting block. See what condition causes us to
  // exit at this block.
  //
  // FIXME: we should be able to handle switch instructions (with a single exit)
  BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
  if (ExitBr == 0) return getCouldNotCompute();
  assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");

  // At this point, we know we have a conditional branch that determines whether
  // the loop is exited. However, we don't know if the branch is executed each
  // time through the loop. If not, then the execution count of the branch will
  // not be equal to the trip count of the loop.
  //
  // Currently we check for this by checking to see if the Exit branch goes to
  // the loop header. If so, we know it will always execute the same number of
  // times as the loop. We also handle the case where the exit block *is* the
  // loop header. This is common for un-rotated loops.
  //
  // If both of those tests fail, walk up the unique predecessor chain to the
  // header, stopping if there is an edge that doesn't exit the loop. If the
  // header is reached, the execution count of the branch will be equal to the
  // trip count of the loop.
  //
  // More extensive analysis could be done to handle more cases here.
  //
  if (ExitBr->getSuccessor(0) != L->getHeader() &&
      ExitBr->getSuccessor(1) != L->getHeader() &&
      ExitBr->getParent() != L->getHeader()) {
    // The simple checks failed, try climbing the unique predecessor chain
    // up to the header.
    bool Ok = false;
    for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
      BasicBlock *Pred = BB->getUniquePredecessor();
      if (!Pred)
        return getCouldNotCompute();
      TerminatorInst *PredTerm = Pred->getTerminator();
      for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
        BasicBlock *PredSucc = PredTerm->getSuccessor(i);
        if (PredSucc == BB)
          continue;
        // If the predecessor has a successor that isn't BB and isn't
        // outside the loop, assume the worst.
        if (L->contains(PredSucc))
          return getCouldNotCompute();
      }
      if (Pred == L->getHeader()) {
        Ok = true;
        break;
      }
      BB = Pred;
    }
    if (!Ok)
      return getCouldNotCompute();
  }

  // Proceed to the next level to examine the exit condition expression.
  return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
                                               ExitBr->getSuccessor(0),
                                               ExitBr->getSuccessor(1));
}

/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of ExitCond, TBB, and FBB.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
                                                       Value *ExitCond,
                                                       BasicBlock *TBB,
                                                       BasicBlock *FBB) {
  // Check if the controlling expression for this loop is an And or Or.
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
    if (BO->getOpcode() == Instruction::And) {
      // Recurse on the operands of the and.
      BackedgeTakenInfo BTI0 =
        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
      BackedgeTakenInfo BTI1 =
        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
      const SCEV *BECount = getCouldNotCompute();
      const SCEV *MaxBECount = getCouldNotCompute();
      if (L->contains(TBB)) {
        // Both conditions must be true for the loop to continue executing.
        // Choose the less conservative count.
        // (The loop exits as soon as either condition goes false, hence
        // the unsigned minimum of the two counts.)
        if (BTI0.Exact == getCouldNotCompute() ||
            BTI1.Exact == getCouldNotCompute())
          BECount = getCouldNotCompute();
        else
          BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
        if (BTI0.Max == getCouldNotCompute())
          MaxBECount = BTI1.Max;
        else if (BTI1.Max == getCouldNotCompute())
          MaxBECount = BTI0.Max;
        else
          MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
      } else {
        // Both conditions must be true for the loop to exit.
        assert(L->contains(FBB) && "Loop block has no successor in loop!");
        if (BTI0.Exact != getCouldNotCompute() &&
            BTI1.Exact != getCouldNotCompute())
          BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
        if (BTI0.Max != getCouldNotCompute() &&
            BTI1.Max != getCouldNotCompute())
          MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max);
      }

      return BackedgeTakenInfo(BECount, MaxBECount);
    }
    if (BO->getOpcode() == Instruction::Or) {
      // Recurse on the operands of the or.
      BackedgeTakenInfo BTI0 =
        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
      BackedgeTakenInfo BTI1 =
        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
      const SCEV *BECount = getCouldNotCompute();
      const SCEV *MaxBECount = getCouldNotCompute();
      if (L->contains(FBB)) {
        // Both conditions must be false for the loop to continue executing.
        // Choose the less conservative count.
        if (BTI0.Exact == getCouldNotCompute() ||
            BTI1.Exact == getCouldNotCompute())
          BECount = getCouldNotCompute();
        else
          BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
        if (BTI0.Max == getCouldNotCompute())
          MaxBECount = BTI1.Max;
        else if (BTI1.Max == getCouldNotCompute())
          MaxBECount = BTI0.Max;
        else
          MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
      } else {
        // Both conditions must be false for the loop to exit.
        assert(L->contains(TBB) && "Loop block has no successor in loop!");
        if (BTI0.Exact != getCouldNotCompute() &&
            BTI1.Exact != getCouldNotCompute())
          BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
        if (BTI0.Max != getCouldNotCompute() &&
            BTI1.Max != getCouldNotCompute())
          MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max);
      }

      return BackedgeTakenInfo(BECount, MaxBECount);
    }
  }

  // With an icmp, it may be feasible to compute an exact backedge-taken count.
  // Proceed to the next level to examine the icmp.
  if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
    return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);

  // Check for a constant condition. These are normally stripped out by
  // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
  // preserve the CFG and is temporarily leaving constant conditions
  // in place.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
    if (L->contains(FBB) == !CI->getZExtValue())
      // The backedge is always taken.
      return getCouldNotCompute();
    else
      // The backedge is never taken.
      return getConstant(CI->getType(), 0);
  }

  // If it's not an integer or pointer comparison then compute it the hard way.
  return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
}

/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
                                                           ICmpInst *ExitCond,
                                                           BasicBlock *TBB,
                                                           BasicBlock *FBB) {

  // If the condition was exit on true, convert the condition to exit on false
  ICmpInst::Predicate Cond;
  if (!L->contains(FBB))
    Cond = ExitCond->getPredicate();
  else
    Cond = ExitCond->getInversePredicate();

  // Handle common loops like: for (X = "string"; *X; ++X)
  if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
    if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
      BackedgeTakenInfo ItCnt =
        ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
      if (ItCnt.hasAnyInfo())
        return ItCnt;
    }

  const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
  const SCEV *RHS = getSCEV(ExitCond->getOperand(1));

  // Try to evaluate any dependencies out of the loop.
  LHS = getSCEVAtScope(LHS, L);
  RHS = getSCEVAtScope(RHS, L);

  // At this point, we would like to compute how many iterations of the
  // loop the predicate will return true for these inputs.
  if (LHS->isLoopInvariant(L) && !RHS->isLoopInvariant(L)) {
    // If there is a loop-invariant, force it into the RHS.
    std::swap(LHS, RHS);
    Cond = ICmpInst::getSwappedPredicate(Cond);
  }

  // Simplify the operands before analyzing them.
  (void)SimplifyICmpOperands(Cond, LHS, RHS);

  // If we have a comparison of a chrec against a constant, try to use value
  // ranges to answer this query.
  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
    if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
      if (AddRec->getLoop() == L) {
        // Form the constant range.
        ConstantRange CompRange(
            ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));

        const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
        if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
      }

  switch (Cond) {
  case ICmpInst::ICMP_NE: {                     // while (X != Y)
    // Convert to: while (X-Y != 0)
    BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
    if (BTI.hasAnyInfo()) return BTI;
    break;
  }
  case ICmpInst::ICMP_EQ: {                     // while (X == Y)
    // Convert to: while (X-Y == 0)
    BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
    if (BTI.hasAnyInfo()) return BTI;
    break;
  }
  case ICmpInst::ICMP_SLT: {
    BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true);
    if (BTI.hasAnyInfo()) return BTI;
    break;
  }
  case ICmpInst::ICMP_SGT: {
    // a >s b is equivalent to ~a <s ~b; reuse the less-than analysis.
    BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
                                             getNotSCEV(RHS), L, true);
    if (BTI.hasAnyInfo()) return BTI;
    break;
  }
  case ICmpInst::ICMP_ULT: {
    BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false);
    if (BTI.hasAnyInfo()) return BTI;
    break;
  }
  case ICmpInst::ICMP_UGT: {
    // a >u b is equivalent to ~a <u ~b; reuse the less-than analysis.
    BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
                                             getNotSCEV(RHS), L, false);
    if (BTI.hasAnyInfo()) return BTI;
    break;
  }
  default:
#if 0
    dbgs() << "ComputeBackedgeTakenCount ";
    if (ExitCond->getOperand(0)->getType()->isUnsigned())
      dbgs() << "[unsigned] ";
    dbgs() << *LHS << "   "
           << Instruction::getOpcodeName(Instruction::ICmp)
           << "   " << *RHS << "\n";
#endif
    break;
  }
  return
    ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
}

/// EvaluateConstantChrecAtConstant - Evaluate the add recurrence AddRec at
/// the constant iteration number C, which must fold to a constant.
static ConstantInt *
EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
                                ScalarEvolution &SE) {
  const SCEV *InVal = SE.getConstant(C);
  const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
  assert(isa<SCEVConstant>(Val) &&
         "Evaluation of SCEV at constant didn't fold correctly?");
  return cast<SCEVConstant>(Val)->getValue();
}

/// GetAddressedElementFromGlobal - Given a global variable with an initializer
/// and a GEP expression (missing the pointer index) indexing into it, return
/// the addressed element of the initializer or null if the index expression is
/// invalid.
3894static Constant * 3895GetAddressedElementFromGlobal(GlobalVariable *GV, 3896 const std::vector<ConstantInt*> &Indices) { 3897 Constant *Init = GV->getInitializer(); 3898 for (unsigned i = 0, e = Indices.size(); i != e; ++i) { 3899 uint64_t Idx = Indices[i]->getZExtValue(); 3900 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { 3901 assert(Idx < CS->getNumOperands() && "Bad struct index!"); 3902 Init = cast<Constant>(CS->getOperand(Idx)); 3903 } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { 3904 if (Idx >= CA->getNumOperands()) return 0; // Bogus program 3905 Init = cast<Constant>(CA->getOperand(Idx)); 3906 } else if (isa<ConstantAggregateZero>(Init)) { 3907 if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { 3908 assert(Idx < STy->getNumElements() && "Bad struct index!"); 3909 Init = Constant::getNullValue(STy->getElementType(Idx)); 3910 } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) { 3911 if (Idx >= ATy->getNumElements()) return 0; // Bogus program 3912 Init = Constant::getNullValue(ATy->getElementType()); 3913 } else { 3914 llvm_unreachable("Unknown constant aggregate type!"); 3915 } 3916 return 0; 3917 } else { 3918 return 0; // Unknown initializer type 3919 } 3920 } 3921 return Init; 3922} 3923 3924/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of 3925/// 'icmp op load X, cst', try to see if we can compute the backedge 3926/// execution count. 3927ScalarEvolution::BackedgeTakenInfo 3928ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( 3929 LoadInst *LI, 3930 Constant *RHS, 3931 const Loop *L, 3932 ICmpInst::Predicate predicate) { 3933 if (LI->isVolatile()) return getCouldNotCompute(); 3934 3935 // Check to see if the loaded pointer is a getelementptr of a global. 3936 // TODO: Use SCEV instead of manually grubbing with GEPs. 
3937 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)); 3938 if (!GEP) return getCouldNotCompute(); 3939 3940 // Make sure that it is really a constant global we are gepping, with an 3941 // initializer, and make sure the first IDX is really 0. 3942 GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); 3943 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || 3944 GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) || 3945 !cast<Constant>(GEP->getOperand(1))->isNullValue()) 3946 return getCouldNotCompute(); 3947 3948 // Okay, we allow one non-constant index into the GEP instruction. 3949 Value *VarIdx = 0; 3950 std::vector<ConstantInt*> Indexes; 3951 unsigned VarIdxNum = 0; 3952 for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) 3953 if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { 3954 Indexes.push_back(CI); 3955 } else if (!isa<ConstantInt>(GEP->getOperand(i))) { 3956 if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. 3957 VarIdx = GEP->getOperand(i); 3958 VarIdxNum = i-2; 3959 Indexes.push_back(0); 3960 } 3961 3962 // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. 3963 // Check to see if X is a loop variant variable value now. 3964 const SCEV *Idx = getSCEV(VarIdx); 3965 Idx = getSCEVAtScope(Idx, L); 3966 3967 // We can only recognize very limited forms of loop index expressions, in 3968 // particular, only affine AddRec's like {C1,+,C2}. 
3969 const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx); 3970 if (!IdxExpr || !IdxExpr->isAffine() || IdxExpr->isLoopInvariant(L) || 3971 !isa<SCEVConstant>(IdxExpr->getOperand(0)) || 3972 !isa<SCEVConstant>(IdxExpr->getOperand(1))) 3973 return getCouldNotCompute(); 3974 3975 unsigned MaxSteps = MaxBruteForceIterations; 3976 for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { 3977 ConstantInt *ItCst = ConstantInt::get( 3978 cast<IntegerType>(IdxExpr->getType()), IterationNum); 3979 ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); 3980 3981 // Form the GEP offset. 3982 Indexes[VarIdxNum] = Val; 3983 3984 Constant *Result = GetAddressedElementFromGlobal(GV, Indexes); 3985 if (Result == 0) break; // Cannot compute! 3986 3987 // Evaluate the condition for this iteration. 3988 Result = ConstantExpr::getICmp(predicate, Result, RHS); 3989 if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure 3990 if (cast<ConstantInt>(Result)->getValue().isMinValue()) { 3991#if 0 3992 dbgs() << "\n***\n*** Computed loop count " << *ItCst 3993 << "\n*** From global " << *GV << "*** BB: " << *L->getHeader() 3994 << "***\n"; 3995#endif 3996 ++NumArrayLenItCounts; 3997 return getConstant(ItCst); // Found terminating iteration! 3998 } 3999 } 4000 return getCouldNotCompute(); 4001} 4002 4003 4004/// CanConstantFold - Return true if we can constant fold an instruction of the 4005/// specified type, assuming that all operands were constants. 
4006static bool CanConstantFold(const Instruction *I) { 4007 if (isa<BinaryOperator>(I) || isa<CmpInst>(I) || 4008 isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I)) 4009 return true; 4010 4011 if (const CallInst *CI = dyn_cast<CallInst>(I)) 4012 if (const Function *F = CI->getCalledFunction()) 4013 return canConstantFoldCallTo(F); 4014 return false; 4015} 4016 4017/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node 4018/// in the loop that V is derived from. We allow arbitrary operations along the 4019/// way, but the operands of an operation must either be constants or a value 4020/// derived from a constant PHI. If this expression does not fit with these 4021/// constraints, return null. 4022static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { 4023 // If this is not an instruction, or if this is an instruction outside of the 4024 // loop, it can't be derived from a loop PHI. 4025 Instruction *I = dyn_cast<Instruction>(V); 4026 if (I == 0 || !L->contains(I)) return 0; 4027 4028 if (PHINode *PN = dyn_cast<PHINode>(I)) { 4029 if (L->getHeader() == I->getParent()) 4030 return PN; 4031 else 4032 // We don't currently keep track of the control flow needed to evaluate 4033 // PHIs, so we cannot handle PHIs inside of loops. 4034 return 0; 4035 } 4036 4037 // If we won't be able to constant fold this expression even if the operands 4038 // are constants, return early. 4039 if (!CanConstantFold(I)) return 0; 4040 4041 // Otherwise, we can evaluate this instruction if all of its operands are 4042 // constant or derived from a PHI node themselves. 
4043 PHINode *PHI = 0; 4044 for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op) 4045 if (!(isa<Constant>(I->getOperand(Op)) || 4046 isa<GlobalValue>(I->getOperand(Op)))) { 4047 PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L); 4048 if (P == 0) return 0; // Not evolving from PHI 4049 if (PHI == 0) 4050 PHI = P; 4051 else if (PHI != P) 4052 return 0; // Evolving from multiple different PHIs. 4053 } 4054 4055 // This is a expression evolving from a constant PHI! 4056 return PHI; 4057} 4058 4059/// EvaluateExpression - Given an expression that passes the 4060/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node 4061/// in the loop has the value PHIVal. If we can't fold this expression for some 4062/// reason, return null. 4063static Constant *EvaluateExpression(Value *V, Constant *PHIVal, 4064 const TargetData *TD) { 4065 if (isa<PHINode>(V)) return PHIVal; 4066 if (Constant *C = dyn_cast<Constant>(V)) return C; 4067 if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV; 4068 Instruction *I = cast<Instruction>(V); 4069 4070 std::vector<Constant*> Operands; 4071 Operands.resize(I->getNumOperands()); 4072 4073 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 4074 Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD); 4075 if (Operands[i] == 0) return 0; 4076 } 4077 4078 if (const CmpInst *CI = dyn_cast<CmpInst>(I)) 4079 return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], 4080 Operands[1], TD); 4081 return ConstantFoldInstOperands(I->getOpcode(), I->getType(), 4082 &Operands[0], Operands.size(), TD); 4083} 4084 4085/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is 4086/// in the header of its containing loop, we know the loop executes a 4087/// constant number of times, and the PHI node is just a recurrence 4088/// involving constants, fold it. 
Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
                                                   const APInt &BEs,
                                                   const Loop *L) {
  // Results are memoized per-PHI; a cached null means "already tried, gave
  // up", so we don't brute-force the same PHI twice.
  std::map<PHINode*, Constant*>::iterator I =
    ConstantEvolutionLoopExitValue.find(PN);
  if (I != ConstantEvolutionLoopExitValue.end())
    return I->second;

  if (BEs.ugt(MaxBruteForceIterations))
    return ConstantEvolutionLoopExitValue[PN] = 0;  // Not going to evaluate it.

  // operator[] default-constructs the mapped pointer to null, so RetVal
  // starts out as the "failed" cache entry until we overwrite it.
  Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];

  // Since the loop is canonicalized, the PHI node must have two entries.  One
  // entry must be a constant (coming in from outside of the loop), and the
  // second must be derived from the same PHI.
  bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
  Constant *StartCST =
    dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
  if (StartCST == 0)
    return RetVal = 0;  // Must be a constant.

  Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
  PHINode *PN2 = getConstantEvolvingPHI(BEValue, L);
  if (PN2 != PN)
    return RetVal = 0;  // Not derived from same PHI.

  // Execute the loop symbolically to determine the exit value.
  if (BEs.getActiveBits() >= 32)
    return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!

  unsigned NumIterations = BEs.getZExtValue(); // must be in range
  unsigned IterationNum = 0;
  for (Constant *PHIVal = StartCST; ; ++IterationNum) {
    if (IterationNum == NumIterations)
      return RetVal = PHIVal;  // Got exit value!

    // Compute the value of the PHI node for the next iteration.
    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
    if (NextPHI == PHIVal)
      return RetVal = NextPHI;  // Stopped evolving!
    if (NextPHI == 0)
      return 0;        // Couldn't evaluate!  (RetVal is already null.)
    PHIVal = NextPHI;
  }
}

/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
/// constant number of times (the condition evolves only from constants),
/// try to evaluate a few iterations of the loop until the exit condition
/// gets a value of ExitWhen (true or false).  If we cannot evaluate the
/// trip count of the loop, return getCouldNotCompute().
const SCEV *
ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
                                                       Value *Cond,
                                                       bool ExitWhen) {
  PHINode *PN = getConstantEvolvingPHI(Cond, L);
  if (PN == 0) return getCouldNotCompute();

  // Since the loop is canonicalized, the PHI node must have two entries.  One
  // entry must be a constant (coming in from outside of the loop), and the
  // second must be derived from the same PHI.
  bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
  Constant *StartCST =
    dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
  if (StartCST == 0) return getCouldNotCompute();  // Must be a constant.

  Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
  PHINode *PN2 = getConstantEvolvingPHI(BEValue, L);
  if (PN2 != PN) return getCouldNotCompute();  // Not derived from same PHI.

  // Okay, we found a PHI node that defines the trip count of this loop.
  // Execute the loop symbolically to determine when the condition gets a
  // value of "ExitWhen".
  unsigned IterationNum = 0;
  unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
  for (Constant *PHIVal = StartCST;
       IterationNum != MaxIterations; ++IterationNum) {
    ConstantInt *CondVal =
      dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));

    // Couldn't symbolically evaluate.
    if (!CondVal) return getCouldNotCompute();

    if (CondVal->getValue() == uint64_t(ExitWhen)) {
      ++NumBruteForceTripCountsComputed;
      return getConstant(Type::getInt32Ty(getContext()), IterationNum);
    }

    // Compute the value of the PHI node for the next iteration.
    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
    if (NextPHI == 0 || NextPHI == PHIVal)
      return getCouldNotCompute();// Couldn't evaluate or not making progress...
    PHIVal = NextPHI;
  }

  // Too many iterations were needed to evaluate.
  return getCouldNotCompute();
}

/// getSCEVAtScope - Return a SCEV expression for the specified value
/// at the specified scope in the program.  The L value specifies a loop
/// nest to evaluate the expression at, where null is the top-level or a
/// specified loop is immediately inside of the loop.
///
/// This method can be used to compute the exit value for a variable defined
/// in a loop by querying what the value will hold in the parent loop.
///
/// In the case that a relevant loop exit value cannot be computed, the
/// original value V is returned.
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
  // Check to see if we've folded this expression at this loop before.
  // A cached null marks a previous failure; in that case return V itself.
  std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
  std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
    Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
  if (!Pair.second)
    return Pair.first->second ? Pair.first->second : V;

  // Otherwise compute it.  Note: re-index ValuesAtScopes[V] rather than
  // reusing Pair.first, since the recursive computation may have inserted
  // into the map and invalidated the iterator.
  const SCEV *C = computeSCEVAtScope(V, L);
  ValuesAtScopes[V][L] = C;
  return C;
}

const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
  if (isa<SCEVConstant>(V)) return V;

  // If this instruction is evolved from a constant-evolving PHI, compute the
  // exit value from the loop without using SCEVs.
  if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
    if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
      // NOTE: this local 'LI' (the instruction's loop) shadows the member
      // LoopInfo pointer 'this->LI', hence the explicit qualification.
      const Loop *LI = (*this->LI)[I->getParent()];
      if (LI && LI->getParentLoop() == L)  // Looking for loop exit value.
        if (PHINode *PN = dyn_cast<PHINode>(I))
          if (PN->getParent() == LI->getHeader()) {
            // Okay, there is no closed form solution for the PHI node.  Check
            // to see if the loop that contains it has a known backedge-taken
            // count.  If so, we may be able to force computation of the exit
            // value.
            const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
            if (const SCEVConstant *BTCC =
                  dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
              // Okay, we know how many times the containing loop executes.  If
              // this is a constant evolving PHI node, get the final value at
              // the specified iteration number.
              Constant *RV = getConstantEvolutionLoopExitValue(PN,
                                                   BTCC->getValue()->getValue(),
                                                               LI);
              if (RV) return getSCEV(RV);
            }
          }

      // Okay, this is an expression that we cannot symbolically evaluate
      // into a SCEV.  Check to see if it's possible to symbolically evaluate
      // the arguments into constants, and if so, try to constant propagate the
      // result.  This is particularly useful for computing loop exit values.
      if (CanConstantFold(I)) {
        std::vector<Constant*> Operands;
        Operands.reserve(I->getNumOperands());
        for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
          Value *Op = I->getOperand(i);
          if (Constant *C = dyn_cast<Constant>(Op)) {
            Operands.push_back(C);
          } else {
            // If any of the operands is non-constant and if they are
            // non-integer and non-pointer, don't even try to analyze them
            // with scev techniques.
            if (!isSCEVable(Op->getType()))
              return V;

            const SCEV *OpV = getSCEVAtScope(Op, L);
            if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV)) {
              Constant *C = SC->getValue();
              // Insert a cast if SCEV canonicalization changed the type.
              if (C->getType() != Op->getType())
                C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                                  Op->getType(),
                                                                  false),
                                          C, Op->getType());
              Operands.push_back(C);
            } else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV)) {
              if (Constant *C = dyn_cast<Constant>(SU->getValue())) {
                if (C->getType() != Op->getType())
                  C =
                    ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                                  Op->getType(),
                                                                  false),
                                          C, Op->getType());
                Operands.push_back(C);
              } else
                return V;
            } else {
              return V;
            }
          }
        }

        Constant *C = 0;
        if (const CmpInst *CI = dyn_cast<CmpInst>(I))
          C = ConstantFoldCompareInstOperands(CI->getPredicate(),
                                              Operands[0], Operands[1], TD);
        else
          C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
                                       &Operands[0], Operands.size(), TD);
        if (C)
          return getSCEV(C);
      }
    }

    // This is some other type of SCEVUnknown, just return it.
    return V;
  }

  if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
    // Avoid performing the look-up in the common case where the specified
    // expression has no loop-variant portions.
    for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
      const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
      if (OpAtScope != Comm->getOperand(i)) {
        // Okay, at least one of these operands is loop variant but might be
        // foldable.  Build a new instance of the folded commutative expression.
        SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
                                            Comm->op_begin()+i);
        NewOps.push_back(OpAtScope);

        for (++i; i != e; ++i) {
          OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
          NewOps.push_back(OpAtScope);
        }
        if (isa<SCEVAddExpr>(Comm))
          return getAddExpr(NewOps);
        if (isa<SCEVMulExpr>(Comm))
          return getMulExpr(NewOps);
        if (isa<SCEVSMaxExpr>(Comm))
          return getSMaxExpr(NewOps);
        if (isa<SCEVUMaxExpr>(Comm))
          return getUMaxExpr(NewOps);
        llvm_unreachable("Unknown commutative SCEV type!");
      }
    }
    // If we got here, all operands are loop invariant.
    return Comm;
  }

  if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
    const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
    const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
    if (LHS == Div->getLHS() && RHS == Div->getRHS())
      return Div;   // must be loop invariant
    return getUDivExpr(LHS, RHS);
  }

  // If this is a loop recurrence for a loop that does not contain L, then we
  // are dealing with the final value computed by the loop.
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
    if (!L || !AddRec->getLoop()->contains(L)) {
      // To evaluate this recurrence, we need to know how many times the AddRec
      // loop iterates.  Compute this now.
      const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
      if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;

      // Then, evaluate the AddRec.
      return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
    }
    return AddRec;
  }

  if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getZeroExtendExpr(Op, Cast->getType());
  }

  if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getSignExtendExpr(Op, Cast->getType());
  }

  if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getTruncateExpr(Op, Cast->getType());
  }

  llvm_unreachable("Unknown SCEV type!");
  return 0;
}

/// getSCEVAtScope - This is a convenience function which does
/// getSCEVAtScope(getSCEV(V), L).
const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
  return getSCEVAtScope(getSCEV(V), L);
}

/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
/// following equation:
///
///     A * X = B (mod N)
///
/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
                                               ScalarEvolution &SE) {
  uint32_t BW = A.getBitWidth();
  assert(BW == B.getBitWidth() && "Bit widths must be the same.");
  assert(A != 0 && "A must be non-zero.");

  // 1. D = gcd(A, N)
  //
  // The gcd of A and N may have only one prime factor: 2. The number of
  // trailing zeros in A is its multiplicity
  uint32_t Mult2 = A.countTrailingZeros();
  // D = 2^Mult2

  // 2. Check if B is divisible by D.
  //
  // B is divisible by D if and only if the multiplicity of prime factor 2 for B
  // is not less than multiplicity of this prime factor for D.
  if (B.countTrailingZeros() < Mult2)
    return SE.getCouldNotCompute();

  // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
  // modulo (N / D).
  //
  // (N / D) may need BW+1 bits in its representation.  Hence, we'll use this
  // bit width during computations.
  APInt AD = A.lshr(Mult2).zext(BW + 1);  // AD = A / D
  APInt Mod(BW + 1, 0);
  Mod.set(BW - Mult2);  // Mod = N / D = 2^(BW - Mult2), a single set bit
  APInt I = AD.multiplicativeInverse(Mod);

  // 4. Compute the minimum unsigned root of the equation:
  // I * (B / D) mod (N / D)
  APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);

  // The result is guaranteed to be less than 2^BW so we may truncate it to BW
  // bits.
  return SE.getConstant(Result.trunc(BW));
}

/// SolveQuadraticEquation - Find the roots of the quadratic equation for the
/// given quadratic chrec {L,+,M,+,N}.  This returns either the two roots (which
/// might be the same) or two SCEVCouldNotCompute objects.
///
static std::pair<const SCEV *,const SCEV *>
SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
  assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
  const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
  const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
  const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));

  // We currently can only solve this if the coefficients are constants.
  if (!LC || !MC || !NC) {
    const SCEV *CNC = SE.getCouldNotCompute();
    return std::make_pair(CNC, CNC);
  }

  uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
  const APInt &L = LC->getValue()->getValue();
  const APInt &M = MC->getValue()->getValue();
  const APInt &N = NC->getValue()->getValue();
  APInt Two(BitWidth, 2);
  APInt Four(BitWidth, 4);

  {
    using namespace APIntOps;
    const APInt& C = L;
    // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
    // The B coefficient is M-N/2
    APInt B(M);
    B -= sdiv(N,Two);

    // The A coefficient is N/2
    APInt A(N.sdiv(Two));

    // Compute the B^2-4ac term.
    APInt SqrtTerm(B);
    SqrtTerm *= B;
    SqrtTerm -= Four * (A * C);

    // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
    // integer value or else APInt::sqrt() will assert.
    APInt SqrtVal(SqrtTerm.sqrt());

    // Compute the two solutions for the quadratic formula.
    // The divisions must be performed as signed divisions.
    APInt NegB(-B);
    APInt TwoA( A << 1 );
    if (TwoA.isMinValue()) {
      // 2A == 0: not actually quadratic; avoid dividing by zero.
      const SCEV *CNC = SE.getCouldNotCompute();
      return std::make_pair(CNC, CNC);
    }

    LLVMContext &Context = SE.getContext();

    ConstantInt *Solution1 =
      ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
    ConstantInt *Solution2 =
      ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));

    return std::make_pair(SE.getConstant(Solution1),
                          SE.getConstant(Solution2));
  } // end APIntOps namespace
}

/// HowFarToZero - Return the number of times a backedge comparing the specified
/// value to zero will execute.  If not computable, return CouldNotCompute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
  // If the value is a constant
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
    // If the value is already zero, the branch will execute zero times.
    if (C->getValue()->isZero()) return C;
    return getCouldNotCompute();  // Otherwise it will loop infinitely.
  }

  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
  if (!AddRec || AddRec->getLoop() != L)
    return getCouldNotCompute();

  if (AddRec->isAffine()) {
    // If this is an affine expression, the execution count of this branch is
    // the minimum unsigned root of the following equation:
    //
    //     Start + Step*N = 0 (mod 2^BW)
    //
    // equivalent to:
    //
    //             Step*N = -Start (mod 2^BW)
    //
    // where BW is the common bit width of Start and Step.

    // Get the initial value for the loop, evaluated in the parent scope.
    const SCEV *Start = getSCEVAtScope(AddRec->getStart(),
                                       L->getParentLoop());
    const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1),
                                      L->getParentLoop());

    if (const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step)) {
      // For now we handle only constant steps.

      // First, handle unitary steps.
      if (StepC->getValue()->equalsInt(1))      // 1*N = -Start (mod 2^BW), so:
        return getNegativeSCEV(Start);          //   N = -Start (as unsigned)
      if (StepC->getValue()->isAllOnesValue())  // -1*N = -Start (mod 2^BW), so:
        return Start;                           //    N = Start (as unsigned)

      // Then, try to solve the above equation provided that Start is constant.
      if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
        return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
                                            -StartC->getValue()->getValue(),
                                            *this);
    }
  } else if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
    // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
    // the quadratic equation to solve it.
    std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec,
                                                                        *this);
    const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
    const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
    if (R1) {
#if 0
      dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
             << "  sol#2: " << *R2 << "\n";
#endif
      // Pick the smallest positive root value.
      if (ConstantInt *CB =
          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
                                   R1->getValue(), R2->getValue()))) {
        if (CB->getZExtValue() == false)
          std::swap(R1, R2);   // R1 is the minimum root now.

        // We can only use this value if the chrec ends up with an exact zero
        // value at this index.  When solving for "X*X != 5", for example, we
        // should not accept a root of 2.
        const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
        if (Val->isZero())
          return R1;  // We found a quadratic root!
      }
    }
  }

  return getCouldNotCompute();
}

/// HowFarToNonZero - Return the number of times a backedge checking the
/// specified value for nonzero will execute.  If not computable, return
/// CouldNotCompute
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
  // Loops that look like: while (X == 0) are very strange indeed.  We don't
  // handle them yet except for the trivial case.  This could be expanded in the
  // future as needed.

  // If the value is a constant, check to see if it is known to be non-zero
  // already.  If so, the backedge will execute zero times.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
    if (!C->getValue()->isNullValue())
      return getConstant(C->getType(), 0);
    return getCouldNotCompute();  // Otherwise it will loop infinitely.
  }

  // We could implement others, but I really doubt anyone writes loops like
  // this, and if they did, they would already be constant folded.
  return getCouldNotCompute();
}

/// getLoopPredecessor - If the given loop's header has exactly one unique
/// predecessor outside the loop, return it.  Otherwise return null.
/// This is less strict than the loop "preheader" concept, which requires
/// the predecessor to have only one single successor.
///
BasicBlock *ScalarEvolution::getLoopPredecessor(const Loop *L) {
  BasicBlock *Header = L->getHeader();
  BasicBlock *Pred = 0;
  for (pred_iterator PI = pred_begin(Header), E = pred_end(Header);
       PI != E; ++PI)
    if (!L->contains(*PI)) {
      if (Pred && Pred != *PI) return 0;  // Multiple predecessors.
      Pred = *PI;
    }
  return Pred;
}

/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
/// (which may not be an immediate predecessor) which has exactly one
/// successor from which BB is reachable, or null if no such block is
/// found.
///
std::pair<BasicBlock *, BasicBlock *>
ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
  // If the block has a unique predecessor, then there is no path from the
  // predecessor to the block that does not go through the direct edge
  // from the predecessor to the block.
  if (BasicBlock *Pred = BB->getSinglePredecessor())
    return std::make_pair(Pred, BB);

  // A loop's header is defined to be a block that dominates the loop.
  // If the header has a unique predecessor outside the loop, it must be
  // a block that has exactly one successor that can reach the loop.
  if (Loop *L = LI->getLoopFor(BB))
    return std::make_pair(getLoopPredecessor(L), L->getHeader());

  // Nothing found; return a pair of null blocks.
  return std::pair<BasicBlock *, BasicBlock *>();
}

/// HasSameValue - SCEV structural equivalence is usually sufficient for
/// testing whether two expressions are equal, however for the purposes of
/// looking for a condition guarding a loop, it can be useful to be a little
/// more general, since a front-end may have replicated the controlling
/// expression.
///
static bool HasSameValue(const SCEV *A, const SCEV *B) {
  // Quick check to see if they are the same SCEV.
  if (A == B) return true;

  // Otherwise, if they're both SCEVUnknown, it's possible that they hold
  // two different instructions with the same value. Check for this case.
  // Memory-reading instructions are excluded since the two loads could
  // observe different values.
  if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
    if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
      if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
        if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
          if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
            return true;

  // Otherwise assume they may have a different value.
  return false;
}

/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
/// predicate Pred. Return true iff any changes were made.
///
bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
                                           const SCEV *&LHS, const SCEV *&RHS) {
  bool Changed = false;

  // Canonicalize a constant to the right side.
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
    // Check for both operands constant.
    if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
      if (ConstantExpr::getICmp(Pred,
                                LHSC->getValue(),
                                RHSC->getValue())->isNullValue())
        goto trivially_false;
      else
        goto trivially_true;
    }
    // Otherwise swap the operands to put the constant on the right.
    std::swap(LHS, RHS);
    Pred = ICmpInst::getSwappedPredicate(Pred);
    Changed = true;
  }

  // If we're comparing an addrec with a value which is loop-invariant in the
  // addrec's loop, put the addrec on the left. Also make a dominance check,
  // as both operands could be addrecs loop-invariant in each other's loop.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
    const Loop *L = AR->getLoop();
    if (LHS->isLoopInvariant(L) && LHS->properlyDominates(L->getHeader(), DT)) {
      std::swap(LHS, RHS);
      Pred = ICmpInst::getSwappedPredicate(Pred);
      Changed = true;
    }
  }

  // If there's a constant operand, canonicalize comparisons with boundary
  // cases, and canonicalize *-or-equal comparisons to regular comparisons.
  if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
    const APInt &RA = RC->getValue()->getValue();
    switch (Pred) {
    default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
    case ICmpInst::ICMP_EQ:
    case ICmpInst::ICMP_NE:
      break;
    case ICmpInst::ICMP_UGE:
      // x >=u 1 is x != 0; x >=u UINT_MAX is x == UINT_MAX; x >=u 0 is true.
      if ((RA - 1).isMinValue()) {
        Pred = ICmpInst::ICMP_NE;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      }
      if (RA.isMaxValue()) {
        Pred = ICmpInst::ICMP_EQ;
        Changed = true;
        break;
      }
      if (RA.isMinValue()) goto trivially_true;

      Pred = ICmpInst::ICMP_UGT;
      RHS = getConstant(RA - 1);
      Changed = true;
      break;
    case ICmpInst::ICMP_ULE:
      if ((RA + 1).isMaxValue()) {
        Pred = ICmpInst::ICMP_NE;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
      if (RA.isMinValue()) {
        Pred = ICmpInst::ICMP_EQ;
        Changed = true;
        break;
      }
      if (RA.isMaxValue()) goto trivially_true;

      Pred = ICmpInst::ICMP_ULT;
      RHS = getConstant(RA + 1);
      Changed = true;
      break;
    case ICmpInst::ICMP_SGE:
      if ((RA - 1).isMinSignedValue()) {
        Pred = ICmpInst::ICMP_NE;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      }
      if (RA.isMaxSignedValue()) {
        Pred = ICmpInst::ICMP_EQ;
        Changed = true;
        break;
      }
      if (RA.isMinSignedValue()) goto trivially_true;

      Pred = ICmpInst::ICMP_SGT;
      RHS = getConstant(RA - 1);
      Changed = true;
      break;
    case ICmpInst::ICMP_SLE:
      if ((RA + 1).isMaxSignedValue()) {
        Pred = ICmpInst::ICMP_NE;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
      if (RA.isMinSignedValue()) {
        Pred = ICmpInst::ICMP_EQ;
        Changed = true;
        break;
      }
      if (RA.isMaxSignedValue()) goto trivially_true;

      Pred = ICmpInst::ICMP_SLT;
      RHS = getConstant(RA + 1);
      Changed = true;
      break;
    case ICmpInst::ICMP_UGT:
      if (RA.isMinValue()) {
        Pred = ICmpInst::ICMP_NE;
        Changed = true;
        break;
      }
      if ((RA + 1).isMaxValue()) {
        Pred = ICmpInst::ICMP_EQ;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
      if (RA.isMaxValue()) goto trivially_false;
      break;
    case ICmpInst::ICMP_ULT:
      if (RA.isMaxValue()) {
        Pred = ICmpInst::ICMP_NE;
        Changed = true;
        break;
      }
      if ((RA - 1).isMinValue()) {
        Pred = ICmpInst::ICMP_EQ;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      }
      if (RA.isMinValue()) goto trivially_false;
      break;
    case ICmpInst::ICMP_SGT:
      if (RA.isMinSignedValue()) {
        Pred = ICmpInst::ICMP_NE;
        Changed = true;
        break;
      }
      if ((RA + 1).isMaxSignedValue()) {
        Pred = ICmpInst::ICMP_EQ;
        RHS = getConstant(RA + 1);
        Changed = true;
        break;
      }
      if (RA.isMaxSignedValue()) goto trivially_false;
      break;
    case ICmpInst::ICMP_SLT:
      if (RA.isMaxSignedValue()) {
        Pred = ICmpInst::ICMP_NE;
        Changed = true;
        break;
      }
      if ((RA - 1).isMinSignedValue()) {
        Pred = ICmpInst::ICMP_EQ;
        RHS = getConstant(RA - 1);
        Changed = true;
        break;
      }
      if (RA.isMinSignedValue()) goto trivially_false;
      break;
    }
  }

  // Check for obvious equality.
  if (HasSameValue(LHS, RHS)) {
    if (ICmpInst::isTrueWhenEqual(Pred))
      goto trivially_true;
    if (ICmpInst::isFalseWhenEqual(Pred))
      goto trivially_false;
  }

  // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
  // adding or subtracting 1 from one of the operands.  The range check
  // guarantees the +1/-1 cannot wrap, so the NSW/NUW flags are justified.
  switch (Pred) {
  case ICmpInst::ICMP_SLE:
    if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
                       /*HasNUW=*/false, /*HasNSW=*/true);
      Pred = ICmpInst::ICMP_SLT;
      Changed = true;
    } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
                       /*HasNUW=*/false, /*HasNSW=*/true);
      Pred = ICmpInst::ICMP_SLT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_SGE:
    if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
                       /*HasNUW=*/false, /*HasNSW=*/true);
      Pred = ICmpInst::ICMP_SGT;
      Changed = true;
    } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
                       /*HasNUW=*/false, /*HasNSW=*/true);
      Pred = ICmpInst::ICMP_SGT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_ULE:
    if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
                       /*HasNUW=*/true, /*HasNSW=*/false);
      Pred = ICmpInst::ICMP_ULT;
      Changed = true;
    } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
                       /*HasNUW=*/true, /*HasNSW=*/false);
      Pred = ICmpInst::ICMP_ULT;
      Changed = true;
    }
    break;
  case ICmpInst::ICMP_UGE:
    if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
      RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
                       /*HasNUW=*/true, /*HasNSW=*/false);
      Pred = ICmpInst::ICMP_UGT;
      Changed = true;
    } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
      LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
                       /*HasNUW=*/true, /*HasNSW=*/false);
      Pred = ICmpInst::ICMP_UGT;
      Changed = true;
    }
    break;
  default:
    break;
  }

  // TODO: More simplifications are possible here.

  return Changed;

trivially_true:
  // Return 0 == 0.
  LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0);
  Pred = ICmpInst::ICMP_EQ;
  return true;

trivially_false:
  // Return 0 != 0.
  LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0);
  Pred = ICmpInst::ICMP_NE;
  return true;
}

bool ScalarEvolution::isKnownNegative(const SCEV *S) {
  return getSignedRange(S).getSignedMax().isNegative();
}

bool ScalarEvolution::isKnownPositive(const SCEV *S) {
  return getSignedRange(S).getSignedMin().isStrictlyPositive();
}

bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
  return !getSignedRange(S).getSignedMin().isNegative();
}

bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
  return !getSignedRange(S).getSignedMax().isStrictlyPositive();
}

bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
  // Conservative: only proves non-zero when the signed range lies entirely
  // on one side of zero.
  return isKnownNegative(S) || isKnownPositive(S);
}

bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
                                       const SCEV *LHS, const SCEV *RHS) {
  // Canonicalize the inputs first.
  (void)SimplifyICmpOperands(Pred, LHS, RHS);

  // If LHS or RHS is an addrec, check to see if the condition is true in
  // every iteration of the loop.
4951 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) 4952 if (isLoopEntryGuardedByCond( 4953 AR->getLoop(), Pred, AR->getStart(), RHS) && 4954 isLoopBackedgeGuardedByCond( 4955 AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS)) 4956 return true; 4957 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) 4958 if (isLoopEntryGuardedByCond( 4959 AR->getLoop(), Pred, LHS, AR->getStart()) && 4960 isLoopBackedgeGuardedByCond( 4961 AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this))) 4962 return true; 4963 4964 // Otherwise see what can be done with known constant ranges. 4965 return isKnownPredicateWithRanges(Pred, LHS, RHS); 4966} 4967 4968bool 4969ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, 4970 const SCEV *LHS, const SCEV *RHS) { 4971 if (HasSameValue(LHS, RHS)) 4972 return ICmpInst::isTrueWhenEqual(Pred); 4973 4974 // This code is split out from isKnownPredicate because it is called from 4975 // within isLoopEntryGuardedByCond. 4976 switch (Pred) { 4977 default: 4978 llvm_unreachable("Unexpected ICmpInst::Predicate value!"); 4979 break; 4980 case ICmpInst::ICMP_SGT: 4981 Pred = ICmpInst::ICMP_SLT; 4982 std::swap(LHS, RHS); 4983 case ICmpInst::ICMP_SLT: { 4984 ConstantRange LHSRange = getSignedRange(LHS); 4985 ConstantRange RHSRange = getSignedRange(RHS); 4986 if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin())) 4987 return true; 4988 if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax())) 4989 return false; 4990 break; 4991 } 4992 case ICmpInst::ICMP_SGE: 4993 Pred = ICmpInst::ICMP_SLE; 4994 std::swap(LHS, RHS); 4995 case ICmpInst::ICMP_SLE: { 4996 ConstantRange LHSRange = getSignedRange(LHS); 4997 ConstantRange RHSRange = getSignedRange(RHS); 4998 if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin())) 4999 return true; 5000 if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax())) 5001 return false; 5002 break; 5003 } 5004 case ICmpInst::ICMP_UGT: 5005 Pred = ICmpInst::ICMP_ULT; 5006 std::swap(LHS, RHS); 
5007 case ICmpInst::ICMP_ULT: { 5008 ConstantRange LHSRange = getUnsignedRange(LHS); 5009 ConstantRange RHSRange = getUnsignedRange(RHS); 5010 if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin())) 5011 return true; 5012 if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax())) 5013 return false; 5014 break; 5015 } 5016 case ICmpInst::ICMP_UGE: 5017 Pred = ICmpInst::ICMP_ULE; 5018 std::swap(LHS, RHS); 5019 case ICmpInst::ICMP_ULE: { 5020 ConstantRange LHSRange = getUnsignedRange(LHS); 5021 ConstantRange RHSRange = getUnsignedRange(RHS); 5022 if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin())) 5023 return true; 5024 if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax())) 5025 return false; 5026 break; 5027 } 5028 case ICmpInst::ICMP_NE: { 5029 if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet()) 5030 return true; 5031 if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet()) 5032 return true; 5033 5034 const SCEV *Diff = getMinusSCEV(LHS, RHS); 5035 if (isKnownNonZero(Diff)) 5036 return true; 5037 break; 5038 } 5039 case ICmpInst::ICMP_EQ: 5040 // The check at the top of the function catches the case where 5041 // the values are known to be equal. 5042 break; 5043 } 5044 return false; 5045} 5046 5047/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is 5048/// protected by a conditional between LHS and RHS. This is used to 5049/// to eliminate casts. 5050bool 5051ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, 5052 ICmpInst::Predicate Pred, 5053 const SCEV *LHS, const SCEV *RHS) { 5054 // Interpret a null as meaning no loop, where there is obviously no guard 5055 // (interprocedural conditions notwithstanding). 
5056 if (!L) return true; 5057 5058 BasicBlock *Latch = L->getLoopLatch(); 5059 if (!Latch) 5060 return false; 5061 5062 BranchInst *LoopContinuePredicate = 5063 dyn_cast<BranchInst>(Latch->getTerminator()); 5064 if (!LoopContinuePredicate || 5065 LoopContinuePredicate->isUnconditional()) 5066 return false; 5067 5068 return isImpliedCond(LoopContinuePredicate->getCondition(), Pred, LHS, RHS, 5069 LoopContinuePredicate->getSuccessor(0) != L->getHeader()); 5070} 5071 5072/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected 5073/// by a conditional between LHS and RHS. This is used to help avoid max 5074/// expressions in loop trip counts, and to eliminate casts. 5075bool 5076ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, 5077 ICmpInst::Predicate Pred, 5078 const SCEV *LHS, const SCEV *RHS) { 5079 // Interpret a null as meaning no loop, where there is obviously no guard 5080 // (interprocedural conditions notwithstanding). 5081 if (!L) return false; 5082 5083 // Starting at the loop predecessor, climb up the predecessor chain, as long 5084 // as there are predecessors that can be found that have unique successors 5085 // leading to the original header. 5086 for (std::pair<BasicBlock *, BasicBlock *> 5087 Pair(getLoopPredecessor(L), L->getHeader()); 5088 Pair.first; 5089 Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { 5090 5091 BranchInst *LoopEntryPredicate = 5092 dyn_cast<BranchInst>(Pair.first->getTerminator()); 5093 if (!LoopEntryPredicate || 5094 LoopEntryPredicate->isUnconditional()) 5095 continue; 5096 5097 if (isImpliedCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS, 5098 LoopEntryPredicate->getSuccessor(0) != Pair.second)) 5099 return true; 5100 } 5101 5102 return false; 5103} 5104 5105/// isImpliedCond - Test whether the condition described by Pred, LHS, 5106/// and RHS is true whenever the given Cond value evaluates to true. 
5107bool ScalarEvolution::isImpliedCond(Value *CondValue, 5108 ICmpInst::Predicate Pred, 5109 const SCEV *LHS, const SCEV *RHS, 5110 bool Inverse) { 5111 // Recursively handle And and Or conditions. 5112 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) { 5113 if (BO->getOpcode() == Instruction::And) { 5114 if (!Inverse) 5115 return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || 5116 isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); 5117 } else if (BO->getOpcode() == Instruction::Or) { 5118 if (Inverse) 5119 return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || 5120 isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); 5121 } 5122 } 5123 5124 ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue); 5125 if (!ICI) return false; 5126 5127 // Bail if the ICmp's operands' types are wider than the needed type 5128 // before attempting to call getSCEV on them. This avoids infinite 5129 // recursion, since the analysis of widening casts can require loop 5130 // exit condition information for overflow checking, which would 5131 // lead back here. 5132 if (getTypeSizeInBits(LHS->getType()) < 5133 getTypeSizeInBits(ICI->getOperand(0)->getType())) 5134 return false; 5135 5136 // Now that we found a conditional branch that dominates the loop, check to 5137 // see if it is the comparison we are looking for. 5138 ICmpInst::Predicate FoundPred; 5139 if (Inverse) 5140 FoundPred = ICI->getInversePredicate(); 5141 else 5142 FoundPred = ICI->getPredicate(); 5143 5144 const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); 5145 const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); 5146 5147 // Balance the types. The case where FoundLHS' type is wider than 5148 // LHS' type is checked for above. 
5149 if (getTypeSizeInBits(LHS->getType()) > 5150 getTypeSizeInBits(FoundLHS->getType())) { 5151 if (CmpInst::isSigned(Pred)) { 5152 FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType()); 5153 FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType()); 5154 } else { 5155 FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType()); 5156 FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType()); 5157 } 5158 } 5159 5160 // Canonicalize the query to match the way instcombine will have 5161 // canonicalized the comparison. 5162 if (SimplifyICmpOperands(Pred, LHS, RHS)) 5163 if (LHS == RHS) 5164 return CmpInst::isTrueWhenEqual(Pred); 5165 if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS)) 5166 if (FoundLHS == FoundRHS) 5167 return CmpInst::isFalseWhenEqual(Pred); 5168 5169 // Check to see if we can make the LHS or RHS match. 5170 if (LHS == FoundRHS || RHS == FoundLHS) { 5171 if (isa<SCEVConstant>(RHS)) { 5172 std::swap(FoundLHS, FoundRHS); 5173 FoundPred = ICmpInst::getSwappedPredicate(FoundPred); 5174 } else { 5175 std::swap(LHS, RHS); 5176 Pred = ICmpInst::getSwappedPredicate(Pred); 5177 } 5178 } 5179 5180 // Check whether the found predicate is the same as the desired predicate. 5181 if (FoundPred == Pred) 5182 return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); 5183 5184 // Check whether swapping the found predicate makes it the same as the 5185 // desired predicate. 5186 if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { 5187 if (isa<SCEVConstant>(RHS)) 5188 return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS); 5189 else 5190 return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), 5191 RHS, LHS, FoundLHS, FoundRHS); 5192 } 5193 5194 // Check whether the actual condition is beyond sufficient. 
5195 if (FoundPred == ICmpInst::ICMP_EQ) 5196 if (ICmpInst::isTrueWhenEqual(Pred)) 5197 if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS)) 5198 return true; 5199 if (Pred == ICmpInst::ICMP_NE) 5200 if (!ICmpInst::isTrueWhenEqual(FoundPred)) 5201 if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS)) 5202 return true; 5203 5204 // Otherwise assume the worst. 5205 return false; 5206} 5207 5208/// isImpliedCondOperands - Test whether the condition described by Pred, 5209/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS, 5210/// and FoundRHS is true. 5211bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, 5212 const SCEV *LHS, const SCEV *RHS, 5213 const SCEV *FoundLHS, 5214 const SCEV *FoundRHS) { 5215 return isImpliedCondOperandsHelper(Pred, LHS, RHS, 5216 FoundLHS, FoundRHS) || 5217 // ~x < ~y --> x > y 5218 isImpliedCondOperandsHelper(Pred, LHS, RHS, 5219 getNotSCEV(FoundRHS), 5220 getNotSCEV(FoundLHS)); 5221} 5222 5223/// isImpliedCondOperandsHelper - Test whether the condition described by 5224/// Pred, LHS, and RHS is true whenever the condition described by Pred, 5225/// FoundLHS, and FoundRHS is true. 
5226bool 5227ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, 5228 const SCEV *LHS, const SCEV *RHS, 5229 const SCEV *FoundLHS, 5230 const SCEV *FoundRHS) { 5231 switch (Pred) { 5232 default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); 5233 case ICmpInst::ICMP_EQ: 5234 case ICmpInst::ICMP_NE: 5235 if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS)) 5236 return true; 5237 break; 5238 case ICmpInst::ICMP_SLT: 5239 case ICmpInst::ICMP_SLE: 5240 if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) && 5241 isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS)) 5242 return true; 5243 break; 5244 case ICmpInst::ICMP_SGT: 5245 case ICmpInst::ICMP_SGE: 5246 if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) && 5247 isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS)) 5248 return true; 5249 break; 5250 case ICmpInst::ICMP_ULT: 5251 case ICmpInst::ICMP_ULE: 5252 if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) && 5253 isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS)) 5254 return true; 5255 break; 5256 case ICmpInst::ICMP_UGT: 5257 case ICmpInst::ICMP_UGE: 5258 if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) && 5259 isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS)) 5260 return true; 5261 break; 5262 } 5263 5264 return false; 5265} 5266 5267/// getBECount - Subtract the end and start values and divide by the step, 5268/// rounding up, to get the number of times the backedge is executed. Return 5269/// CouldNotCompute if an intermediate computation overflows. 
/// getBECount - Subtract the end and start values and divide by the step,
/// rounding up, to get the number of times the backedge is executed. Return
/// CouldNotCompute if an intermediate computation overflows.
const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
                                        const SCEV *End,
                                        const SCEV *Step,
                                        bool NoWrap) {
  assert(!isKnownNegative(Step) &&
         "This code doesn't handle negative strides yet!");

  const Type *Ty = Start->getType();
  const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
  const SCEV *Diff = getMinusSCEV(End, Start);
  // RoundUp = Step - 1, so (Diff + RoundUp) / Step rounds the quotient up.
  const SCEV *RoundUp = getAddExpr(Step, NegOne);

  // Add an adjustment to the difference between End and Start so that
  // the division will effectively round up.
  const SCEV *Add = getAddExpr(Diff, RoundUp);

  if (!NoWrap) {
    // Check Add for unsigned overflow by redoing the addition in a type one
    // bit wider and comparing: if the zero-extended narrow sum differs from
    // the wide sum, the narrow addition wrapped.
    // TODO: More sophisticated things could be done here.
    const Type *WideTy = IntegerType::get(getContext(),
                                          getTypeSizeInBits(Ty) + 1);
    const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
    const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
    const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
    if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
      return getCouldNotCompute();
  }

  return getUDivExpr(Add, Step);
}

/// HowManyLessThans - Return the number of times a backedge containing the
/// specified less-than comparison will execute. If not computable, return
/// CouldNotCompute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                                  const Loop *L, bool isSigned) {
  // Only handle: "ADDREC < LoopInvariant".
  if (!RHS->isLoopInvariant(L)) return getCouldNotCompute();

  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
  if (!AddRec || AddRec->getLoop() != L)
    return getCouldNotCompute();

  // Check to see if we have a flag which makes analysis easy.
  bool NoWrap = isSigned ? AddRec->hasNoSignedWrap() :
                           AddRec->hasNoUnsignedWrap();

  if (AddRec->isAffine()) {
    unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
    const SCEV *Step = AddRec->getStepRecurrence(*this);

    if (Step->isZero())
      return getCouldNotCompute();
    if (Step->isOne()) {
      // With unit stride, the iteration never steps past the limit value.
    } else if (isKnownPositive(Step)) {
      // Test whether a positive iteration can step past the limit
      // value and past the maximum value for its type in a single step.
      // Note that it's not sufficient to check NoWrap here, because even
      // though the value after a wrap is undefined, it's not undefined
      // behavior, so if wrap does occur, the loop could either terminate or
      // loop infinitely, but in either case, the loop is guaranteed to
      // iterate at least until the iteration where the wrapping occurs.
      const SCEV *One = getConstant(Step->getType(), 1);
      if (isSigned) {
        APInt Max = APInt::getSignedMaxValue(BitWidth);
        if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
              .slt(getSignedRange(RHS).getSignedMax()))
          return getCouldNotCompute();
      } else {
        APInt Max = APInt::getMaxValue(BitWidth);
        if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
              .ult(getUnsignedRange(RHS).getUnsignedMax()))
          return getCouldNotCompute();
      }
    } else
      // TODO: Handle negative strides here and below.
      return getCouldNotCompute();

    // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
    // m. So, we count the number of iterations in which {n,+,s} < m is true.
    // Note that we cannot simply return max(m-n,0)/s because it's not safe to
    // treat m-n as signed nor unsigned due to overflow possibility.

    // First, we get the value of the LHS in the first iteration: n
    const SCEV *Start = AddRec->getOperand(0);

    // Determine the minimum constant start value.
    const SCEV *MinStart = getConstant(isSigned ?
      getSignedRange(Start).getSignedMin() :
      getUnsignedRange(Start).getUnsignedMin());

    // If we know that the condition is true in order to enter the loop,
    // then we know that it will run exactly (m-n)/s times. Otherwise, we
    // only know that it will execute (max(m,n)-n)/s times. In both cases,
    // the division must round up.
    const SCEV *End = RHS;
    if (!isLoopEntryGuardedByCond(L,
                                  isSigned ? ICmpInst::ICMP_SLT :
                                             ICmpInst::ICMP_ULT,
                                  getMinusSCEV(Start, Step), RHS))
      End = isSigned ? getSMaxExpr(RHS, Start)
                     : getUMaxExpr(RHS, Start);

    // Determine the maximum constant end value.
    const SCEV *MaxEnd = getConstant(isSigned ?
      getSignedRange(End).getSignedMax() :
      getUnsignedRange(End).getUnsignedMax());

    // If MaxEnd is within a step of the maximum integer value in its type,
    // adjust it down to the minimum value which would produce the same effect.
    // This allows the subsequent ceiling division of (N+(step-1))/step to
    // compute the correct value.
    const SCEV *StepMinusOne = getMinusSCEV(Step,
                                            getConstant(Step->getType(), 1));
    MaxEnd = isSigned ?
      getSMinExpr(MaxEnd,
                  getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
                               StepMinusOne)) :
      getUMinExpr(MaxEnd,
                  getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
                               StepMinusOne));

    // Finally, we subtract these two values and divide, rounding up, to get
    // the number of times the backedge is executed.
    const SCEV *BECount = getBECount(Start, End, Step, NoWrap);

    // The maximum backedge count is similar, except using the minimum start
    // value and the maximum end value.
    const SCEV *MaxBECount = getBECount(MinStart, MaxEnd, Step, NoWrap);

    return BackedgeTakenInfo(BECount, MaxBECount);
  }

  return getCouldNotCompute();
}
/// getNumIterationsInRange - Return the number of iterations of this loop that
/// produce values in the specified constant range. Another way of looking at
/// this is that it returns the first iteration number where the value is not in
/// the condition, thus computing the exit count. If the iteration count can't
/// be computed, an instance of SCEVCouldNotCompute is returned.
const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
                                                    ScalarEvolution &SE) const {
  if (Range.isFullSet())  // Infinite loop.
    return SE.getCouldNotCompute();

  // If the start is a non-zero constant, shift the range to simplify things:
  // {C,+,X} in Range is the same as {0,+,X} in (Range - C).
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
    if (!SC->getValue()->isZero()) {
      SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
      Operands[0] = SE.getConstant(SC->getType(), 0);
      const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop());
      if (const SCEVAddRecExpr *ShiftedAddRec =
            dyn_cast<SCEVAddRecExpr>(Shifted))
        return ShiftedAddRec->getNumIterationsInRange(
                          Range.subtract(SC->getValue()->getValue()), SE);
      // This is strange and shouldn't happen.
      return SE.getCouldNotCompute();
    }

  // The only time we can solve this is when we have all constant indices.
  // Otherwise, we cannot determine the overflow conditions.
  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
    if (!isa<SCEVConstant>(getOperand(i)))
      return SE.getCouldNotCompute();


  // Okay at this point we know that all elements of the chrec are constants and
  // that the start element is zero.

  // First check to see if the range contains zero. If not, the first
  // iteration exits.
  unsigned BitWidth = SE.getTypeSizeInBits(getType());
  if (!Range.contains(APInt(BitWidth, 0)))
    return SE.getConstant(getType(), 0);

  if (isAffine()) {
    // If this is an affine expression then we have this situation:
    //   Solve {0,+,A} in Range  ===  Ax in Range

    // We know that zero is in the range. If A is positive then we know that
    // the upper value of the range must be the first possible exit value.
    // If A is negative then the lower of the range is the last possible loop
    // value. Also note that we already checked for a full range.
    APInt One(BitWidth,1);
    APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
    APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();

    // The exit value should be (End+A)/A.
    APInt ExitVal = (End + A).udiv(A);
    ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);

    // Evaluate at the exit value. If we really did fall out of the valid
    // range, then we computed our trip count, otherwise wrap around or other
    // things must have happened.
    ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
    if (Range.contains(Val->getValue()))
      return SE.getCouldNotCompute();  // Something strange happened

    // Ensure that the previous value is in the range. This is a sanity check.
    assert(Range.contains(
           EvaluateConstantChrecAtConstant(this,
           ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
           "Linear scev computation is off in a bad way!");
    return SE.getConstant(ExitValue);
  } else if (isQuadratic()) {
    // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
    // quadratic equation to solve it. To do this, we must frame our problem in
    // terms of figuring out when zero is crossed, instead of when
    // Range.getUpper() is crossed.
    SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
    NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
    const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop());

    // Next, solve the constructed addrec
    std::pair<const SCEV *,const SCEV *> Roots =
      SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
    const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
    const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
    // NOTE(review): only R1 is null-checked before R2->getValue() is used
    // below; this presumably relies on SolveQuadraticEquation returning
    // constants for both roots or for neither — confirm against its
    // implementation.
    if (R1) {
      // Pick the smallest positive root value.
      if (ConstantInt *CB =
          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
                         R1->getValue(), R2->getValue()))) {
        if (CB->getZExtValue() == false)
          std::swap(R1, R2);   // R1 is the minimum root now.

        // Make sure the root is not off by one. The returned iteration should
        // not be in the range, but the previous one should be. When solving
        // for "X*X < 5", for example, we should not return a root of 2.
        ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
                                                             R1->getValue(),
                                                             SE);
        if (Range.contains(R1Val->getValue())) {
          // The next iteration must be out of the range...
          ConstantInt *NextVal =
                ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);

          R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
          if (!Range.contains(R1Val->getValue()))
            return SE.getConstant(NextVal);
          return SE.getCouldNotCompute();  // Something strange happened
        }

        // If R1 was not in the range, then it is a good return value. Make
        // sure that R1-1 WAS in the range though, just in case.
        ConstantInt *NextVal =
                ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
        R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
        if (Range.contains(R1Val->getValue()))
          return R1;
        return SE.getCouldNotCompute();  // Something strange happened
      }
    }
  }

  return SE.getCouldNotCompute();
}



//===----------------------------------------------------------------------===//
//                   SCEVCallbackVH Class Implementation
//===----------------------------------------------------------------------===//

/// deleted - Called when the tracked Value is destroyed; drop all cached
/// SCEV information derived from it.
void ScalarEvolution::SCEVCallbackVH::deleted() {
  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
  if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
    SE->ConstantEvolutionLoopExitValue.erase(PN);
  SE->Scalars.erase(getValPtr());
  // this now dangles!
}
/// allUsesReplacedWith - Called when the tracked Value is RAUW'd; invalidate
/// the cached SCEVs of everything that (transitively) used the old value so
/// future queries recompute against the replacement.
void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) {
  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");

  // Forget all the expressions associated with users of the old value,
  // so that future queries will recompute the expressions using the new
  // value.
  SmallVector<User *, 16> Worklist;
  SmallPtrSet<User *, 8> Visited;
  Value *Old = getValPtr();
  bool DeleteOld = false;
  for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
       UI != UE; ++UI)
    Worklist.push_back(*UI);
  while (!Worklist.empty()) {
    User *U = Worklist.pop_back_val();
    // Deleting the Old value will cause this to dangle. Postpone
    // that until everything else is done.
    if (U == Old) {
      DeleteOld = true;
      continue;
    }
    // Skip users already processed (the use graph may contain cycles).
    if (!Visited.insert(U))
      continue;
    if (PHINode *PN = dyn_cast<PHINode>(U))
      SE->ConstantEvolutionLoopExitValue.erase(PN);
    SE->Scalars.erase(U);
    for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
         UI != UE; ++UI)
      Worklist.push_back(*UI);
  }
  // Delete the Old value if it (indirectly) references itself.
  if (DeleteOld) {
    if (PHINode *PN = dyn_cast<PHINode>(Old))
      SE->ConstantEvolutionLoopExitValue.erase(PN);
    SE->Scalars.erase(Old);
    // this now dangles!
  }
  // this may dangle!
}

ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
  : CallbackVH(V), SE(se) {}

//===----------------------------------------------------------------------===//
//                   ScalarEvolution Class Implementation
//===----------------------------------------------------------------------===//

ScalarEvolution::ScalarEvolution()
  : FunctionPass(&ID) {
}

/// runOnFunction - Cache the analyses this pass consumes; the actual SCEV
/// work is performed lazily on demand. Never modifies the function.
bool ScalarEvolution::runOnFunction(Function &F) {
  this->F = &F;
  LI = &getAnalysis<LoopInfo>();
  TD = getAnalysisIfAvailable<TargetData>();
  DT = &getAnalysis<DominatorTree>();
  return false;
}

/// releaseMemory - Drop all cached SCEV state and reclaim the allocator.
void ScalarEvolution::releaseMemory() {
  Scalars.clear();
  BackedgeTakenCounts.clear();
  ConstantEvolutionLoopExitValue.clear();
  ValuesAtScopes.clear();
  UniqueSCEVs.clear();
  SCEVAllocator.Reset();
}

void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequiredTransitive<LoopInfo>();
  AU.addRequiredTransitive<DominatorTree>();
}

/// hasLoopInvariantBackedgeTakenCount - Return true if a (computable)
/// backedge-taken count is available for the given loop.
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
  return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
}
/// PrintLoopInfo - Print the backedge-taken counts (exact and max) for L and,
/// recursively, all of its nested loops.
static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
                          const Loop *L) {
  // Print all inner loops first
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
    PrintLoopInfo(OS, SE, *I);

  OS << "Loop ";
  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
  OS << ": ";

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1)
    OS << "<multiple exits> ";

  if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
    OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
  } else {
    OS << "Unpredictable backedge-taken count. ";
  }

  OS << "\n"
        "Loop ";
  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
  OS << ": ";

  if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
    OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
  } else {
    OS << "Unpredictable max backedge-taken count. ";
  }

  OS << "\n";
}

void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
  // ScalarEvolution's implementation of the print method is to print
  // out SCEV values of all instructions that are interesting. Doing
  // this potentially causes it to create new SCEV objects though,
  // which technically conflicts with the const qualifier. This isn't
  // observable from outside the class though, so casting away the
  // const isn't dangerous.
  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);

  OS << "Classifying expressions for: ";
  WriteAsOperand(OS, F, /*PrintType=*/false);
  OS << "\n";
  // Only SCEVable, non-comparison instructions are worth classifying.
  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
    if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
      OS << *I << '\n';
      OS << " --> ";
      const SCEV *SV = SE.getSCEV(&*I);
      SV->print(OS);

      const Loop *L = LI->getLoopFor((*I).getParent());

      // Print the value at the innermost enclosing loop's scope when it
      // differs from the raw SCEV.
      const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
      if (AtUse != SV) {
        OS << " --> ";
        AtUse->print(OS);
      }

      if (L) {
        // Print the value as seen just after exiting the loop, if it is
        // invariant there.
        OS << "\t\t" "Exits: ";
        const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
        if (!ExitValue->isLoopInvariant(L)) {
          OS << "<<Unknown>>";
        } else {
          OS << *ExitValue;
        }
      }

      OS << "\n";
    }

  OS << "Determining loop execution counts for: ";
  WriteAsOperand(OS, F, /*PrintType=*/false);
  OS << "\n";
  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
    PrintLoopInfo(OS, &SE, *I);
}