LoopVectorize.cpp revision f01cad69c1ab1b025c524d1ac16060b2cc0f4668
1//===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops 11// and generates target-independent LLVM-IR. Legalization of the IR is done 12// in the codegen. However, the vectorizes uses (will use) the codegen 13// interfaces to generate IR that is likely to result in an optimal binary. 14// 15// The loop vectorizer combines consecutive loop iteration into a single 16// 'wide' iteration. After this transformation the index is incremented 17// by the SIMD vector width, and not by one. 18// 19// This pass has three parts: 20// 1. The main loop pass that drives the different parts. 21// 2. LoopVectorizationLegality - A helper class that checks for the legality 22// of the vectorization. 23// 3. SingleBlockLoopVectorizer - A helper class that performs the actual 24// widening of instructions. 25//===----------------------------------------------------------------------===// 26// 27// The reduction-variable vectorization is based on the paper: 28// D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. 29// 30// Variable uniformity checks are inspired by: 31// Karrenberg, R. and Hack, S. Whole Function Vectorization. 32// 33// Other ideas/concepts are from: 34// A. Zaks and D. Nuzman. Autovectorization in GCC—two years later. 35// 36//===----------------------------------------------------------------------===// 37#define LV_NAME "loop-vectorize" 38#define DEBUG_TYPE LV_NAME 39#include "llvm/Constants.h" 40#include "llvm/DerivedTypes.h" 41#include "llvm/Instructions.h" 42#include "llvm/LLVMContext.h" 43#include "llvm/Pass.h" 44#include "llvm/Analysis/LoopPass.h" 45#include "llvm/Value.h" 46#include "llvm/Function.h" 47#include "llvm/Analysis/Verifier.h" 48#include "llvm/Module.h" 49#include "llvm/Type.h" 50#include "llvm/ADT/SmallVector.h" 51#include "llvm/ADT/StringExtras.h" 52#include "llvm/Analysis/AliasAnalysis.h" 53#include "llvm/Analysis/AliasSetTracker.h" 54#include "llvm/Transforms/Scalar.h" 55#include "llvm/Analysis/ScalarEvolution.h" 56#include "llvm/Analysis/ScalarEvolutionExpressions.h" 57#include "llvm/Analysis/ScalarEvolutionExpander.h" 58#include "llvm/Transforms/Utils/BasicBlockUtils.h" 59#include "llvm/Analysis/ValueTracking.h" 60#include "llvm/Analysis/LoopInfo.h" 61#include "llvm/Support/CommandLine.h" 62#include "llvm/Support/Debug.h" 63#include "llvm/Support/raw_ostream.h" 64#include "llvm/DataLayout.h" 65#include "llvm/Transforms/Utils/Local.h" 66#include <algorithm> 67using namespace llvm; 68 69static cl::opt<unsigned> 70DefaultVectorizationFactor("default-loop-vectorize-width", 71 cl::init(4), cl::Hidden, 72 cl::desc("Set the default loop vectorization width")); 73namespace { 74 75// Forward declaration. 76class LoopVectorizationLegality; 77 78/// Vectorize a simple loop. This class performs the widening of simple single 79/// basic block loops into vectors. It does not perform any 80/// vectorization-legality checks, and just does it. It widens the vectors 81/// to a given vectorization factor (VF). 82class SingleBlockLoopVectorizer { 83public: 84 /// Ctor. 85 SingleBlockLoopVectorizer(Loop *OrigLoop, ScalarEvolution *Se, LoopInfo *Li, 86 LPPassManager *Lpm, unsigned VecWidth): 87 Orig(OrigLoop), SE(Se), LI(Li), LPM(Lpm), VF(VecWidth), 88 Builder(Se->getContext()), Induction(0), OldInduction(0) { } 89 90 // Perform the actual loop widening (vectorization). 91 void vectorize(LoopVectorizationLegality *Legal) { 92 ///Create a new empty loop. Unlink the old loop and connect the new one. 93 createEmptyLoop(Legal); 94 /// Widen each instruction in the old loop to a new one in the new loop. 95 /// Use the Legality module to find the induction and reduction variables. 96 vectorizeLoop(Legal); 97 // register the new loop. 98 cleanup(); 99 } 100 101private: 102 /// Create an empty loop, based on the loop ranges of the old loop. 103 void createEmptyLoop(LoopVectorizationLegality *Legal); 104 /// Copy and widen the instructions from the old loop. 105 void vectorizeLoop(LoopVectorizationLegality *Legal); 106 /// Insert the new loop to the loop hierarchy and pass manager. 107 void cleanup(); 108 109 /// This instruction is un-vectorizable. Implement it as a sequence 110 /// of scalars. 111 void scalarizeInstruction(Instruction *Instr); 112 113 /// Create a broadcast instruction. This method generates a broadcast 114 /// instruction (shuffle) for loop invariant values and for the induction 115 /// value. If this is the induction variable then we extend it to N, N+1, ... 116 /// this is needed because each iteration in the loop corresponds to a SIMD 117 /// element. 118 Value *getBroadcastInstrs(Value *V); 119 120 /// This is a helper function used by getBroadcastInstrs. It adds 0, 1, 2 .. 121 /// for each element in the vector. Starting from zero. 122 Value *getConsecutiveVector(Value* Val); 123 124 /// When we go over instructions in the basic block we rely on previous 125 /// values within the current basic block or on loop invariant values. 126 /// When we widen (vectorize) values we place them in the map. If the values 127 /// are not within the map, they have to be loop invariant, so we simply 128 /// broadcast them into a vector. 129 Value *getVectorValue(Value *V); 130 131 /// Get a uniform vector of constant integers. We use this to get 132 /// vectors of ones and zeros for the reduction code. 133 Constant* getUniformVector(unsigned Val, Type* ScalarTy); 134 135 typedef DenseMap<Value*, Value*> ValueMap; 136 137 /// The original loop. 138 Loop *Orig; 139 // Scev analysis to use. 140 ScalarEvolution *SE; 141 // Loop Info. 142 LoopInfo *LI; 143 // Loop Pass Manager; 144 LPPassManager *LPM; 145 // The vectorization factor to use. 146 unsigned VF; 147 148 // The builder that we use 149 IRBuilder<> Builder; 150 151 // --- Vectorization state --- 152 153 /// Middle Block between the vector and the scalar. 154 BasicBlock *LoopMiddleBlock; 155 ///The ExitBlock of the scalar loop. 156 BasicBlock *LoopExitBlock; 157 ///The vector loop body. 158 BasicBlock *LoopVectorBody; 159 ///The scalar loop body. 160 BasicBlock *LoopScalarBody; 161 ///The first bypass block. 162 BasicBlock *LoopBypassBlock; 163 164 /// The new Induction variable which was added to the new block. 165 PHINode *Induction; 166 /// The induction variable of the old basic block. 167 PHINode *OldInduction; 168 // Maps scalars to widened vectors. 169 ValueMap WidenMap; 170}; 171 172/// Perform the vectorization legality check. This class does not look at the 173/// profitability of vectorization, only the legality. At the moment the checks 174/// are very simple and focus on single basic block loops with a constant 175/// iteration count and no reductions. 176class LoopVectorizationLegality { 177public: 178 LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl): 179 TheLoop(Lp), SE(Se), DL(Dl), Induction(0) { } 180 181 /// This represents the kinds of reductions that we support. 182 enum ReductionKind { 183 IntegerAdd, /// Sum of numbers. 184 IntegerMult, /// Product of numbers. 185 NoReduction /// Not a reduction. 186 }; 187 188 // Holds a pairing of reduction instruction and the reduction kind. 189 typedef std::pair<Instruction*, ReductionKind> ReductionPair; 190 191 /// ReductionList contains the reduction variables 192 /// as well as a single EXIT (from the block) value and the kind of 193 /// reduction variable.. 194 /// Notice that the EXIT instruction can also be the PHI itself. 195 typedef DenseMap<PHINode*, ReductionPair> ReductionList; 196 197 /// Returns the maximum vectorization factor that we *can* use to vectorize 198 /// this loop. This does not mean that it is profitable to vectorize this 199 /// loop, only that it is legal to do so. This may be a large number. We 200 /// can vectorize to any SIMD width below this number. 201 unsigned getLoopMaxVF(); 202 203 /// Returns the Induction variable. 204 PHINode *getInduction() {return Induction;} 205 206 /// Returns the reduction variables found in the loop. 207 ReductionList *getReductionVars() { return &Reductions; } 208 209 /// Check that the GEP operands are all uniform except for the last index 210 /// which has to be the induction variable. 211 bool isConsecutiveGep(Value *Ptr); 212 213private: 214 /// Check if a single basic block loop is vectorizable. 215 /// At this point we know that this is a loop with a constant trip count 216 /// and we only need to check individual instructions. 217 bool canVectorizeBlock(BasicBlock &BB); 218 219 /// When we vectorize loops we may change the order in which 220 /// we read and write from memory. This method checks if it is 221 /// legal to vectorize the code, considering only memory constrains. 222 /// Returns true if BB is vectorizable 223 bool canVectorizeMemory(BasicBlock &BB); 224 225 // Check if a pointer value is known to be disjoint. 226 // Example: Alloca, Global, NoAlias. 227 bool isIdentifiedSafeObject(Value* Val); 228 229 /// Returns True, if 'Phi' is the kind of reduction variable for type 230 /// 'Kind'. If this is a reduction variable, it adds it to ReductionList. 231 bool AddReductionVar(PHINode *Phi, ReductionKind Kind); 232 /// Checks if a constant matches the reduction kind. 233 /// Sums starts with zero. Products start at one. 234 bool isReductionConstant(Value *V, ReductionKind Kind); 235 /// Returns true if the instruction I can be a reduction variable of type 236 /// 'Kind'. 237 bool isReductionInstr(Instruction *I, ReductionKind Kind); 238 /// Returns True, if 'Phi' is an induction variable. 239 bool isInductionVariable(PHINode *Phi); 240 241 /// The loop that we evaluate. 242 Loop *TheLoop; 243 /// Scev analysis. 244 ScalarEvolution *SE; 245 /// DataLayout analysis. 246 DataLayout *DL; 247 248 // --- vectorization state --- // 249 250 /// Holds the induction variable. 251 PHINode *Induction; 252 /// Holds the reduction variables. 253 ReductionList Reductions; 254 /// Allowed outside users. This holds the reduction 255 /// vars which can be accessed from outside the loop. 256 SmallPtrSet<Value*, 4> AllowedExit; 257}; 258 259struct LoopVectorize : public LoopPass { 260 static char ID; // Pass identification, replacement for typeid 261 262 LoopVectorize() : LoopPass(ID) { 263 initializeLoopVectorizePass(*PassRegistry::getPassRegistry()); 264 } 265 266 ScalarEvolution *SE; 267 DataLayout *DL; 268 LoopInfo *LI; 269 270 virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { 271 272 // Only vectorize innermost loops. 273 if (!L->empty()) 274 return false; 275 276 SE = &getAnalysis<ScalarEvolution>(); 277 DL = getAnalysisIfAvailable<DataLayout>(); 278 LI = &getAnalysis<LoopInfo>(); 279 280 DEBUG(dbgs() << "LV: Checking a loop in \"" << 281 L->getHeader()->getParent()->getName() << "\"\n"); 282 283 // Check if it is legal to vectorize the loop. 284 LoopVectorizationLegality LVL(L, SE, DL); 285 unsigned MaxVF = LVL.getLoopMaxVF(); 286 287 // Check that we can vectorize using the chosen vectorization width. 288 if (MaxVF < DefaultVectorizationFactor) { 289 DEBUG(dbgs() << "LV: non-vectorizable MaxVF ("<< MaxVF << ").\n"); 290 return false; 291 } 292 293 DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< MaxVF << ").\n"); 294 295 // If we decided that is is *legal* to vectorizer the loop. Do it. 296 SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, DefaultVectorizationFactor); 297 LB.vectorize(&LVL); 298 299 DEBUG(verifyFunction(*L->getHeader()->getParent())); 300 return true; 301 } 302 303 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 304 LoopPass::getAnalysisUsage(AU); 305 AU.addRequiredID(LoopSimplifyID); 306 AU.addRequiredID(LCSSAID); 307 AU.addRequired<LoopInfo>(); 308 AU.addRequired<ScalarEvolution>(); 309 } 310 311}; 312 313Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) { 314 // Instructions that access the old induction variable 315 // actually want to get the new one. 316 if (V == OldInduction) 317 V = Induction; 318 // Create the types. 319 LLVMContext &C = V->getContext(); 320 Type *VTy = VectorType::get(V->getType(), VF); 321 Type *I32 = IntegerType::getInt32Ty(C); 322 Constant *Zero = ConstantInt::get(I32, 0); 323 Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32, VF)); 324 Value *UndefVal = UndefValue::get(VTy); 325 // Insert the value into a new vector. 326 Value *SingleElem = Builder.CreateInsertElement(UndefVal, V, Zero); 327 // Broadcast the scalar into all locations in the vector. 328 Value *Shuf = Builder.CreateShuffleVector(SingleElem, UndefVal, Zeros, 329 "broadcast"); 330 // We are accessing the induction variable. Make sure to promote the 331 // index for each consecutive SIMD lane. This adds 0,1,2 ... to all lanes. 332 if (V == Induction) 333 return getConsecutiveVector(Shuf); 334 return Shuf; 335} 336 337Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) { 338 assert(Val->getType()->isVectorTy() && "Must be a vector"); 339 assert(Val->getType()->getScalarType()->isIntegerTy() && 340 "Elem must be an integer"); 341 // Create the types. 342 Type *ITy = Val->getType()->getScalarType(); 343 VectorType *Ty = cast<VectorType>(Val->getType()); 344 unsigned VLen = Ty->getNumElements(); 345 SmallVector<Constant*, 8> Indices; 346 347 // Create a vector of consecutive numbers from zero to VF. 348 for (unsigned i = 0; i < VLen; ++i) 349 Indices.push_back(ConstantInt::get(ITy, i)); 350 351 // Add the consecutive indices to the vector value. 352 Constant *Cv = ConstantVector::get(Indices); 353 assert(Cv->getType() == Val->getType() && "Invalid consecutive vec"); 354 return Builder.CreateAdd(Val, Cv, "induction"); 355} 356 357bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) { 358 GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); 359 if (!Gep) 360 return false; 361 362 unsigned NumOperands = Gep->getNumOperands(); 363 Value *LastIndex = Gep->getOperand(NumOperands - 1); 364 365 // Check that all of the gep indices are uniform except for the last. 366 for (unsigned i = 0; i < NumOperands - 1; ++i) 367 if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) 368 return false; 369 370 // We can emit wide load/stores only of the last index is the induction 371 // variable. 372 const SCEV *Last = SE->getSCEV(LastIndex); 373 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) { 374 const SCEV *Step = AR->getStepRecurrence(*SE); 375 376 // The memory is consecutive because the last index is consecutive 377 // and all other indices are loop invariant. 378 if (Step->isOne()) 379 return true; 380 } 381 382 return false; 383} 384 385Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) { 386 assert(!V->getType()->isVectorTy() && "Can't widen a vector"); 387 // If we saved a vectorized copy of V, use it. 388 ValueMap::iterator it = WidenMap.find(V); 389 if (it != WidenMap.end()) 390 return it->second; 391 392 // Broadcast V and save the value for future uses. 393 Value *B = getBroadcastInstrs(V); 394 WidenMap[V] = B; 395 return B; 396} 397 398Constant* 399SingleBlockLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) { 400 SmallVector<Constant*, 8> Indices; 401 // Create a vector of consecutive numbers from zero to VF. 402 for (unsigned i = 0; i < VF; ++i) 403 Indices.push_back(ConstantInt::get(ScalarTy, Val)); 404 405 // Add the consecutive indices to the vector value. 406 return ConstantVector::get(Indices); 407} 408 409void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) { 410 assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); 411 // Holds vector parameters or scalars, in case of uniform vals. 412 SmallVector<Value*, 8> Params; 413 414 // Find all of the vectorized parameters. 415 for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { 416 Value *SrcOp = Instr->getOperand(op); 417 418 // If we are accessing the old induction variable, use the new one. 419 if (SrcOp == OldInduction) { 420 Params.push_back(getBroadcastInstrs(Induction)); 421 continue; 422 } 423 424 // Try using previously calculated values. 425 Instruction *SrcInst = dyn_cast<Instruction>(SrcOp); 426 427 // If the src is an instruction that appeared earlier in the basic block 428 // then it should already be vectorized. 429 if (SrcInst && SrcInst->getParent() == Instr->getParent()) { 430 assert(WidenMap.count(SrcInst) && "Source operand is unavailable"); 431 // The parameter is a vector value from earlier. 432 Params.push_back(WidenMap[SrcInst]); 433 } else { 434 // The parameter is a scalar from outside the loop. Maybe even a constant. 435 Params.push_back(SrcOp); 436 } 437 } 438 439 assert(Params.size() == Instr->getNumOperands() && 440 "Invalid number of operands"); 441 442 // Does this instruction return a value ? 443 bool IsVoidRetTy = Instr->getType()->isVoidTy(); 444 Value *VecResults = 0; 445 446 // If we have a return value, create an empty vector. We place the scalarized 447 // instructions in this vector. 448 if (!IsVoidRetTy) 449 VecResults = UndefValue::get(VectorType::get(Instr->getType(), VF)); 450 451 // For each scalar that we create. 452 for (unsigned i = 0; i < VF; ++i) { 453 Instruction *Cloned = Instr->clone(); 454 if (!IsVoidRetTy) 455 Cloned->setName(Instr->getName() + ".cloned"); 456 // Replace the operands of the cloned instrucions with extracted scalars. 457 for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { 458 Value *Op = Params[op]; 459 // Param is a vector. Need to extract the right lane. 460 if (Op->getType()->isVectorTy()) 461 Op = Builder.CreateExtractElement(Op, Builder.getInt32(i)); 462 Cloned->setOperand(op, Op); 463 } 464 465 // Place the cloned scalar in the new loop. 466 Builder.Insert(Cloned); 467 468 // If the original scalar returns a value we need to place it in a vector 469 // so that future users will be able to use it. 470 if (!IsVoidRetTy) 471 VecResults = Builder.CreateInsertElement(VecResults, Cloned, 472 Builder.getInt32(i)); 473 } 474 475 if (!IsVoidRetTy) 476 WidenMap[Instr] = VecResults; 477} 478 479void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { 480 /* 481 In this function we generate a new loop. The new loop will contain 482 the vectorized instructions while the old loop will continue to run the 483 scalar remainder. 484 485 [ ] <-- vector loop bypass. 486 / | 487 / v 488| [ ] <-- vector pre header. 489| | 490| v 491| [ ] \ 492| [ ]_| <-- vector loop. 493| | 494 \ v 495 >[ ] <--- middle-block. 496 / | 497 / v 498| [ ] <--- new preheader. 499| | 500| v 501| [ ] \ 502| [ ]_| <-- old scalar loop to handle remainder. 503 \ | 504 \ v 505 >[ ] <-- exit block. 506 ... 507 */ 508 509 // This is the original scalar-loop preheader. 510 BasicBlock *BypassBlock = Orig->getLoopPreheader(); 511 BasicBlock *ExitBlock = Orig->getExitBlock(); 512 assert(ExitBlock && "Must have an exit block"); 513 514 assert(Orig->getNumBlocks() == 1 && "Invalid loop"); 515 assert(BypassBlock && "Invalid loop structure"); 516 517 BasicBlock *VectorPH = 518 BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph"); 519 BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(), 520 "vector.body"); 521 522 BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(), 523 "middle.block"); 524 BasicBlock *ScalarPH = 525 MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), 526 "scalar.preheader"); 527 // Find the induction variable. 528 BasicBlock *OldBasicBlock = Orig->getHeader(); 529 OldInduction = Legal->getInduction(); 530 assert(OldInduction && "We must have a single phi node."); 531 Type *IdxTy = OldInduction->getType(); 532 533 // Use this IR builder to create the loop instructions (Phi, Br, Cmp) 534 // inside the loop. 535 Builder.SetInsertPoint(VecBody->getFirstInsertionPt()); 536 537 // Generate the induction variable. 538 Induction = Builder.CreatePHI(IdxTy, 2, "index"); 539 Constant *Zero = ConstantInt::get(IdxTy, 0); 540 Constant *Step = ConstantInt::get(IdxTy, VF); 541 542 // Find the loop boundaries. 543 const SCEV *ExitCount = SE->getExitCount(Orig, Orig->getHeader()); 544 assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); 545 546 // Get the total trip count from the count by adding 1. 547 ExitCount = SE->getAddExpr(ExitCount, 548 SE->getConstant(ExitCount->getType(), 1)); 549 550 // Expand the trip count and place the new instructions in the preheader. 551 // Notice that the pre-header does not change, only the loop body. 552 SCEVExpander Exp(*SE, "induction"); 553 Instruction *Loc = BypassBlock->getTerminator(); 554 555 // We may need to extend the index in case there is a type mismatch. 556 // We know that the count starts at zero and does not overflow. 557 // We are using Zext because it should be less expensive. 558 if (ExitCount->getType() != Induction->getType()) 559 ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy); 560 561 // Count holds the overall loop count (N). 562 Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc); 563 // Now we need to generate the expression for N - (N % VF), which is 564 // the part that the vectorized body will execute. 565 Constant *CIVF = ConstantInt::get(IdxTy, VF); 566 Value *R = BinaryOperator::CreateURem(Count, CIVF, "n.mod.vf", Loc); 567 Value *CountRoundDown = BinaryOperator::CreateSub(Count, R, "n.vec", Loc); 568 569 // Now, compare the new count to zero. If it is zero, jump to the scalar part. 570 Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, 571 CountRoundDown, ConstantInt::getNullValue(IdxTy), 572 "cmp.zero", Loc); 573 BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc); 574 // Remove the old terminator. 575 Loc->eraseFromParent(); 576 577 // Add a check in the middle block to see if we have completed 578 // all of the iterations in the first vector loop. 579 // If (N - N%VF) == N, then we *don't* need to run the remainder. 580 Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count, 581 CountRoundDown, "cmp.n", 582 MiddleBlock->getTerminator()); 583 584 BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator()); 585 // Remove the old terminator. 586 MiddleBlock->getTerminator()->eraseFromParent(); 587 588 // Create i+1 and fill the PHINode. 589 Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next"); 590 Induction->addIncoming(Zero, VectorPH); 591 Induction->addIncoming(NextIdx, VecBody); 592 // Create the compare. 593 Value *ICmp = Builder.CreateICmpEQ(NextIdx, CountRoundDown); 594 Builder.CreateCondBr(ICmp, MiddleBlock, VecBody); 595 596 // Now we have two terminators. Remove the old one from the block. 597 VecBody->getTerminator()->eraseFromParent(); 598 599 // Fix the scalar body iteration count. 600 unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH); 601 OldInduction->setIncomingValue(BlockIdx, CountRoundDown); 602 603 // Get ready to start creating new instructions into the vectorized body. 604 Builder.SetInsertPoint(VecBody->getFirstInsertionPt()); 605 606 // Register the new loop. 607 Loop* Lp = new Loop(); 608 LPM->insertLoop(Lp, Orig->getParentLoop()); 609 610 Lp->addBasicBlockToLoop(VecBody, LI->getBase()); 611 612 Loop *ParentLoop = Orig->getParentLoop(); 613 if (ParentLoop) { 614 ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase()); 615 ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase()); 616 ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase()); 617 } 618 619 // Save the state. 620 LoopMiddleBlock = MiddleBlock; 621 LoopExitBlock = ExitBlock; 622 LoopVectorBody = VecBody; 623 LoopScalarBody = OldBasicBlock; 624 LoopBypassBlock = BypassBlock; 625} 626 627void 628SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { 629 typedef SmallVector<PHINode*, 4> PhiVector; 630 BasicBlock &BB = *Orig->getHeader(); 631 632 // In order to support reduction variables we need to be able to vectorize 633 // Phi nodes. Phi nodes have cycles, so we need to vectorize them in two 634 // steages. First, we create a new vector PHI node with no incoming edges. 635 // We use this value when we vectorize all of the instructions that use the 636 // PHI. Next, after all of the instructions in the block are complete we 637 // add the new incoming edges to the PHI. At this point all of the 638 // instructions in the basic block are vectorized, so we can use them to 639 // construct the PHI. 640 PhiVector PHIsToFix; 641 642 // For each instruction in the old loop. 643 for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { 644 Instruction *Inst = it; 645 646 switch (Inst->getOpcode()) { 647 case Instruction::Br: 648 // Nothing to do for PHIs and BR, since we already took care of the 649 // loop control flow instructions. 650 continue; 651 case Instruction::PHI:{ 652 PHINode* P = cast<PHINode>(Inst); 653 // Special handling for the induction var. 654 if (OldInduction == Inst) 655 continue; 656 // This is phase one of vectorizing PHIs. 657 // This has to be a reduction variable. 658 assert(Legal->getReductionVars()->count(P) && "Not a Reduction"); 659 Type *VecTy = VectorType::get(Inst->getType(), VF); 660 WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi"); 661 PHIsToFix.push_back(P); 662 continue; 663 } 664 case Instruction::Add: 665 case Instruction::FAdd: 666 case Instruction::Sub: 667 case Instruction::FSub: 668 case Instruction::Mul: 669 case Instruction::FMul: 670 case Instruction::UDiv: 671 case Instruction::SDiv: 672 case Instruction::FDiv: 673 case Instruction::URem: 674 case Instruction::SRem: 675 case Instruction::FRem: 676 case Instruction::Shl: 677 case Instruction::LShr: 678 case Instruction::AShr: 679 case Instruction::And: 680 case Instruction::Or: 681 case Instruction::Xor: { 682 // Just widen binops. 683 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); 684 Value *A = getVectorValue(Inst->getOperand(0)); 685 Value *B = getVectorValue(Inst->getOperand(1)); 686 // Use this vector value for all users of the original instruction. 687 WidenMap[Inst] = Builder.CreateBinOp(BinOp->getOpcode(), A, B); 688 break; 689 } 690 case Instruction::Select: { 691 // Widen selects. 692 // TODO: If the selector is loop invariant we can issue a select 693 // instruction with a scalar condition. 694 Value *A = getVectorValue(Inst->getOperand(0)); 695 Value *B = getVectorValue(Inst->getOperand(1)); 696 Value *C = getVectorValue(Inst->getOperand(2)); 697 WidenMap[Inst] = Builder.CreateSelect(A, B, C); 698 break; 699 } 700 701 case Instruction::ICmp: 702 case Instruction::FCmp: { 703 // Widen compares. Generate vector compares. 704 bool FCmp = (Inst->getOpcode() == Instruction::FCmp); 705 CmpInst *Cmp = dyn_cast<CmpInst>(Inst); 706 Value *A = getVectorValue(Inst->getOperand(0)); 707 Value *B = getVectorValue(Inst->getOperand(1)); 708 if (FCmp) 709 WidenMap[Inst] = Builder.CreateFCmp(Cmp->getPredicate(), A, B); 710 else 711 WidenMap[Inst] = Builder.CreateICmp(Cmp->getPredicate(), A, B); 712 break; 713 } 714 715 case Instruction::Store: { 716 // Attempt to issue a wide store. 717 StoreInst *SI = dyn_cast<StoreInst>(Inst); 718 Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF); 719 Value *Ptr = SI->getPointerOperand(); 720 unsigned Alignment = SI->getAlignment(); 721 GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); 722 // This store does not use GEPs. 723 if (!Legal->isConsecutiveGep(Gep)) { 724 scalarizeInstruction(Inst); 725 break; 726 } 727 728 // Create the new GEP with the new induction variable. 729 GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); 730 unsigned NumOperands = Gep->getNumOperands(); 731 Gep2->setOperand(NumOperands - 1, Induction); 732 Ptr = Builder.Insert(Gep2); 733 Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo()); 734 Value *Val = getVectorValue(SI->getValueOperand()); 735 Builder.CreateStore(Val, Ptr)->setAlignment(Alignment); 736 break; 737 } 738 case Instruction::Load: { 739 // Attempt to issue a wide load. 740 LoadInst *LI = dyn_cast<LoadInst>(Inst); 741 Type *RetTy = VectorType::get(LI->getType(), VF); 742 Value *Ptr = LI->getPointerOperand(); 743 unsigned Alignment = LI->getAlignment(); 744 GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); 745 746 // We don't have a gep. Scalarize the load. 747 if (!Legal->isConsecutiveGep(Gep)) { 748 scalarizeInstruction(Inst); 749 break; 750 } 751 752 // Create the new GEP with the new induction variable. 753 GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); 754 unsigned NumOperands = Gep->getNumOperands(); 755 Gep2->setOperand(NumOperands - 1, Induction); 756 Ptr = Builder.Insert(Gep2); 757 Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo()); 758 LI = Builder.CreateLoad(Ptr); 759 LI->setAlignment(Alignment); 760 // Use this vector value for all users of the load. 761 WidenMap[Inst] = LI; 762 break; 763 } 764 case Instruction::ZExt: 765 case Instruction::SExt: 766 case Instruction::FPToUI: 767 case Instruction::FPToSI: 768 case Instruction::FPExt: 769 case Instruction::PtrToInt: 770 case Instruction::IntToPtr: 771 case Instruction::SIToFP: 772 case Instruction::UIToFP: 773 case Instruction::Trunc: 774 case Instruction::FPTrunc: 775 case Instruction::BitCast: { 776 /// Vectorize bitcasts. 777 CastInst *CI = dyn_cast<CastInst>(Inst); 778 Value *A = getVectorValue(Inst->getOperand(0)); 779 Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF); 780 WidenMap[Inst] = Builder.CreateCast(CI->getOpcode(), A, DestTy); 781 break; 782 } 783 784 default: 785 /// All other instructions are unsupported. Scalarize them. 786 scalarizeInstruction(Inst); 787 break; 788 }// end of switch. 789 }// end of for_each instr. 790 791 // At this point every instruction in the original loop is widended to 792 // a vector form. We are almost done. Now, we need to fix the PHI nodes 793 // that we vectorized. The PHI nodes are currently empty because we did 794 // not want to introduce cycles. Notice that the remaining PHI nodes 795 // that we need to fix are reduction variables. 796 797 // Create the 'reduced' values for each of the induction vars. 798 // The reduced values are the vector values that we scalarize and combine 799 // after the loop is finished. 800 for (PhiVector::iterator it = PHIsToFix.begin(), e = PHIsToFix.end(); 801 it != e; ++it) { 802 PHINode *RdxPhi = *it; 803 PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]); 804 assert(RdxPhi && "Unable to recover vectorized PHI"); 805 806 // Find the reduction variable. 807 assert(Legal->getReductionVars()->count(RdxPhi) && 808 "Unable to find the reduction variable"); 809 LoopVectorizationLegality::ReductionPair ReductionVar = 810 (*Legal->getReductionVars())[RdxPhi]; 811 812 // This is the vector-clone of the value that leaves the loop. 813 Value *VectorExit = getVectorValue(ReductionVar.first); 814 Type *VecTy = VectorExit->getType(); 815 816 // This is the kind of reduction. 817 LoopVectorizationLegality::ReductionKind RdxKind = ReductionVar.second; 818 // Find the reduction identity variable. 819 // Zero for addition. One for Multiplication. 820 unsigned IdentitySclr = 821 (RdxKind == LoopVectorizationLegality::IntegerAdd ? 0 : 1); 822 Constant *Identity = getUniformVector(IdentitySclr, VecTy->getScalarType()); 823 824 // Fix the vector-loop phi. 825 // We created the induction variable so we know that the 826 // preheader is the first entry. 827 BasicBlock *VecPreheader = Induction->getIncomingBlock(0); 828 VecRdxPhi->addIncoming(Identity, VecPreheader); 829 unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody); 830 Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx)); 831 VecRdxPhi->addIncoming(Val, LoopVectorBody); 832 833 // Before each round, move the insertion point right between 834 // the PHIs and the values we are going to write. 835 // This allows us to write both PHINodes and the extractelement 836 // instructions. 837 Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt()); 838 839 // This PHINode contains the vectorized reduction variable, or 840 // the identity vector, if we bypass the vector loop. 841 PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi"); 842 NewPhi->addIncoming(Identity, LoopBypassBlock); 843 NewPhi->addIncoming(getVectorValue(ReductionVar.first), LoopVectorBody); 844 845 // Extract the first scalar. 846 Value *Scalar0 = 847 Builder.CreateExtractElement(NewPhi, Builder.getInt32(0)); 848 // Extract and sum the remaining vector elements. 849 for (unsigned i=1; i < VF; ++i) { 850 Value *Scalar1 = 851 Builder.CreateExtractElement(NewPhi, Builder.getInt32(i)); 852 if (RdxKind == LoopVectorizationLegality::IntegerAdd) { 853 Scalar0 = Builder.CreateAdd(Scalar0, Scalar1); 854 } else { 855 Scalar0 = Builder.CreateMul(Scalar0, Scalar1); 856 } 857 } 858 859 // Now, we need to fix the users of the reduction variable 860 // inside and outside of the scalar remainder loop. 861 // We know that the loop is in LCSSA form. We need to update the 862 // PHI nodes in the exit blocks. 863 for (BasicBlock::iterator LEI = LoopExitBlock->begin(), 864 LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) { 865 PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI); 866 if (!LCSSAPhi) continue; 867 868 // All PHINodes need to have a single entry edge, or two if we already fixed them. 869 assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI"); 870 871 // We found our reduction value exit-PHI. Update it with the incoming bypass edge. 872 if (LCSSAPhi->getIncomingValue(0) == ReductionVar.first) { 873 // Add an edge coming from the bypass. 874 LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock); 875 break; 876 } 877 }// end of the LCSSA phi scan. 878 879 // Fix the scalar loop reduction variable with the incoming reduction sum 880 // from the vector body and from the backedge value. 881 int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody); 882 int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block. 883 (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0); 884 (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, ReductionVar.first); 885 }// end of for each redux variable. 886} 887 888void SingleBlockLoopVectorizer::cleanup() { 889 // The original basic block. 890 SE->forgetLoop(Orig); 891} 892 893unsigned LoopVectorizationLegality::getLoopMaxVF() { 894 if (!TheLoop->getLoopPreheader()) { 895 assert(false && "No preheader!!"); 896 DEBUG(dbgs() << "LV: Loop not normalized." << "\n"); 897 return 1; 898 } 899 900 // We can only vectorize single basic block loops. 901 unsigned NumBlocks = TheLoop->getNumBlocks(); 902 if (NumBlocks != 1) { 903 DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n"); 904 return 1; 905 } 906 907 // We need to have a loop header. 908 BasicBlock *BB = TheLoop->getHeader(); 909 DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n"); 910 911 // Go over each instruction and look at memory deps. 912 if (!canVectorizeBlock(*BB)) { 913 DEBUG(dbgs() << "LV: Can't vectorize this loop header\n"); 914 return 1; 915 } 916 917 // ScalarEvolution needs to be able to find the exit count. 918 const SCEV *ExitCount = SE->getExitCount(TheLoop, BB); 919 if (ExitCount == SE->getCouldNotCompute()) { 920 DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); 921 return 1; 922 } 923 924 DEBUG(dbgs() << "LV: We can vectorize this loop!\n"); 925 926 // Okay! We can vectorize. At this point we don't have any other mem analysis 927 // which may limit our maximum vectorization factor, so just return the 928 // maximum SIMD size. 929 return DefaultVectorizationFactor; 930} 931 932bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { 933 // Scan the instructions in the block and look for hazards. 934 for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { 935 Instruction *I = it; 936 937 PHINode *Phi = dyn_cast<PHINode>(I); 938 if (Phi) { 939 // This should not happen because the loop should be normalized. 940 if (Phi->getNumIncomingValues() != 2) { 941 DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); 942 return false; 943 } 944 // We only look at integer phi nodes. 945 if (!Phi->getType()->isIntegerTy()) { 946 DEBUG(dbgs() << "LV: Found an non-int PHI.\n"); 947 return false; 948 } 949 950 if (isInductionVariable(Phi)) { 951 if (Induction) { 952 DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n"); 953 return false; 954 } 955 DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n"); 956 Induction = Phi; 957 continue; 958 } 959 if (AddReductionVar(Phi, IntegerAdd)) { 960 DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n"); 961 continue; 962 } 963 if (AddReductionVar(Phi, IntegerMult)) { 964 DEBUG(dbgs() << "LV: Found an Mult reduction PHI."<< *Phi <<"\n"); 965 continue; 966 } 967 968 DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); 969 return false; 970 }// end of PHI handling 971 972 // We still don't handle functions. 973 CallInst *CI = dyn_cast<CallInst>(I); 974 if (CI) { 975 DEBUG(dbgs() << "LV: Found a call site:"<< 976 CI->getCalledFunction()->getName() << "\n"); 977 return false; 978 } 979 980 // We do not re-vectorize vectors. 981 if (!VectorType::isValidElementType(I->getType()) && 982 !I->getType()->isVoidTy()) { 983 DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n"); 984 return false; 985 } 986 987 // Reduction instructions are allowed to have exit users. 988 // All other instructions must not have external users. 989 if (!AllowedExit.count(I)) 990 //Check that all of the users of the loop are inside the BB. 991 for (Value::use_iterator it = I->use_begin(), e = I->use_end(); 992 it != e; ++it) { 993 Instruction *U = cast<Instruction>(*it); 994 // This user may be a reduction exit value. 995 BasicBlock *Parent = U->getParent(); 996 if (Parent != &BB) { 997 DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); 998 return false; 999 } 1000 } 1001 } // next instr. 1002 1003 if (!Induction) { 1004 DEBUG(dbgs() << "LV: Did not find an induction var.\n"); 1005 return false; 1006 } 1007 1008 // If the memory dependencies do not prevent us from 1009 // vectorizing, then vectorize. 1010 return canVectorizeMemory(BB); 1011} 1012 1013bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { 1014 typedef SmallVector<Value*, 16> ValueVector; 1015 typedef SmallPtrSet<Value*, 16> ValueSet; 1016 // Holds the Load and Store *instructions*. 1017 ValueVector Loads; 1018 ValueVector Stores; 1019 1020 // Scan the BB and collect legal loads and stores. 1021 for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) { 1022 Instruction *I = it; 1023 1024 // If this is a load, save it. If this instruction can read from memory 1025 // but is not a load, then we quit. Notice that we don't handle function 1026 // calls that read or write. 1027 if (I->mayReadFromMemory()) { 1028 LoadInst *Ld = dyn_cast<LoadInst>(I); 1029 if (!Ld) return false; 1030 if (!Ld->isSimple()) { 1031 DEBUG(dbgs() << "LV: Found a non-simple load.\n"); 1032 return false; 1033 } 1034 Loads.push_back(Ld); 1035 continue; 1036 } 1037 1038 // Save store instructions. Abort if other instructions write to memory. 1039 if (I->mayWriteToMemory()) { 1040 StoreInst *St = dyn_cast<StoreInst>(I); 1041 if (!St) return false; 1042 if (!St->isSimple()) { 1043 DEBUG(dbgs() << "LV: Found a non-simple store.\n"); 1044 return false; 1045 } 1046 Stores.push_back(St); 1047 } 1048 } // next instr. 1049 1050 // Now we have two lists that hold the loads and the stores. 1051 // Next, we find the pointers that they use. 1052 1053 // Check if we see any stores. If there are no stores, then we don't 1054 // care if the pointers are *restrict*. 1055 if (!Stores.size()) { 1056 DEBUG(dbgs() << "LV: Found a read-only loop!\n"); 1057 return true; 1058 } 1059 1060 // Holds the read and read-write *pointers* that we find. 1061 ValueVector Reads; 1062 ValueVector ReadWrites; 1063 1064 // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects 1065 // multiple times on the same object. If the ptr is accessed twice, once 1066 // for read and once for write, it will only appear once (on the write 1067 // list). This is okay, since we are going to check for conflicts between 1068 // writes and between reads and writes, but not between reads and reads. 1069 ValueSet Seen; 1070 1071 ValueVector::iterator I, IE; 1072 for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) { 1073 StoreInst *ST = dyn_cast<StoreInst>(*I); 1074 assert(ST && "Bad StoreInst"); 1075 Value* Ptr = ST->getPointerOperand(); 1076 // If we did *not* see this pointer before, insert it to 1077 // the read-write list. At this phase it is only a 'write' list. 1078 if (Seen.insert(Ptr)) 1079 ReadWrites.push_back(Ptr); 1080 } 1081 1082 for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) { 1083 LoadInst *LD = dyn_cast<LoadInst>(*I); 1084 assert(LD && "Bad LoadInst"); 1085 Value* Ptr = LD->getPointerOperand(); 1086 // If we did *not* see this pointer before, insert it to the 1087 // read list. If we *did* see it before, then it is already in 1088 // the read-write list. This allows us to vectorize expressions 1089 // such as A[i] += x; Because the address of A[i] is a read-write 1090 // pointer. This only works if the index of A[i] is consecutive. 1091 // If the address of i is unknown (for example A[B[i]]) then we may 1092 // read a few words, modify, and write a few words, and some of the 1093 // words may be written to the same address. 1094 if (Seen.insert(Ptr) || !isConsecutiveGep(Ptr)) 1095 Reads.push_back(Ptr); 1096 } 1097 1098 // Now that the pointers are in two lists (Reads and ReadWrites), we 1099 // can check that there are no conflicts between each of the writes and 1100 // between the writes to the reads. 1101 ValueSet WriteObjects; 1102 ValueVector TempObjects; 1103 1104 // Check that the read-writes do not conflict with other read-write 1105 // pointers. 1106 for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) { 1107 GetUnderlyingObjects(*I, TempObjects, DL); 1108 for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end(); 1109 it != e; ++it) { 1110 if (!isIdentifiedSafeObject(*it)) { 1111 DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n"); 1112 return false; 1113 } 1114 if (!WriteObjects.insert(*it)) { 1115 DEBUG(dbgs() << "LV: Found a possible write-write reorder:" 1116 << **it <<"\n"); 1117 return false; 1118 } 1119 } 1120 TempObjects.clear(); 1121 } 1122 1123 /// Check that the reads don't conflict with the read-writes. 1124 for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) { 1125 GetUnderlyingObjects(*I, TempObjects, DL); 1126 for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end(); 1127 it != e; ++it) { 1128 if (!isIdentifiedSafeObject(*it)) { 1129 DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n"); 1130 return false; 1131 } 1132 if (WriteObjects.count(*it)) { 1133 DEBUG(dbgs() << "LV: Found a possible read/write reorder:" 1134 << **it <<"\n"); 1135 return false; 1136 } 1137 } 1138 TempObjects.clear(); 1139 } 1140 1141 // All is okay. 1142 return true; 1143} 1144 1145/// Checks if the value is a Global variable or if it is an Arguments 1146/// marked with the NoAlias attribute. 1147bool LoopVectorizationLegality::isIdentifiedSafeObject(Value* Val) { 1148 assert(Val && "Invalid value"); 1149 if (dyn_cast<GlobalValue>(Val)) 1150 return true; 1151 if (dyn_cast<AllocaInst>(Val)) 1152 return true; 1153 Argument *A = dyn_cast<Argument>(Val); 1154 if (!A) 1155 return false; 1156 return A->hasNoAliasAttr(); 1157} 1158 1159bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, 1160 ReductionKind Kind) { 1161 if (Phi->getNumIncomingValues() != 2) 1162 return false; 1163 1164 // Find the possible incoming reduction variable. 1165 BasicBlock *BB = Phi->getParent(); 1166 int SelfEdgeIdx = Phi->getBasicBlockIndex(BB); 1167 int InEdgeBlockIdx = (SelfEdgeIdx ? 0 : 1); // The other entry. 1168 Value *RdxStart = Phi->getIncomingValue(InEdgeBlockIdx); 1169 1170 // We must have a constant that starts the reduction. 1171 if (!isReductionConstant(RdxStart, Kind)) 1172 return false; 1173 1174 // ExitInstruction is the single value which is used outside the loop. 1175 // We only allow for a single reduction value to be used outside the loop. 1176 // This includes users of the reduction, variables (which form a cycle 1177 // which ends in the phi node). 1178 Instruction *ExitInstruction = 0; 1179 1180 // Iter is our iterator. We start with the PHI node and scan for all of the 1181 // users of this instruction. All users must be instructions which can be 1182 // used as reduction variables (such as ADD). We may have a single 1183 // out-of-block user. They cycle must end with the original PHI. 1184 // Also, we can't have multiple block-local users. 1185 Instruction *Iter = Phi; 1186 while (true) { 1187 // Any reduction instr must be of one of the allowed kinds. 1188 if (!isReductionInstr(Iter, Kind)) 1189 return false; 1190 1191 // Did we found a user inside this block ? 1192 bool FoundInBlockUser = false; 1193 // Did we reach the initial PHI node ? 1194 bool FoundStartPHI = false; 1195 1196 // If the instruction has no users then this is a broken 1197 // chain and can't be a reduction variable. 1198 if (Iter->use_begin() == Iter->use_end()) 1199 return false; 1200 1201 // For each of the *users* of iter. 1202 for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end(); 1203 it != e; ++it) { 1204 Instruction *U = cast<Instruction>(*it); 1205 // We already know that the PHI is a user. 1206 if (U == Phi) { 1207 FoundStartPHI = true; 1208 continue; 1209 } 1210 // Check if we found the exit user. 1211 BasicBlock *Parent = U->getParent(); 1212 if (Parent != BB) { 1213 // We must have a single exit instruction. 1214 if (ExitInstruction != 0) 1215 return false; 1216 ExitInstruction = Iter; 1217 } 1218 // We can't have multiple inside users. 1219 if (FoundInBlockUser) 1220 return false; 1221 FoundInBlockUser = true; 1222 Iter = U; 1223 } 1224 1225 // We found a reduction var if we have reached the original 1226 // phi node and we only have a single instruction with out-of-loop 1227 // users. 1228 if (FoundStartPHI && ExitInstruction) { 1229 // This instruction is allowed to have out-of-loop users. 1230 AllowedExit.insert(ExitInstruction); 1231 // Mark this as a reduction var. 1232 Reductions[Phi] = std::make_pair(ExitInstruction, Kind); 1233 return true; 1234 } 1235 } 1236} 1237 1238bool 1239LoopVectorizationLegality::isReductionConstant(Value *V, ReductionKind Kind) { 1240 ConstantInt *CI = dyn_cast<ConstantInt>(V); 1241 if (!CI) 1242 return false; 1243 if (Kind == IntegerMult && CI->isOne()) 1244 return true; 1245 if (Kind == IntegerAdd && CI->isZero()) 1246 return true; 1247 return false; 1248} 1249 1250bool 1251LoopVectorizationLegality::isReductionInstr(Instruction *I, 1252 ReductionKind Kind) { 1253 switch (I->getOpcode()) { 1254 default: 1255 return false; 1256 case Instruction::PHI: 1257 // possibly. 1258 return true; 1259 case Instruction::Add: 1260 case Instruction::Sub: 1261 return Kind == IntegerAdd; 1262 case Instruction::Mul: 1263 case Instruction::UDiv: 1264 case Instruction::SDiv: 1265 return Kind == IntegerMult; 1266 } 1267} 1268 1269bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { 1270 // Check that the PHI is consecutive and starts at zero. 1271 const SCEV *PhiScev = SE->getSCEV(Phi); 1272 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 1273 if (!AR) { 1274 DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 1275 return false; 1276 } 1277 const SCEV *Step = AR->getStepRecurrence(*SE); 1278 const SCEV *Start = AR->getStart(); 1279 1280 if (!Step->isOne() || !Start->isZero()) { 1281 DEBUG(dbgs() << "LV: PHI does not start at zero or steps by one.\n"); 1282 return false; 1283 } 1284 return true; 1285} 1286 1287} // namespace 1288 1289char LoopVectorize::ID = 0; 1290static const char lv_name[] = "Loop Vectorization"; 1291INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false) 1292INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 1293INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 1294INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 1295INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) 1296 1297namespace llvm { 1298 Pass *createLoopVectorizePass() { 1299 return new LoopVectorize(); 1300 } 1301} 1302 1303