EarlyCSE.cpp revision 3574eca1b02600bac4e625297f4ecf745f4c4f32
1//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs a simple dominator tree walk that eliminates trivially 11// redundant instructions. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "early-cse" 16#include "llvm/Transforms/Scalar.h" 17#include "llvm/Instructions.h" 18#include "llvm/Pass.h" 19#include "llvm/Analysis/Dominators.h" 20#include "llvm/Analysis/InstructionSimplify.h" 21#include "llvm/DataLayout.h" 22#include "llvm/Target/TargetLibraryInfo.h" 23#include "llvm/Transforms/Utils/Local.h" 24#include "llvm/Support/Debug.h" 25#include "llvm/Support/RecyclingAllocator.h" 26#include "llvm/ADT/ScopedHashTable.h" 27#include "llvm/ADT/Statistic.h" 28#include <deque> 29using namespace llvm; 30 31STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd"); 32STATISTIC(NumCSE, "Number of instructions CSE'd"); 33STATISTIC(NumCSELoad, "Number of load instructions CSE'd"); 34STATISTIC(NumCSECall, "Number of call instructions CSE'd"); 35STATISTIC(NumDSE, "Number of trivial dead stores removed"); 36 37static unsigned getHash(const void *V) { 38 return DenseMapInfo<const void*>::getHashValue(V); 39} 40 41//===----------------------------------------------------------------------===// 42// SimpleValue 43//===----------------------------------------------------------------------===// 44 45namespace { 46 /// SimpleValue - Instances of this struct represent available values in the 47 /// scoped hash table. 48 struct SimpleValue { 49 Instruction *Inst; 50 51 SimpleValue(Instruction *I) : Inst(I) { 52 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); 53 } 54 55 bool isSentinel() const { 56 return Inst == DenseMapInfo<Instruction*>::getEmptyKey() || 57 Inst == DenseMapInfo<Instruction*>::getTombstoneKey(); 58 } 59 60 static bool canHandle(Instruction *Inst) { 61 // This can only handle non-void readnone functions. 62 if (CallInst *CI = dyn_cast<CallInst>(Inst)) 63 return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy(); 64 return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) || 65 isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) || 66 isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) || 67 isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) || 68 isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst); 69 } 70 }; 71} 72 73namespace llvm { 74// SimpleValue is POD. 75template<> struct isPodLike<SimpleValue> { 76 static const bool value = true; 77}; 78 79template<> struct DenseMapInfo<SimpleValue> { 80 static inline SimpleValue getEmptyKey() { 81 return DenseMapInfo<Instruction*>::getEmptyKey(); 82 } 83 static inline SimpleValue getTombstoneKey() { 84 return DenseMapInfo<Instruction*>::getTombstoneKey(); 85 } 86 static unsigned getHashValue(SimpleValue Val); 87 static bool isEqual(SimpleValue LHS, SimpleValue RHS); 88}; 89} 90 91unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) { 92 Instruction *Inst = Val.Inst; 93 94 // Hash in all of the operands as pointers. 95 unsigned Res = 0; 96 for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) 97 Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); 98 99 if (CastInst *CI = dyn_cast<CastInst>(Inst)) 100 Res ^= getHash(CI->getType()); 101 else if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) 102 Res ^= CI->getPredicate(); 103 else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) { 104 for (ExtractValueInst::idx_iterator I = EVI->idx_begin(), 105 E = EVI->idx_end(); I != E; ++I) 106 Res ^= *I; 107 } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) { 108 for (InsertValueInst::idx_iterator I = IVI->idx_begin(), 109 E = IVI->idx_end(); I != E; ++I) 110 Res ^= *I; 111 } else { 112 // nothing extra to hash in. 113 assert((isa<CallInst>(Inst) || 114 isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) || 115 isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) || 116 isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) && 117 "Invalid/unknown instruction"); 118 } 119 120 // Mix in the opcode. 121 return (Res << 1) ^ Inst->getOpcode(); 122} 123 124bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) { 125 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst; 126 127 if (LHS.isSentinel() || RHS.isSentinel()) 128 return LHSI == RHSI; 129 130 if (LHSI->getOpcode() != RHSI->getOpcode()) return false; 131 return LHSI->isIdenticalTo(RHSI); 132} 133 134//===----------------------------------------------------------------------===// 135// CallValue 136//===----------------------------------------------------------------------===// 137 138namespace { 139 /// CallValue - Instances of this struct represent available call values in 140 /// the scoped hash table. 141 struct CallValue { 142 Instruction *Inst; 143 144 CallValue(Instruction *I) : Inst(I) { 145 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); 146 } 147 148 bool isSentinel() const { 149 return Inst == DenseMapInfo<Instruction*>::getEmptyKey() || 150 Inst == DenseMapInfo<Instruction*>::getTombstoneKey(); 151 } 152 153 static bool canHandle(Instruction *Inst) { 154 // Don't value number anything that returns void. 155 if (Inst->getType()->isVoidTy()) 156 return false; 157 158 CallInst *CI = dyn_cast<CallInst>(Inst); 159 if (CI == 0 || !CI->onlyReadsMemory()) 160 return false; 161 return true; 162 } 163 }; 164} 165 166namespace llvm { 167 // CallValue is POD. 168 template<> struct isPodLike<CallValue> { 169 static const bool value = true; 170 }; 171 172 template<> struct DenseMapInfo<CallValue> { 173 static inline CallValue getEmptyKey() { 174 return DenseMapInfo<Instruction*>::getEmptyKey(); 175 } 176 static inline CallValue getTombstoneKey() { 177 return DenseMapInfo<Instruction*>::getTombstoneKey(); 178 } 179 static unsigned getHashValue(CallValue Val); 180 static bool isEqual(CallValue LHS, CallValue RHS); 181 }; 182} 183unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) { 184 Instruction *Inst = Val.Inst; 185 // Hash in all of the operands as pointers. 186 unsigned Res = 0; 187 for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) { 188 assert(!Inst->getOperand(i)->getType()->isMetadataTy() && 189 "Cannot value number calls with metadata operands"); 190 Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); 191 } 192 193 // Mix in the opcode. 194 return (Res << 1) ^ Inst->getOpcode(); 195} 196 197bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) { 198 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst; 199 if (LHS.isSentinel() || RHS.isSentinel()) 200 return LHSI == RHSI; 201 return LHSI->isIdenticalTo(RHSI); 202} 203 204 205//===----------------------------------------------------------------------===// 206// EarlyCSE pass. 207//===----------------------------------------------------------------------===// 208 209namespace { 210 211/// EarlyCSE - This pass does a simple depth-first walk over the dominator 212/// tree, eliminating trivially redundant instructions and using instsimplify 213/// to canonicalize things as it goes. It is intended to be fast and catch 214/// obvious cases so that instcombine and other passes are more effective. It 215/// is expected that a later pass of GVN will catch the interesting/hard 216/// cases. 217class EarlyCSE : public FunctionPass { 218public: 219 const DataLayout *TD; 220 const TargetLibraryInfo *TLI; 221 DominatorTree *DT; 222 typedef RecyclingAllocator<BumpPtrAllocator, 223 ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy; 224 typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>, 225 AllocatorTy> ScopedHTType; 226 227 /// AvailableValues - This scoped hash table contains the current values of 228 /// all of our simple scalar expressions. As we walk down the domtree, we 229 /// look to see if instructions are in this: if so, we replace them with what 230 /// we find, otherwise we insert them so that dominated values can succeed in 231 /// their lookup. 232 ScopedHTType *AvailableValues; 233 234 /// AvailableLoads - This scoped hash table contains the current values 235 /// of loads. This allows us to get efficient access to dominating loads when 236 /// we have a fully redundant load. In addition to the most recent load, we 237 /// keep track of a generation count of the read, which is compared against 238 /// the current generation count. The current generation count is 239 /// incremented after every possibly writing memory operation, which ensures 240 /// that we only CSE loads with other loads that have no intervening store. 241 typedef RecyclingAllocator<BumpPtrAllocator, 242 ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator; 243 typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>, 244 DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType; 245 LoadHTType *AvailableLoads; 246 247 /// AvailableCalls - This scoped hash table contains the current values 248 /// of read-only call values. It uses the same generation count as loads. 249 typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType; 250 CallHTType *AvailableCalls; 251 252 /// CurrentGeneration - This is the current generation of the memory value. 253 unsigned CurrentGeneration; 254 255 static char ID; 256 explicit EarlyCSE() : FunctionPass(ID) { 257 initializeEarlyCSEPass(*PassRegistry::getPassRegistry()); 258 } 259 260 bool runOnFunction(Function &F); 261 262private: 263 264 // NodeScope - almost a POD, but needs to call the constructors for the 265 // scoped hash tables so that a new scope gets pushed on. These are RAII so 266 // that the scope gets popped when the NodeScope is destroyed. 267 class NodeScope { 268 public: 269 NodeScope(ScopedHTType *availableValues, 270 LoadHTType *availableLoads, 271 CallHTType *availableCalls) : 272 Scope(*availableValues), 273 LoadScope(*availableLoads), 274 CallScope(*availableCalls) {} 275 276 private: 277 NodeScope(const NodeScope&) LLVM_DELETED_FUNCTION; 278 void operator=(const NodeScope&) LLVM_DELETED_FUNCTION; 279 280 ScopedHTType::ScopeTy Scope; 281 LoadHTType::ScopeTy LoadScope; 282 CallHTType::ScopeTy CallScope; 283 }; 284 285 // StackNode - contains all the needed information to create a stack for 286 // doing a depth first tranversal of the tree. This includes scopes for 287 // values, loads, and calls as well as the generation. There is a child 288 // iterator so that the children do not need to be store spearately. 289 class StackNode { 290 public: 291 StackNode(ScopedHTType *availableValues, 292 LoadHTType *availableLoads, 293 CallHTType *availableCalls, 294 unsigned cg, DomTreeNode *n, 295 DomTreeNode::iterator child, DomTreeNode::iterator end) : 296 CurrentGeneration(cg), ChildGeneration(cg), Node(n), 297 ChildIter(child), EndIter(end), 298 Scopes(availableValues, availableLoads, availableCalls), 299 Processed(false) {} 300 301 // Accessors. 302 unsigned currentGeneration() { return CurrentGeneration; } 303 unsigned childGeneration() { return ChildGeneration; } 304 void childGeneration(unsigned generation) { ChildGeneration = generation; } 305 DomTreeNode *node() { return Node; } 306 DomTreeNode::iterator childIter() { return ChildIter; } 307 DomTreeNode *nextChild() { 308 DomTreeNode *child = *ChildIter; 309 ++ChildIter; 310 return child; 311 } 312 DomTreeNode::iterator end() { return EndIter; } 313 bool isProcessed() { return Processed; } 314 void process() { Processed = true; } 315 316 private: 317 StackNode(const StackNode&) LLVM_DELETED_FUNCTION; 318 void operator=(const StackNode&) LLVM_DELETED_FUNCTION; 319 320 // Members. 321 unsigned CurrentGeneration; 322 unsigned ChildGeneration; 323 DomTreeNode *Node; 324 DomTreeNode::iterator ChildIter; 325 DomTreeNode::iterator EndIter; 326 NodeScope Scopes; 327 bool Processed; 328 }; 329 330 bool processNode(DomTreeNode *Node); 331 332 // This transformation requires dominator postdominator info 333 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 334 AU.addRequired<DominatorTree>(); 335 AU.addRequired<TargetLibraryInfo>(); 336 AU.setPreservesCFG(); 337 } 338}; 339} 340 341char EarlyCSE::ID = 0; 342 343// createEarlyCSEPass - The public interface to this file. 344FunctionPass *llvm::createEarlyCSEPass() { 345 return new EarlyCSE(); 346} 347 348INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false) 349INITIALIZE_PASS_DEPENDENCY(DominatorTree) 350INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) 351INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false) 352 353bool EarlyCSE::processNode(DomTreeNode *Node) { 354 BasicBlock *BB = Node->getBlock(); 355 356 // If this block has a single predecessor, then the predecessor is the parent 357 // of the domtree node and all of the live out memory values are still current 358 // in this block. If this block has multiple predecessors, then they could 359 // have invalidated the live-out memory values of our parent value. For now, 360 // just be conservative and invalidate memory if this block has multiple 361 // predecessors. 362 if (BB->getSinglePredecessor() == 0) 363 ++CurrentGeneration; 364 365 /// LastStore - Keep track of the last non-volatile store that we saw... for 366 /// as long as there in no instruction that reads memory. If we see a store 367 /// to the same location, we delete the dead store. This zaps trivial dead 368 /// stores which can occur in bitfield code among other things. 369 StoreInst *LastStore = 0; 370 371 bool Changed = false; 372 373 // See if any instructions in the block can be eliminated. If so, do it. If 374 // not, add them to AvailableValues. 375 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { 376 Instruction *Inst = I++; 377 378 // Dead instructions should just be removed. 379 if (isInstructionTriviallyDead(Inst, TLI)) { 380 DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n'); 381 Inst->eraseFromParent(); 382 Changed = true; 383 ++NumSimplify; 384 continue; 385 } 386 387 // If the instruction can be simplified (e.g. X+0 = X) then replace it with 388 // its simpler value. 389 if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) { 390 DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); 391 Inst->replaceAllUsesWith(V); 392 Inst->eraseFromParent(); 393 Changed = true; 394 ++NumSimplify; 395 continue; 396 } 397 398 // If this is a simple instruction that we can value number, process it. 399 if (SimpleValue::canHandle(Inst)) { 400 // See if the instruction has an available value. If so, use it. 401 if (Value *V = AvailableValues->lookup(Inst)) { 402 DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n'); 403 Inst->replaceAllUsesWith(V); 404 Inst->eraseFromParent(); 405 Changed = true; 406 ++NumCSE; 407 continue; 408 } 409 410 // Otherwise, just remember that this value is available. 411 AvailableValues->insert(Inst, Inst); 412 continue; 413 } 414 415 // If this is a non-volatile load, process it. 416 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { 417 // Ignore volatile loads. 418 if (!LI->isSimple()) { 419 LastStore = 0; 420 continue; 421 } 422 423 // If we have an available version of this load, and if it is the right 424 // generation, replace this instruction. 425 std::pair<Value*, unsigned> InVal = 426 AvailableLoads->lookup(Inst->getOperand(0)); 427 if (InVal.first != 0 && InVal.second == CurrentGeneration) { 428 DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: " 429 << *InVal.first << '\n'); 430 if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); 431 Inst->eraseFromParent(); 432 Changed = true; 433 ++NumCSELoad; 434 continue; 435 } 436 437 // Otherwise, remember that we have this instruction. 438 AvailableLoads->insert(Inst->getOperand(0), 439 std::pair<Value*, unsigned>(Inst, CurrentGeneration)); 440 LastStore = 0; 441 continue; 442 } 443 444 // If this instruction may read from memory, forget LastStore. 445 if (Inst->mayReadFromMemory()) 446 LastStore = 0; 447 448 // If this is a read-only call, process it. 449 if (CallValue::canHandle(Inst)) { 450 // If we have an available version of this call, and if it is the right 451 // generation, replace this instruction. 452 std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst); 453 if (InVal.first != 0 && InVal.second == CurrentGeneration) { 454 DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: " 455 << *InVal.first << '\n'); 456 if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); 457 Inst->eraseFromParent(); 458 Changed = true; 459 ++NumCSECall; 460 continue; 461 } 462 463 // Otherwise, remember that we have this instruction. 464 AvailableCalls->insert(Inst, 465 std::pair<Value*, unsigned>(Inst, CurrentGeneration)); 466 continue; 467 } 468 469 // Okay, this isn't something we can CSE at all. Check to see if it is 470 // something that could modify memory. If so, our available memory values 471 // cannot be used so bump the generation count. 472 if (Inst->mayWriteToMemory()) { 473 ++CurrentGeneration; 474 475 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 476 // We do a trivial form of DSE if there are two stores to the same 477 // location with no intervening loads. Delete the earlier store. 478 if (LastStore && 479 LastStore->getPointerOperand() == SI->getPointerOperand()) { 480 DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: " 481 << *Inst << '\n'); 482 LastStore->eraseFromParent(); 483 Changed = true; 484 ++NumDSE; 485 LastStore = 0; 486 continue; 487 } 488 489 // Okay, we just invalidated anything we knew about loaded values. Try 490 // to salvage *something* by remembering that the stored value is a live 491 // version of the pointer. It is safe to forward from volatile stores 492 // to non-volatile loads, so we don't have to check for volatility of 493 // the store. 494 AvailableLoads->insert(SI->getPointerOperand(), 495 std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration)); 496 497 // Remember that this was the last store we saw for DSE. 498 if (SI->isSimple()) 499 LastStore = SI; 500 } 501 } 502 } 503 504 return Changed; 505} 506 507 508bool EarlyCSE::runOnFunction(Function &F) { 509 std::deque<StackNode *> nodesToProcess; 510 511 TD = getAnalysisIfAvailable<DataLayout>(); 512 TLI = &getAnalysis<TargetLibraryInfo>(); 513 DT = &getAnalysis<DominatorTree>(); 514 515 // Tables that the pass uses when walking the domtree. 516 ScopedHTType AVTable; 517 AvailableValues = &AVTable; 518 LoadHTType LoadTable; 519 AvailableLoads = &LoadTable; 520 CallHTType CallTable; 521 AvailableCalls = &CallTable; 522 523 CurrentGeneration = 0; 524 bool Changed = false; 525 526 // Process the root node. 527 nodesToProcess.push_front( 528 new StackNode(AvailableValues, AvailableLoads, AvailableCalls, 529 CurrentGeneration, DT->getRootNode(), 530 DT->getRootNode()->begin(), 531 DT->getRootNode()->end())); 532 533 // Save the current generation. 534 unsigned LiveOutGeneration = CurrentGeneration; 535 536 // Process the stack. 537 while (!nodesToProcess.empty()) { 538 // Grab the first item off the stack. Set the current generation, remove 539 // the node from the stack, and process it. 540 StackNode *NodeToProcess = nodesToProcess.front(); 541 542 // Initialize class members. 543 CurrentGeneration = NodeToProcess->currentGeneration(); 544 545 // Check if the node needs to be processed. 546 if (!NodeToProcess->isProcessed()) { 547 // Process the node. 548 Changed |= processNode(NodeToProcess->node()); 549 NodeToProcess->childGeneration(CurrentGeneration); 550 NodeToProcess->process(); 551 } else if (NodeToProcess->childIter() != NodeToProcess->end()) { 552 // Push the next child onto the stack. 553 DomTreeNode *child = NodeToProcess->nextChild(); 554 nodesToProcess.push_front( 555 new StackNode(AvailableValues, 556 AvailableLoads, 557 AvailableCalls, 558 NodeToProcess->childGeneration(), child, 559 child->begin(), child->end())); 560 } else { 561 // It has been processed, and there are no more children to process, 562 // so delete it and pop it off the stack. 563 delete NodeToProcess; 564 nodesToProcess.pop_front(); 565 } 566 } // while (!nodes...) 567 568 // Reset the current generation. 569 CurrentGeneration = LiveOutGeneration; 570 571 return Changed; 572} 573