GVN.cpp revision 88554df4a2b801a11964a2668acfac2d68ab14bc
1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs global value numbering to eliminate fully redundant 11// instructions. It also performs simple dead load elimination. 12// 13// Note that this pass does the value numbering itself; it does not use the 14// ValueNumbering analysis passes. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "gvn" 19#include "llvm/Transforms/Scalar.h" 20#include "llvm/BasicBlock.h" 21#include "llvm/Constants.h" 22#include "llvm/DerivedTypes.h" 23#include "llvm/Function.h" 24#include "llvm/IntrinsicInst.h" 25#include "llvm/Value.h" 26#include "llvm/ADT/DenseMap.h" 27#include "llvm/ADT/DepthFirstIterator.h" 28#include "llvm/ADT/PostOrderIterator.h" 29#include "llvm/ADT/SmallPtrSet.h" 30#include "llvm/ADT/SmallVector.h" 31#include "llvm/ADT/Statistic.h" 32#include "llvm/Analysis/Dominators.h" 33#include "llvm/Analysis/AliasAnalysis.h" 34#include "llvm/Analysis/MemoryDependenceAnalysis.h" 35#include "llvm/Support/CFG.h" 36#include "llvm/Support/CommandLine.h" 37#include "llvm/Support/Compiler.h" 38#include "llvm/Support/Debug.h" 39#include "llvm/Transforms/Utils/BasicBlockUtils.h" 40#include <cstdio> 41using namespace llvm; 42 43STATISTIC(NumGVNInstr, "Number of instructions deleted"); 44STATISTIC(NumGVNLoad, "Number of loads deleted"); 45STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); 46STATISTIC(NumGVNBlocks, "Number of blocks merged"); 47STATISTIC(NumPRELoad, "Number of loads PRE'd"); 48 49static cl::opt<bool> EnablePRE("enable-pre", 50 cl::init(true), cl::Hidden); 51cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); 52 53//===----------------------------------------------------------------------===// 54// ValueTable Class 55//===----------------------------------------------------------------------===// 56 57/// This class holds the mapping between values and value numbers. It is used 58/// as an efficient mechanism to determine the expression-wise equivalence of 59/// two values. 60namespace { 61 struct VISIBILITY_HIDDEN Expression { 62 enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM, 63 FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, 64 ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, 65 ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, 66 FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, 67 FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, 68 FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, 69 SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, 70 FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, 71 PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, 72 EMPTY, TOMBSTONE }; 73 74 ExpressionOpcode opcode; 75 const Type* type; 76 uint32_t firstVN; 77 uint32_t secondVN; 78 uint32_t thirdVN; 79 SmallVector<uint32_t, 4> varargs; 80 Value* function; 81 82 Expression() { } 83 Expression(ExpressionOpcode o) : opcode(o) { } 84 85 bool operator==(const Expression &other) const { 86 if (opcode != other.opcode) 87 return false; 88 else if (opcode == EMPTY || opcode == TOMBSTONE) 89 return true; 90 else if (type != other.type) 91 return false; 92 else if (function != other.function) 93 return false; 94 else if (firstVN != other.firstVN) 95 return false; 96 else if (secondVN != other.secondVN) 97 return false; 98 else if (thirdVN != other.thirdVN) 99 return false; 100 else { 101 if (varargs.size() != other.varargs.size()) 102 return false; 103 104 for (size_t i = 0; i < varargs.size(); ++i) 105 if (varargs[i] != other.varargs[i]) 106 return false; 107 108 return true; 109 } 110 } 111 112 bool operator!=(const Expression &other) const { 113 return !(*this == other); 114 } 115 }; 116 117 class VISIBILITY_HIDDEN ValueTable { 118 private: 119 DenseMap<Value*, uint32_t> valueNumbering; 120 DenseMap<Expression, uint32_t> expressionNumbering; 121 AliasAnalysis* AA; 122 MemoryDependenceAnalysis* MD; 123 DominatorTree* DT; 124 125 uint32_t nextValueNumber; 126 127 Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); 128 Expression::ExpressionOpcode getOpcode(CmpInst* C); 129 Expression::ExpressionOpcode getOpcode(CastInst* C); 130 Expression create_expression(BinaryOperator* BO); 131 Expression create_expression(CmpInst* C); 132 Expression create_expression(ShuffleVectorInst* V); 133 Expression create_expression(ExtractElementInst* C); 134 Expression create_expression(InsertElementInst* V); 135 Expression create_expression(SelectInst* V); 136 Expression create_expression(CastInst* C); 137 Expression create_expression(GetElementPtrInst* G); 138 Expression create_expression(CallInst* C); 139 Expression create_expression(Constant* C); 140 public: 141 ValueTable() : nextValueNumber(1) { } 142 uint32_t lookup_or_add(Value* V); 143 uint32_t lookup(Value* V) const; 144 void add(Value* V, uint32_t num); 145 void clear(); 146 void erase(Value* v); 147 unsigned size(); 148 void setAliasAnalysis(AliasAnalysis* A) { AA = A; } 149 AliasAnalysis *getAliasAnalysis() const { return AA; } 150 void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } 151 void setDomTree(DominatorTree* D) { DT = D; } 152 uint32_t getNextUnusedValueNumber() { return nextValueNumber; } 153 void verifyRemoved(const Value *) const; 154 }; 155} 156 157namespace llvm { 158template <> struct DenseMapInfo<Expression> { 159 static inline Expression getEmptyKey() { 160 return Expression(Expression::EMPTY); 161 } 162 163 static inline Expression getTombstoneKey() { 164 return Expression(Expression::TOMBSTONE); 165 } 166 167 static unsigned getHashValue(const Expression e) { 168 unsigned hash = e.opcode; 169 170 hash = e.firstVN + hash * 37; 171 hash = e.secondVN + hash * 37; 172 hash = e.thirdVN + hash * 37; 173 174 hash = ((unsigned)((uintptr_t)e.type >> 4) ^ 175 (unsigned)((uintptr_t)e.type >> 9)) + 176 hash * 37; 177 178 for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), 179 E = e.varargs.end(); I != E; ++I) 180 hash = *I + hash * 37; 181 182 hash = ((unsigned)((uintptr_t)e.function >> 4) ^ 183 (unsigned)((uintptr_t)e.function >> 9)) + 184 hash * 37; 185 186 return hash; 187 } 188 static bool isEqual(const Expression &LHS, const Expression &RHS) { 189 return LHS == RHS; 190 } 191 static bool isPod() { return true; } 192}; 193} 194 195//===----------------------------------------------------------------------===// 196// ValueTable Internal Functions 197//===----------------------------------------------------------------------===// 198Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { 199 switch(BO->getOpcode()) { 200 default: // THIS SHOULD NEVER HAPPEN 201 assert(0 && "Binary operator with unknown opcode?"); 202 case Instruction::Add: return Expression::ADD; 203 case Instruction::Sub: return Expression::SUB; 204 case Instruction::Mul: return Expression::MUL; 205 case Instruction::UDiv: return Expression::UDIV; 206 case Instruction::SDiv: return Expression::SDIV; 207 case Instruction::FDiv: return Expression::FDIV; 208 case Instruction::URem: return Expression::UREM; 209 case Instruction::SRem: return Expression::SREM; 210 case Instruction::FRem: return Expression::FREM; 211 case Instruction::Shl: return Expression::SHL; 212 case Instruction::LShr: return Expression::LSHR; 213 case Instruction::AShr: return Expression::ASHR; 214 case Instruction::And: return Expression::AND; 215 case Instruction::Or: return Expression::OR; 216 case Instruction::Xor: return Expression::XOR; 217 } 218} 219 220Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { 221 if (isa<ICmpInst>(C) || isa<VICmpInst>(C)) { 222 switch (C->getPredicate()) { 223 default: // THIS SHOULD NEVER HAPPEN 224 assert(0 && "Comparison with unknown predicate?"); 225 case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; 226 case ICmpInst::ICMP_NE: return Expression::ICMPNE; 227 case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; 228 case ICmpInst::ICMP_UGE: return Expression::ICMPUGE; 229 case ICmpInst::ICMP_ULT: return Expression::ICMPULT; 230 case ICmpInst::ICMP_ULE: return Expression::ICMPULE; 231 case ICmpInst::ICMP_SGT: return Expression::ICMPSGT; 232 case ICmpInst::ICMP_SGE: return Expression::ICMPSGE; 233 case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; 234 case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; 235 } 236 } 237 assert((isa<FCmpInst>(C) || isa<VFCmpInst>(C)) && "Unknown compare"); 238 switch (C->getPredicate()) { 239 default: // THIS SHOULD NEVER HAPPEN 240 assert(0 && "Comparison with unknown predicate?"); 241 case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; 242 case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; 243 case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; 244 case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; 245 case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; 246 case FCmpInst::FCMP_ONE: return Expression::FCMPONE; 247 case FCmpInst::FCMP_ORD: return Expression::FCMPORD; 248 case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; 249 case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; 250 case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; 251 case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; 252 case FCmpInst::FCMP_ULT: return Expression::FCMPULT; 253 case FCmpInst::FCMP_ULE: return Expression::FCMPULE; 254 case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; 255 } 256} 257 258Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { 259 switch(C->getOpcode()) { 260 default: // THIS SHOULD NEVER HAPPEN 261 assert(0 && "Cast operator with unknown opcode?"); 262 case Instruction::Trunc: return Expression::TRUNC; 263 case Instruction::ZExt: return Expression::ZEXT; 264 case Instruction::SExt: return Expression::SEXT; 265 case Instruction::FPToUI: return Expression::FPTOUI; 266 case Instruction::FPToSI: return Expression::FPTOSI; 267 case Instruction::UIToFP: return Expression::UITOFP; 268 case Instruction::SIToFP: return Expression::SITOFP; 269 case Instruction::FPTrunc: return Expression::FPTRUNC; 270 case Instruction::FPExt: return Expression::FPEXT; 271 case Instruction::PtrToInt: return Expression::PTRTOINT; 272 case Instruction::IntToPtr: return Expression::INTTOPTR; 273 case Instruction::BitCast: return Expression::BITCAST; 274 } 275} 276 277Expression ValueTable::create_expression(CallInst* C) { 278 Expression e; 279 280 e.type = C->getType(); 281 e.firstVN = 0; 282 e.secondVN = 0; 283 e.thirdVN = 0; 284 e.function = C->getCalledFunction(); 285 e.opcode = Expression::CALL; 286 287 for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); 288 I != E; ++I) 289 e.varargs.push_back(lookup_or_add(*I)); 290 291 return e; 292} 293 294Expression ValueTable::create_expression(BinaryOperator* BO) { 295 Expression e; 296 297 e.firstVN = lookup_or_add(BO->getOperand(0)); 298 e.secondVN = lookup_or_add(BO->getOperand(1)); 299 e.thirdVN = 0; 300 e.function = 0; 301 e.type = BO->getType(); 302 e.opcode = getOpcode(BO); 303 304 return e; 305} 306 307Expression ValueTable::create_expression(CmpInst* C) { 308 Expression e; 309 310 e.firstVN = lookup_or_add(C->getOperand(0)); 311 e.secondVN = lookup_or_add(C->getOperand(1)); 312 e.thirdVN = 0; 313 e.function = 0; 314 e.type = C->getType(); 315 e.opcode = getOpcode(C); 316 317 return e; 318} 319 320Expression ValueTable::create_expression(CastInst* C) { 321 Expression e; 322 323 e.firstVN = lookup_or_add(C->getOperand(0)); 324 e.secondVN = 0; 325 e.thirdVN = 0; 326 e.function = 0; 327 e.type = C->getType(); 328 e.opcode = getOpcode(C); 329 330 return e; 331} 332 333Expression ValueTable::create_expression(ShuffleVectorInst* S) { 334 Expression e; 335 336 e.firstVN = lookup_or_add(S->getOperand(0)); 337 e.secondVN = lookup_or_add(S->getOperand(1)); 338 e.thirdVN = lookup_or_add(S->getOperand(2)); 339 e.function = 0; 340 e.type = S->getType(); 341 e.opcode = Expression::SHUFFLE; 342 343 return e; 344} 345 346Expression ValueTable::create_expression(ExtractElementInst* E) { 347 Expression e; 348 349 e.firstVN = lookup_or_add(E->getOperand(0)); 350 e.secondVN = lookup_or_add(E->getOperand(1)); 351 e.thirdVN = 0; 352 e.function = 0; 353 e.type = E->getType(); 354 e.opcode = Expression::EXTRACT; 355 356 return e; 357} 358 359Expression ValueTable::create_expression(InsertElementInst* I) { 360 Expression e; 361 362 e.firstVN = lookup_or_add(I->getOperand(0)); 363 e.secondVN = lookup_or_add(I->getOperand(1)); 364 e.thirdVN = lookup_or_add(I->getOperand(2)); 365 e.function = 0; 366 e.type = I->getType(); 367 e.opcode = Expression::INSERT; 368 369 return e; 370} 371 372Expression ValueTable::create_expression(SelectInst* I) { 373 Expression e; 374 375 e.firstVN = lookup_or_add(I->getCondition()); 376 e.secondVN = lookup_or_add(I->getTrueValue()); 377 e.thirdVN = lookup_or_add(I->getFalseValue()); 378 e.function = 0; 379 e.type = I->getType(); 380 e.opcode = Expression::SELECT; 381 382 return e; 383} 384 385Expression ValueTable::create_expression(GetElementPtrInst* G) { 386 Expression e; 387 388 e.firstVN = lookup_or_add(G->getPointerOperand()); 389 e.secondVN = 0; 390 e.thirdVN = 0; 391 e.function = 0; 392 e.type = G->getType(); 393 e.opcode = Expression::GEP; 394 395 for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); 396 I != E; ++I) 397 e.varargs.push_back(lookup_or_add(*I)); 398 399 return e; 400} 401 402//===----------------------------------------------------------------------===// 403// ValueTable External Functions 404//===----------------------------------------------------------------------===// 405 406/// add - Insert a value into the table with a specified value number. 407void ValueTable::add(Value* V, uint32_t num) { 408 valueNumbering.insert(std::make_pair(V, num)); 409} 410 411/// lookup_or_add - Returns the value number for the specified value, assigning 412/// it a new number if it did not have one before. 413uint32_t ValueTable::lookup_or_add(Value* V) { 414 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 415 if (VI != valueNumbering.end()) 416 return VI->second; 417 418 if (CallInst* C = dyn_cast<CallInst>(V)) { 419 if (AA->doesNotAccessMemory(C)) { 420 Expression e = create_expression(C); 421 422 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 423 if (EI != expressionNumbering.end()) { 424 valueNumbering.insert(std::make_pair(V, EI->second)); 425 return EI->second; 426 } else { 427 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 428 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 429 430 return nextValueNumber++; 431 } 432 } else if (AA->onlyReadsMemory(C)) { 433 Expression e = create_expression(C); 434 435 if (expressionNumbering.find(e) == expressionNumbering.end()) { 436 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 437 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 438 return nextValueNumber++; 439 } 440 441 MemDepResult local_dep = MD->getDependency(C); 442 443 if (!local_dep.isDef() && !local_dep.isNonLocal()) { 444 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 445 return nextValueNumber++; 446 } 447 448 if (local_dep.isDef()) { 449 CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); 450 451 if (local_cdep->getNumOperands() != C->getNumOperands()) { 452 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 453 return nextValueNumber++; 454 } 455 456 for (unsigned i = 1; i < C->getNumOperands(); ++i) { 457 uint32_t c_vn = lookup_or_add(C->getOperand(i)); 458 uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); 459 if (c_vn != cd_vn) { 460 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 461 return nextValueNumber++; 462 } 463 } 464 465 uint32_t v = lookup_or_add(local_cdep); 466 valueNumbering.insert(std::make_pair(V, v)); 467 return v; 468 } 469 470 // Non-local case. 471 const MemoryDependenceAnalysis::NonLocalDepInfo &deps = 472 MD->getNonLocalCallDependency(CallSite(C)); 473 // FIXME: call/call dependencies for readonly calls should return def, not 474 // clobber! Move the checking logic to MemDep! 475 CallInst* cdep = 0; 476 477 // Check to see if we have a single dominating call instruction that is 478 // identical to C. 479 for (unsigned i = 0, e = deps.size(); i != e; ++i) { 480 const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; 481 // Ignore non-local dependencies. 482 if (I->second.isNonLocal()) 483 continue; 484 485 // We don't handle non-depedencies. If we already have a call, reject 486 // instruction dependencies. 487 if (I->second.isClobber() || cdep != 0) { 488 cdep = 0; 489 break; 490 } 491 492 CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); 493 // FIXME: All duplicated with non-local case. 494 if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ 495 cdep = NonLocalDepCall; 496 continue; 497 } 498 499 cdep = 0; 500 break; 501 } 502 503 if (!cdep) { 504 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 505 return nextValueNumber++; 506 } 507 508 if (cdep->getNumOperands() != C->getNumOperands()) { 509 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 510 return nextValueNumber++; 511 } 512 for (unsigned i = 1; i < C->getNumOperands(); ++i) { 513 uint32_t c_vn = lookup_or_add(C->getOperand(i)); 514 uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); 515 if (c_vn != cd_vn) { 516 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 517 return nextValueNumber++; 518 } 519 } 520 521 uint32_t v = lookup_or_add(cdep); 522 valueNumbering.insert(std::make_pair(V, v)); 523 return v; 524 525 } else { 526 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 527 return nextValueNumber++; 528 } 529 } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) { 530 Expression e = create_expression(BO); 531 532 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 533 if (EI != expressionNumbering.end()) { 534 valueNumbering.insert(std::make_pair(V, EI->second)); 535 return EI->second; 536 } else { 537 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 538 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 539 540 return nextValueNumber++; 541 } 542 } else if (CmpInst* C = dyn_cast<CmpInst>(V)) { 543 Expression e = create_expression(C); 544 545 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 546 if (EI != expressionNumbering.end()) { 547 valueNumbering.insert(std::make_pair(V, EI->second)); 548 return EI->second; 549 } else { 550 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 551 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 552 553 return nextValueNumber++; 554 } 555 } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) { 556 Expression e = create_expression(U); 557 558 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 559 if (EI != expressionNumbering.end()) { 560 valueNumbering.insert(std::make_pair(V, EI->second)); 561 return EI->second; 562 } else { 563 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 564 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 565 566 return nextValueNumber++; 567 } 568 } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) { 569 Expression e = create_expression(U); 570 571 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 572 if (EI != expressionNumbering.end()) { 573 valueNumbering.insert(std::make_pair(V, EI->second)); 574 return EI->second; 575 } else { 576 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 577 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 578 579 return nextValueNumber++; 580 } 581 } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) { 582 Expression e = create_expression(U); 583 584 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 585 if (EI != expressionNumbering.end()) { 586 valueNumbering.insert(std::make_pair(V, EI->second)); 587 return EI->second; 588 } else { 589 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 590 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 591 592 return nextValueNumber++; 593 } 594 } else if (SelectInst* U = dyn_cast<SelectInst>(V)) { 595 Expression e = create_expression(U); 596 597 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 598 if (EI != expressionNumbering.end()) { 599 valueNumbering.insert(std::make_pair(V, EI->second)); 600 return EI->second; 601 } else { 602 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 603 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 604 605 return nextValueNumber++; 606 } 607 } else if (CastInst* U = dyn_cast<CastInst>(V)) { 608 Expression e = create_expression(U); 609 610 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 611 if (EI != expressionNumbering.end()) { 612 valueNumbering.insert(std::make_pair(V, EI->second)); 613 return EI->second; 614 } else { 615 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 616 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 617 618 return nextValueNumber++; 619 } 620 } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) { 621 Expression e = create_expression(U); 622 623 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 624 if (EI != expressionNumbering.end()) { 625 valueNumbering.insert(std::make_pair(V, EI->second)); 626 return EI->second; 627 } else { 628 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 629 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 630 631 return nextValueNumber++; 632 } 633 } else { 634 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 635 return nextValueNumber++; 636 } 637} 638 639/// lookup - Returns the value number of the specified value. Fails if 640/// the value has not yet been numbered. 641uint32_t ValueTable::lookup(Value* V) const { 642 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 643 assert(VI != valueNumbering.end() && "Value not numbered?"); 644 return VI->second; 645} 646 647/// clear - Remove all entries from the ValueTable 648void ValueTable::clear() { 649 valueNumbering.clear(); 650 expressionNumbering.clear(); 651 nextValueNumber = 1; 652} 653 654/// erase - Remove a value from the value numbering 655void ValueTable::erase(Value* V) { 656 valueNumbering.erase(V); 657} 658 659/// verifyRemoved - Verify that the value is removed from all internal data 660/// structures. 661void ValueTable::verifyRemoved(const Value *V) const { 662 for (DenseMap<Value*, uint32_t>::iterator 663 I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { 664 assert(I->first != V && "Inst still occurs in value numbering map!"); 665 } 666} 667 668//===----------------------------------------------------------------------===// 669// GVN Pass 670//===----------------------------------------------------------------------===// 671 672namespace { 673 struct VISIBILITY_HIDDEN ValueNumberScope { 674 ValueNumberScope* parent; 675 DenseMap<uint32_t, Value*> table; 676 677 ValueNumberScope(ValueNumberScope* p) : parent(p) { } 678 }; 679} 680 681namespace { 682 683 class VISIBILITY_HIDDEN GVN : public FunctionPass { 684 bool runOnFunction(Function &F); 685 public: 686 static char ID; // Pass identification, replacement for typeid 687 GVN() : FunctionPass(&ID) { } 688 689 private: 690 MemoryDependenceAnalysis *MD; 691 DominatorTree *DT; 692 693 ValueTable VN; 694 DenseMap<BasicBlock*, ValueNumberScope*> localAvail; 695 696 typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType; 697 PhiMapType phiMap; 698 699 700 // This transformation requires dominator postdominator info 701 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 702 AU.addRequired<DominatorTree>(); 703 AU.addRequired<MemoryDependenceAnalysis>(); 704 AU.addRequired<AliasAnalysis>(); 705 706 AU.addPreserved<DominatorTree>(); 707 AU.addPreserved<AliasAnalysis>(); 708 } 709 710 // Helper fuctions 711 // FIXME: eliminate or document these better 712 bool processLoad(LoadInst* L, 713 SmallVectorImpl<Instruction*> &toErase); 714 bool processInstruction(Instruction* I, 715 SmallVectorImpl<Instruction*> &toErase); 716 bool processNonLocalLoad(LoadInst* L, 717 SmallVectorImpl<Instruction*> &toErase); 718 bool processBlock(BasicBlock* BB); 719 Value *GetValueForBlock(BasicBlock *BB, Instruction* orig, 720 DenseMap<BasicBlock*, Value*> &Phis, 721 bool top_level = false); 722 void dump(DenseMap<uint32_t, Value*>& d); 723 bool iterateOnFunction(Function &F); 724 Value* CollapsePhi(PHINode* p); 725 bool isSafeReplacement(PHINode* p, Instruction* inst); 726 bool performPRE(Function& F); 727 Value* lookupNumber(BasicBlock* BB, uint32_t num); 728 bool mergeBlockIntoPredecessor(BasicBlock* BB); 729 Value* AttemptRedundancyElimination(Instruction* orig, unsigned valno); 730 void cleanupGlobalSets(); 731 void verifyRemoved(const Instruction *I) const; 732 }; 733 734 char GVN::ID = 0; 735} 736 737// createGVNPass - The public interface to this file... 738FunctionPass *llvm::createGVNPass() { return new GVN(); } 739 740static RegisterPass<GVN> X("gvn", 741 "Global Value Numbering"); 742 743void GVN::dump(DenseMap<uint32_t, Value*>& d) { 744 printf("{\n"); 745 for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), 746 E = d.end(); I != E; ++I) { 747 printf("%d\n", I->first); 748 I->second->dump(); 749 } 750 printf("}\n"); 751} 752 753Value* GVN::CollapsePhi(PHINode* p) { 754 Value* constVal = p->hasConstantValue(); 755 if (!constVal) return 0; 756 757 Instruction* inst = dyn_cast<Instruction>(constVal); 758 if (!inst) 759 return constVal; 760 761 if (DT->dominates(inst, p)) 762 if (isSafeReplacement(p, inst)) 763 return inst; 764 return 0; 765} 766 767bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) { 768 if (!isa<PHINode>(inst)) 769 return true; 770 771 for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); 772 UI != E; ++UI) 773 if (PHINode* use_phi = dyn_cast<PHINode>(UI)) 774 if (use_phi->getParent() == inst->getParent()) 775 return false; 776 777 return true; 778} 779 780/// GetValueForBlock - Get the value to use within the specified basic block. 781/// available values are in Phis. 782Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, 783 DenseMap<BasicBlock*, Value*> &Phis, 784 bool top_level) { 785 786 // If we have already computed this value, return the previously computed val. 787 DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB); 788 if (V != Phis.end() && !top_level) return V->second; 789 790 // If the block is unreachable, just return undef, since this path 791 // can't actually occur at runtime. 792 if (!DT->isReachableFromEntry(BB)) 793 return Phis[BB] = UndefValue::get(orig->getType()); 794 795 if (BasicBlock *Pred = BB->getSinglePredecessor()) { 796 Value *ret = GetValueForBlock(Pred, orig, Phis); 797 Phis[BB] = ret; 798 return ret; 799 } 800 801 // Get the number of predecessors of this block so we can reserve space later. 802 // If there is already a PHI in it, use the #preds from it, otherwise count. 803 // Getting it from the PHI is constant time. 804 unsigned NumPreds; 805 if (PHINode *ExistingPN = dyn_cast<PHINode>(BB->begin())) 806 NumPreds = ExistingPN->getNumIncomingValues(); 807 else 808 NumPreds = std::distance(pred_begin(BB), pred_end(BB)); 809 810 // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so 811 // now, then get values to fill in the incoming values for the PHI. 812 PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle", 813 BB->begin()); 814 PN->reserveOperandSpace(NumPreds); 815 816 Phis.insert(std::make_pair(BB, PN)); 817 818 // Fill in the incoming values for the block. 819 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 820 Value* val = GetValueForBlock(*PI, orig, Phis); 821 PN->addIncoming(val, *PI); 822 } 823 824 VN.getAliasAnalysis()->copyValue(orig, PN); 825 826 // Attempt to collapse PHI nodes that are trivially redundant 827 Value* v = CollapsePhi(PN); 828 if (!v) { 829 // Cache our phi construction results 830 if (LoadInst* L = dyn_cast<LoadInst>(orig)) 831 phiMap[L->getPointerOperand()].insert(PN); 832 else 833 phiMap[orig].insert(PN); 834 835 return PN; 836 } 837 838 PN->replaceAllUsesWith(v); 839 if (isa<PointerType>(v->getType())) 840 MD->invalidateCachedPointerInfo(v); 841 842 for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(), 843 E = Phis.end(); I != E; ++I) 844 if (I->second == PN) 845 I->second = v; 846 847 DEBUG(cerr << "GVN removed: " << *PN); 848 MD->removeInstruction(PN); 849 PN->eraseFromParent(); 850 DEBUG(verifyRemoved(PN)); 851 852 Phis[BB] = v; 853 return v; 854} 855 856/// IsValueFullyAvailableInBlock - Return true if we can prove that the value 857/// we're analyzing is fully available in the specified block. As we go, keep 858/// track of which blocks we know are fully alive in FullyAvailableBlocks. This 859/// map is actually a tri-state map with the following values: 860/// 0) we know the block *is not* fully available. 861/// 1) we know the block *is* fully available. 862/// 2) we do not know whether the block is fully available or not, but we are 863/// currently speculating that it will be. 864/// 3) we are speculating for this block and have used that to speculate for 865/// other blocks. 866static bool IsValueFullyAvailableInBlock(BasicBlock *BB, 867 DenseMap<BasicBlock*, char> &FullyAvailableBlocks) { 868 // Optimistically assume that the block is fully available and check to see 869 // if we already know about this block in one lookup. 870 std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV = 871 FullyAvailableBlocks.insert(std::make_pair(BB, 2)); 872 873 // If the entry already existed for this block, return the precomputed value. 874 if (!IV.second) { 875 // If this is a speculative "available" value, mark it as being used for 876 // speculation of other blocks. 877 if (IV.first->second == 2) 878 IV.first->second = 3; 879 return IV.first->second != 0; 880 } 881 882 // Otherwise, see if it is fully available in all predecessors. 883 pred_iterator PI = pred_begin(BB), PE = pred_end(BB); 884 885 // If this block has no predecessors, it isn't live-in here. 886 if (PI == PE) 887 goto SpeculationFailure; 888 889 for (; PI != PE; ++PI) 890 // If the value isn't fully available in one of our predecessors, then it 891 // isn't fully available in this block either. Undo our previous 892 // optimistic assumption and bail out. 893 if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) 894 goto SpeculationFailure; 895 896 return true; 897 898// SpeculationFailure - If we get here, we found out that this is not, after 899// all, a fully-available block. We have a problem if we speculated on this and 900// used the speculation to mark other blocks as available. 901SpeculationFailure: 902 char &BBVal = FullyAvailableBlocks[BB]; 903 904 // If we didn't speculate on this, just return with it set to false. 905 if (BBVal == 2) { 906 BBVal = 0; 907 return false; 908 } 909 910 // If we did speculate on this value, we could have blocks set to 1 that are 911 // incorrect. Walk the (transitive) successors of this block and mark them as 912 // 0 if set to one. 913 SmallVector<BasicBlock*, 32> BBWorklist; 914 BBWorklist.push_back(BB); 915 916 while (!BBWorklist.empty()) { 917 BasicBlock *Entry = BBWorklist.pop_back_val(); 918 // Note that this sets blocks to 0 (unavailable) if they happen to not 919 // already be in FullyAvailableBlocks. This is safe. 920 char &EntryVal = FullyAvailableBlocks[Entry]; 921 if (EntryVal == 0) continue; // Already unavailable. 922 923 // Mark as unavailable. 924 EntryVal = 0; 925 926 for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I) 927 BBWorklist.push_back(*I); 928 } 929 930 return false; 931} 932 933/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are 934/// non-local by performing PHI construction. 935bool GVN::processNonLocalLoad(LoadInst *LI, 936 SmallVectorImpl<Instruction*> &toErase) { 937 // Find the non-local dependencies of the load. 938 SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps; 939 MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), 940 Deps); 941 //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI); 942 943 // If we had to process more than one hundred blocks to find the 944 // dependencies, this load isn't worth worrying about. Optimizing 945 // it will be too expensive. 946 if (Deps.size() > 100) 947 return false; 948 949 // If we had a phi translation failure, we'll have a single entry which is a 950 // clobber in the current block. Reject this early. 951 if (Deps.size() == 1 && Deps[0].second.isClobber()) 952 return false; 953 954 // Filter out useless results (non-locals, etc). Keep track of the blocks 955 // where we have a value available in repl, also keep track of whether we see 956 // dependencies that produce an unknown value for the load (such as a call 957 // that could potentially clobber the load). 958 SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock; 959 SmallVector<BasicBlock*, 16> UnavailableBlocks; 960 961 for (unsigned i = 0, e = Deps.size(); i != e; ++i) { 962 BasicBlock *DepBB = Deps[i].first; 963 MemDepResult DepInfo = Deps[i].second; 964 965 if (DepInfo.isClobber()) { 966 UnavailableBlocks.push_back(DepBB); 967 continue; 968 } 969 970 Instruction *DepInst = DepInfo.getInst(); 971 972 // Loading the allocation -> undef. 973 if (isa<AllocationInst>(DepInst)) { 974 ValuesPerBlock.push_back(std::make_pair(DepBB, 975 UndefValue::get(LI->getType()))); 976 continue; 977 } 978 979 if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) { 980 // Reject loads and stores that are to the same address but are of 981 // different types. 982 // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because 983 // of bitfield access, it would be interesting to optimize for it at some 984 // point. 985 if (S->getOperand(0)->getType() != LI->getType()) { 986 UnavailableBlocks.push_back(DepBB); 987 continue; 988 } 989 990 ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0))); 991 992 } else if (LoadInst* LD = dyn_cast<LoadInst>(DepInst)) { 993 if (LD->getType() != LI->getType()) { 994 UnavailableBlocks.push_back(DepBB); 995 continue; 996 } 997 ValuesPerBlock.push_back(std::make_pair(DepBB, LD)); 998 } else { 999 UnavailableBlocks.push_back(DepBB); 1000 continue; 1001 } 1002 } 1003 1004 // If we have no predecessors that produce a known value for this load, exit 1005 // early. 1006 if (ValuesPerBlock.empty()) return false; 1007 1008 // If all of the instructions we depend on produce a known value for this 1009 // load, then it is fully redundant and we can use PHI insertion to compute 1010 // its value. Insert PHIs and remove the fully redundant value now. 1011 if (UnavailableBlocks.empty()) { 1012 // Use cached PHI construction information from previous runs 1013 SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()]; 1014 // FIXME: What does phiMap do? Are we positive it isn't getting invalidated? 1015 for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end(); 1016 I != E; ++I) { 1017 if ((*I)->getParent() == LI->getParent()) { 1018 DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI); 1019 LI->replaceAllUsesWith(*I); 1020 if (isa<PointerType>((*I)->getType())) 1021 MD->invalidateCachedPointerInfo(*I); 1022 toErase.push_back(LI); 1023 NumGVNLoad++; 1024 return true; 1025 } 1026 1027 ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); 1028 } 1029 1030 DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI); 1031 1032 DenseMap<BasicBlock*, Value*> BlockReplValues; 1033 BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); 1034 // Perform PHI construction. 1035 Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); 1036 LI->replaceAllUsesWith(v); 1037 1038 if (isa<PHINode>(v)) 1039 v->takeName(LI); 1040 if (isa<PointerType>(v->getType())) 1041 MD->invalidateCachedPointerInfo(v); 1042 toErase.push_back(LI); 1043 NumGVNLoad++; 1044 return true; 1045 } 1046 1047 if (!EnablePRE || !EnableLoadPRE) 1048 return false; 1049 1050 // Okay, we have *some* definitions of the value. This means that the value 1051 // is available in some of our (transitive) predecessors. Lets think about 1052 // doing PRE of this load. This will involve inserting a new load into the 1053 // predecessor when it's not available. We could do this in general, but 1054 // prefer to not increase code size. As such, we only do this when we know 1055 // that we only have to insert *one* load (which means we're basically moving 1056 // the load, not inserting a new one). 1057 1058 SmallPtrSet<BasicBlock *, 4> Blockers; 1059 for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) 1060 Blockers.insert(UnavailableBlocks[i]); 1061 1062 // Lets find first basic block with more than one predecessor. Walk backwards 1063 // through predecessors if needed. 1064 BasicBlock *LoadBB = LI->getParent(); 1065 BasicBlock *TmpBB = LoadBB; 1066 1067 bool isSinglePred = false; 1068 while (TmpBB->getSinglePredecessor()) { 1069 isSinglePred = true; 1070 TmpBB = TmpBB->getSinglePredecessor(); 1071 if (!TmpBB) // If haven't found any, bail now. 1072 return false; 1073 if (TmpBB == LoadBB) // Infinite (unreachable) loop. 1074 return false; 1075 if (Blockers.count(TmpBB)) 1076 return false; 1077 } 1078 1079 assert(TmpBB); 1080 LoadBB = TmpBB; 1081 1082 // If we have a repl set with LI itself in it, this means we have a loop where 1083 // at least one of the values is LI. Since this means that we won't be able 1084 // to eliminate LI even if we insert uses in the other predecessors, we will 1085 // end up increasing code size. Reject this by scanning for LI. 1086 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) 1087 if (ValuesPerBlock[i].second == LI) 1088 return false; 1089 1090 if (isSinglePred) { 1091 bool isHot = false; 1092 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) 1093 if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].second)) 1094 // "Hot" Instruction is in some loop (because it dominates its dep. 1095 // instruction). 1096 if (DT->dominates(LI, I)) { 1097 isHot = true; 1098 break; 1099 } 1100 1101 // We are interested only in "hot" instructions. We don't want to do any 1102 // mis-optimizations here. 1103 if (!isHot) 1104 return false; 1105 } 1106 1107 // Okay, we have some hope :). Check to see if the loaded value is fully 1108 // available in all but one predecessor. 1109 // FIXME: If we could restructure the CFG, we could make a common pred with 1110 // all the preds that don't have an available LI and insert a new load into 1111 // that one block. 1112 BasicBlock *UnavailablePred = 0; 1113 1114 DenseMap<BasicBlock*, char> FullyAvailableBlocks; 1115 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) 1116 FullyAvailableBlocks[ValuesPerBlock[i].first] = true; 1117 for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) 1118 FullyAvailableBlocks[UnavailableBlocks[i]] = false; 1119 1120 for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); 1121 PI != E; ++PI) { 1122 if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) 1123 continue; 1124 1125 // If this load is not available in multiple predecessors, reject it. 1126 if (UnavailablePred && UnavailablePred != *PI) 1127 return false; 1128 UnavailablePred = *PI; 1129 } 1130 1131 assert(UnavailablePred != 0 && 1132 "Fully available value should be eliminated above!"); 1133 1134 // If the loaded pointer is PHI node defined in this block, do PHI translation 1135 // to get its value in the predecessor. 1136 Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred); 1137 1138 // Make sure the value is live in the predecessor. If it was defined by a 1139 // non-PHI instruction in this block, we don't know how to recompute it above. 1140 if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr)) 1141 if (!DT->dominates(LPInst->getParent(), UnavailablePred)) { 1142 DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " 1143 << *LPInst << *LI << "\n"); 1144 return false; 1145 } 1146 1147 // We don't currently handle critical edges :( 1148 if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { 1149 DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" 1150 << UnavailablePred->getName() << "': " << *LI); 1151 return false; 1152 } 1153 1154 // Okay, we can eliminate this load by inserting a reload in the predecessor 1155 // and using PHI construction to get the value in the other predecessors, do 1156 // it. 1157 DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI); 1158 1159 Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, 1160 LI->getAlignment(), 1161 UnavailablePred->getTerminator()); 1162 1163 SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()]; 1164 for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end(); 1165 I != E; ++I) 1166 ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); 1167 1168 DenseMap<BasicBlock*, Value*> BlockReplValues; 1169 BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); 1170 BlockReplValues[UnavailablePred] = NewLoad; 1171 1172 // Perform PHI construction. 1173 Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); 1174 LI->replaceAllUsesWith(v); 1175 if (isa<PHINode>(v)) 1176 v->takeName(LI); 1177 if (isa<PointerType>(v->getType())) 1178 MD->invalidateCachedPointerInfo(v); 1179 toErase.push_back(LI); 1180 NumPRELoad++; 1181 return true; 1182} 1183 1184/// processLoad - Attempt to eliminate a load, first by eliminating it 1185/// locally, and then attempting non-local elimination if that fails. 1186bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { 1187 if (L->isVolatile()) 1188 return false; 1189 1190 Value* pointer = L->getPointerOperand(); 1191 1192 // ... to a pointer that has been loaded from before... 1193 MemDepResult dep = MD->getDependency(L); 1194 1195 // If the value isn't available, don't do anything! 1196 if (dep.isClobber()) { 1197 DEBUG( 1198 // fast print dep, using operator<< on instruction would be too slow 1199 DOUT << "GVN: load "; 1200 WriteAsOperand(*DOUT.stream(), L); 1201 Instruction *I = dep.getInst(); 1202 DOUT << " is clobbered by " << *I; 1203 ); 1204 return false; 1205 } 1206 1207 // If it is defined in another block, try harder. 1208 if (dep.isNonLocal()) 1209 return processNonLocalLoad(L, toErase); 1210 1211 Instruction *DepInst = dep.getInst(); 1212 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { 1213 // Only forward substitute stores to loads of the same type. 1214 // FIXME: Could do better! 1215 if (DepSI->getPointerOperand()->getType() != pointer->getType()) 1216 return false; 1217 1218 // Remove it! 1219 L->replaceAllUsesWith(DepSI->getOperand(0)); 1220 if (isa<PointerType>(DepSI->getOperand(0)->getType())) 1221 MD->invalidateCachedPointerInfo(DepSI->getOperand(0)); 1222 toErase.push_back(L); 1223 NumGVNLoad++; 1224 return true; 1225 } 1226 1227 if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) { 1228 // Only forward substitute stores to loads of the same type. 1229 // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian. 1230 if (DepLI->getType() != L->getType()) 1231 return false; 1232 1233 // Remove it! 1234 L->replaceAllUsesWith(DepLI); 1235 if (isa<PointerType>(DepLI->getType())) 1236 MD->invalidateCachedPointerInfo(DepLI); 1237 toErase.push_back(L); 1238 NumGVNLoad++; 1239 return true; 1240 } 1241 1242 // If this load really doesn't depend on anything, then we must be loading an 1243 // undef value. This can happen when loading for a fresh allocation with no 1244 // intervening stores, for example. 1245 if (isa<AllocationInst>(DepInst)) { 1246 L->replaceAllUsesWith(UndefValue::get(L->getType())); 1247 toErase.push_back(L); 1248 NumGVNLoad++; 1249 return true; 1250 } 1251 1252 return false; 1253} 1254 1255Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) { 1256 DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB); 1257 if (I == localAvail.end()) 1258 return 0; 1259 1260 ValueNumberScope* locals = I->second; 1261 1262 while (locals) { 1263 DenseMap<uint32_t, Value*>::iterator I = locals->table.find(num); 1264 if (I != locals->table.end()) 1265 return I->second; 1266 else 1267 locals = locals->parent; 1268 } 1269 1270 return 0; 1271} 1272 1273/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination 1274/// by inheritance from the dominator fails, see if we can perform phi 1275/// construction to eliminate the redundancy. 1276Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) { 1277 BasicBlock* BaseBlock = orig->getParent(); 1278 1279 SmallPtrSet<BasicBlock*, 4> Visited; 1280 SmallVector<BasicBlock*, 8> Stack; 1281 Stack.push_back(BaseBlock); 1282 1283 DenseMap<BasicBlock*, Value*> Results; 1284 1285 // Walk backwards through our predecessors, looking for instances of the 1286 // value number we're looking for. Instances are recorded in the Results 1287 // map, which is then used to perform phi construction. 1288 while (!Stack.empty()) { 1289 BasicBlock* Current = Stack.back(); 1290 Stack.pop_back(); 1291 1292 // If we've walked all the way to a proper dominator, then give up. Cases 1293 // where the instance is in the dominator will have been caught by the fast 1294 // path, and any cases that require phi construction further than this are 1295 // probably not worth it anyways. Note that this is a SIGNIFICANT compile 1296 // time improvement. 1297 if (DT->properlyDominates(Current, orig->getParent())) return 0; 1298 1299 DenseMap<BasicBlock*, ValueNumberScope*>::iterator LA = 1300 localAvail.find(Current); 1301 if (LA == localAvail.end()) return 0; 1302 DenseMap<uint32_t, Value*>::iterator V = LA->second->table.find(valno); 1303 1304 if (V != LA->second->table.end()) { 1305 // Found an instance, record it. 1306 Results.insert(std::make_pair(Current, V->second)); 1307 continue; 1308 } 1309 1310 // If we reach the beginning of the function, then give up. 1311 if (pred_begin(Current) == pred_end(Current)) 1312 return 0; 1313 1314 for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current); 1315 PI != PE; ++PI) 1316 if (Visited.insert(*PI)) 1317 Stack.push_back(*PI); 1318 } 1319 1320 // If we didn't find instances, give up. Otherwise, perform phi construction. 1321 if (Results.size() == 0) 1322 return 0; 1323 else 1324 return GetValueForBlock(BaseBlock, orig, Results, true); 1325} 1326 1327/// processInstruction - When calculating availability, handle an instruction 1328/// by inserting it into the appropriate sets 1329bool GVN::processInstruction(Instruction *I, 1330 SmallVectorImpl<Instruction*> &toErase) { 1331 if (LoadInst* L = dyn_cast<LoadInst>(I)) { 1332 bool changed = processLoad(L, toErase); 1333 1334 if (!changed) { 1335 unsigned num = VN.lookup_or_add(L); 1336 localAvail[I->getParent()]->table.insert(std::make_pair(num, L)); 1337 } 1338 1339 return changed; 1340 } 1341 1342 uint32_t nextNum = VN.getNextUnusedValueNumber(); 1343 unsigned num = VN.lookup_or_add(I); 1344 1345 if (BranchInst* BI = dyn_cast<BranchInst>(I)) { 1346 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1347 1348 if (!BI->isConditional() || isa<Constant>(BI->getCondition())) 1349 return false; 1350 1351 Value* branchCond = BI->getCondition(); 1352 uint32_t condVN = VN.lookup_or_add(branchCond); 1353 1354 BasicBlock* trueSucc = BI->getSuccessor(0); 1355 BasicBlock* falseSucc = BI->getSuccessor(1); 1356 1357 if (trueSucc->getSinglePredecessor()) 1358 localAvail[trueSucc]->table[condVN] = ConstantInt::getTrue(); 1359 if (falseSucc->getSinglePredecessor()) 1360 localAvail[falseSucc]->table[condVN] = ConstantInt::getFalse(); 1361 1362 return false; 1363 1364 // Allocations are always uniquely numbered, so we can save time and memory 1365 // by fast failing them. 1366 } else if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) { 1367 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1368 return false; 1369 } 1370 1371 // Collapse PHI nodes 1372 if (PHINode* p = dyn_cast<PHINode>(I)) { 1373 Value* constVal = CollapsePhi(p); 1374 1375 if (constVal) { 1376 for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end(); 1377 PI != PE; ++PI) 1378 PI->second.erase(p); 1379 1380 p->replaceAllUsesWith(constVal); 1381 if (isa<PointerType>(constVal->getType())) 1382 MD->invalidateCachedPointerInfo(constVal); 1383 VN.erase(p); 1384 1385 toErase.push_back(p); 1386 } else { 1387 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1388 } 1389 1390 // If the number we were assigned was a brand new VN, then we don't 1391 // need to do a lookup to see if the number already exists 1392 // somewhere in the domtree: it can't! 1393 } else if (num == nextNum) { 1394 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1395 1396 // Perform fast-path value-number based elimination of values inherited from 1397 // dominators. 1398 } else if (Value* repl = lookupNumber(I->getParent(), num)) { 1399 // Remove it! 1400 VN.erase(I); 1401 I->replaceAllUsesWith(repl); 1402 if (isa<PointerType>(repl->getType())) 1403 MD->invalidateCachedPointerInfo(repl); 1404 toErase.push_back(I); 1405 return true; 1406 1407#if 0 1408 // Perform slow-pathvalue-number based elimination with phi construction. 1409 } else if (Value* repl = AttemptRedundancyElimination(I, num)) { 1410 // Remove it! 1411 VN.erase(I); 1412 I->replaceAllUsesWith(repl); 1413 if (isa<PointerType>(repl->getType())) 1414 MD->invalidateCachedPointerInfo(repl); 1415 toErase.push_back(I); 1416 return true; 1417#endif 1418 } else { 1419 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1420 } 1421 1422 return false; 1423} 1424 1425/// runOnFunction - This is the main transformation entry point for a function. 1426bool GVN::runOnFunction(Function& F) { 1427 MD = &getAnalysis<MemoryDependenceAnalysis>(); 1428 DT = &getAnalysis<DominatorTree>(); 1429 VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); 1430 VN.setMemDep(MD); 1431 VN.setDomTree(DT); 1432 1433 bool changed = false; 1434 bool shouldContinue = true; 1435 1436 // Merge unconditional branches, allowing PRE to catch more 1437 // optimization opportunities. 1438 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { 1439 BasicBlock* BB = FI; 1440 ++FI; 1441 bool removedBlock = MergeBlockIntoPredecessor(BB, this); 1442 if (removedBlock) NumGVNBlocks++; 1443 1444 changed |= removedBlock; 1445 } 1446 1447 unsigned Iteration = 0; 1448 1449 while (shouldContinue) { 1450 DEBUG(cerr << "GVN iteration: " << Iteration << "\n"); 1451 shouldContinue = iterateOnFunction(F); 1452 changed |= shouldContinue; 1453 ++Iteration; 1454 } 1455 1456 if (EnablePRE) { 1457 bool PREChanged = true; 1458 while (PREChanged) { 1459 PREChanged = performPRE(F); 1460 changed |= PREChanged; 1461 } 1462 } 1463 // FIXME: Should perform GVN again after PRE does something. PRE can move 1464 // computations into blocks where they become fully redundant. Note that 1465 // we can't do this until PRE's critical edge splitting updates memdep. 1466 // Actually, when this happens, we should just fully integrate PRE into GVN. 1467 1468 cleanupGlobalSets(); 1469 1470 return changed; 1471} 1472 1473 1474bool GVN::processBlock(BasicBlock* BB) { 1475 // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and 1476 // incrementing BI before processing an instruction). 1477 SmallVector<Instruction*, 8> toErase; 1478 bool changed_function = false; 1479 1480 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 1481 BI != BE;) { 1482 changed_function |= processInstruction(BI, toErase); 1483 if (toErase.empty()) { 1484 ++BI; 1485 continue; 1486 } 1487 1488 // If we need some instructions deleted, do it now. 1489 NumGVNInstr += toErase.size(); 1490 1491 // Avoid iterator invalidation. 1492 bool AtStart = BI == BB->begin(); 1493 if (!AtStart) 1494 --BI; 1495 1496 for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), 1497 E = toErase.end(); I != E; ++I) { 1498 DEBUG(cerr << "GVN removed: " << **I); 1499 MD->removeInstruction(*I); 1500 (*I)->eraseFromParent(); 1501 DEBUG(verifyRemoved(*I)); 1502 } 1503 toErase.clear(); 1504 1505 if (AtStart) 1506 BI = BB->begin(); 1507 else 1508 ++BI; 1509 } 1510 1511 return changed_function; 1512} 1513 1514/// performPRE - Perform a purely local form of PRE that looks for diamond 1515/// control flow patterns and attempts to perform simple PRE at the join point. 1516bool GVN::performPRE(Function& F) { 1517 bool Changed = false; 1518 SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit; 1519 DenseMap<BasicBlock*, Value*> predMap; 1520 for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), 1521 DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { 1522 BasicBlock* CurrentBlock = *DI; 1523 1524 // Nothing to PRE in the entry block. 1525 if (CurrentBlock == &F.getEntryBlock()) continue; 1526 1527 for (BasicBlock::iterator BI = CurrentBlock->begin(), 1528 BE = CurrentBlock->end(); BI != BE; ) { 1529 Instruction *CurInst = BI++; 1530 1531 if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) || 1532 isa<PHINode>(CurInst) || (CurInst->getType() == Type::VoidTy) || 1533 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || 1534 isa<DbgInfoIntrinsic>(CurInst)) 1535 continue; 1536 1537 uint32_t valno = VN.lookup(CurInst); 1538 1539 // Look for the predecessors for PRE opportunities. We're 1540 // only trying to solve the basic diamond case, where 1541 // a value is computed in the successor and one predecessor, 1542 // but not the other. We also explicitly disallow cases 1543 // where the successor is its own predecessor, because they're 1544 // more complicated to get right. 1545 unsigned numWith = 0; 1546 unsigned numWithout = 0; 1547 BasicBlock* PREPred = 0; 1548 predMap.clear(); 1549 1550 for (pred_iterator PI = pred_begin(CurrentBlock), 1551 PE = pred_end(CurrentBlock); PI != PE; ++PI) { 1552 // We're not interested in PRE where the block is its 1553 // own predecessor, on in blocks with predecessors 1554 // that are not reachable. 1555 if (*PI == CurrentBlock) { 1556 numWithout = 2; 1557 break; 1558 } else if (!localAvail.count(*PI)) { 1559 numWithout = 2; 1560 break; 1561 } 1562 1563 DenseMap<uint32_t, Value*>::iterator predV = 1564 localAvail[*PI]->table.find(valno); 1565 if (predV == localAvail[*PI]->table.end()) { 1566 PREPred = *PI; 1567 numWithout++; 1568 } else if (predV->second == CurInst) { 1569 numWithout = 2; 1570 } else { 1571 predMap[*PI] = predV->second; 1572 numWith++; 1573 } 1574 } 1575 1576 // Don't do PRE when it might increase code size, i.e. when 1577 // we would need to insert instructions in more than one pred. 1578 if (numWithout != 1 || numWith == 0) 1579 continue; 1580 1581 // We can't do PRE safely on a critical edge, so instead we schedule 1582 // the edge to be split and perform the PRE the next time we iterate 1583 // on the function. 1584 unsigned succNum = 0; 1585 for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors(); 1586 i != e; ++i) 1587 if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) { 1588 succNum = i; 1589 break; 1590 } 1591 1592 if (isCriticalEdge(PREPred->getTerminator(), succNum)) { 1593 toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum)); 1594 continue; 1595 } 1596 1597 // Instantiate the expression the in predecessor that lacked it. 1598 // Because we are going top-down through the block, all value numbers 1599 // will be available in the predecessor by the time we need them. Any 1600 // that weren't original present will have been instantiated earlier 1601 // in this loop. 1602 Instruction* PREInstr = CurInst->clone(); 1603 bool success = true; 1604 for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { 1605 Value *Op = PREInstr->getOperand(i); 1606 if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) 1607 continue; 1608 1609 if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) { 1610 PREInstr->setOperand(i, V); 1611 } else { 1612 success = false; 1613 break; 1614 } 1615 } 1616 1617 // Fail out if we encounter an operand that is not available in 1618 // the PRE predecessor. This is typically because of loads which 1619 // are not value numbered precisely. 1620 if (!success) { 1621 delete PREInstr; 1622 DEBUG(verifyRemoved(PREInstr)); 1623 continue; 1624 } 1625 1626 PREInstr->insertBefore(PREPred->getTerminator()); 1627 PREInstr->setName(CurInst->getName() + ".pre"); 1628 predMap[PREPred] = PREInstr; 1629 VN.add(PREInstr, valno); 1630 NumGVNPRE++; 1631 1632 // Update the availability map to include the new instruction. 1633 localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr)); 1634 1635 // Create a PHI to make the value available in this block. 1636 PHINode* Phi = PHINode::Create(CurInst->getType(), 1637 CurInst->getName() + ".pre-phi", 1638 CurrentBlock->begin()); 1639 for (pred_iterator PI = pred_begin(CurrentBlock), 1640 PE = pred_end(CurrentBlock); PI != PE; ++PI) 1641 Phi->addIncoming(predMap[*PI], *PI); 1642 1643 VN.add(Phi, valno); 1644 localAvail[CurrentBlock]->table[valno] = Phi; 1645 1646 CurInst->replaceAllUsesWith(Phi); 1647 if (isa<PointerType>(Phi->getType())) 1648 MD->invalidateCachedPointerInfo(Phi); 1649 VN.erase(CurInst); 1650 1651 DEBUG(cerr << "GVN PRE removed: " << *CurInst); 1652 MD->removeInstruction(CurInst); 1653 CurInst->eraseFromParent(); 1654 DEBUG(verifyRemoved(CurInst)); 1655 Changed = true; 1656 } 1657 } 1658 1659 for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator 1660 I = toSplit.begin(), E = toSplit.end(); I != E; ++I) 1661 SplitCriticalEdge(I->first, I->second, this); 1662 1663 return Changed || toSplit.size(); 1664} 1665 1666/// iterateOnFunction - Executes one iteration of GVN 1667bool GVN::iterateOnFunction(Function &F) { 1668 cleanupGlobalSets(); 1669 1670 for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), 1671 DE = df_end(DT->getRootNode()); DI != DE; ++DI) { 1672 if (DI->getIDom()) 1673 localAvail[DI->getBlock()] = 1674 new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]); 1675 else 1676 localAvail[DI->getBlock()] = new ValueNumberScope(0); 1677 } 1678 1679 // Top-down walk of the dominator tree 1680 bool changed = false; 1681#if 0 1682 // Needed for value numbering with phi construction to work. 1683 ReversePostOrderTraversal<Function*> RPOT(&F); 1684 for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(), 1685 RE = RPOT.end(); RI != RE; ++RI) 1686 changed |= processBlock(*RI); 1687#else 1688 for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), 1689 DE = df_end(DT->getRootNode()); DI != DE; ++DI) 1690 changed |= processBlock(DI->getBlock()); 1691#endif 1692 1693 return changed; 1694} 1695 1696void GVN::cleanupGlobalSets() { 1697 VN.clear(); 1698 phiMap.clear(); 1699 1700 for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator 1701 I = localAvail.begin(), E = localAvail.end(); I != E; ++I) 1702 delete I->second; 1703 localAvail.clear(); 1704} 1705 1706/// verifyRemoved - Verify that the specified instruction does not occur in our 1707/// internal data structures. 1708void GVN::verifyRemoved(const Instruction *Inst) const { 1709 VN.verifyRemoved(Inst); 1710 1711 // Walk through the PHI map to make sure the instruction isn't hiding in there 1712 // somewhere. 1713 for (PhiMapType::iterator 1714 I = phiMap.begin(), E = phiMap.end(); I != E; ++I) { 1715 assert(I->first != Inst && "Inst is still a key in PHI map!"); 1716 1717 for (SmallPtrSet<Instruction*, 4>::iterator 1718 II = I->second.begin(), IE = I->second.end(); II != IE; ++II) { 1719 assert(*II != Inst && "Inst is still a value in PHI map!"); 1720 } 1721 } 1722 1723 // Walk through the value number scope to make sure the instruction isn't 1724 // ferreted away in it. 1725 for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator 1726 I = localAvail.begin(), E = localAvail.end(); I != E; ++I) { 1727 const ValueNumberScope *VNS = I->second; 1728 1729 while (VNS) { 1730 for (DenseMap<uint32_t, Value*>::iterator 1731 II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) { 1732 assert(II->second != Inst && "Inst still in value numbering scope!"); 1733 } 1734 1735 VNS = VNS->parent; 1736 } 1737 } 1738} 1739