GVN.cpp revision 3c04296b837e96f1579b4fc8e94cc1d1848ea2aa
1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs global value numbering to eliminate fully redundant 11// instructions. It also performs simple dead load elimination. 12// 13// Note that this pass does the value numbering itself; it does not use the 14// ValueNumbering analysis passes. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "gvn" 19#include "llvm/Transforms/Scalar.h" 20#include "llvm/BasicBlock.h" 21#include "llvm/Constants.h" 22#include "llvm/DerivedTypes.h" 23#include "llvm/Function.h" 24#include "llvm/IntrinsicInst.h" 25#include "llvm/Value.h" 26#include "llvm/ADT/DenseMap.h" 27#include "llvm/ADT/DepthFirstIterator.h" 28#include "llvm/ADT/PostOrderIterator.h" 29#include "llvm/ADT/SmallPtrSet.h" 30#include "llvm/ADT/SmallVector.h" 31#include "llvm/ADT/Statistic.h" 32#include "llvm/Analysis/Dominators.h" 33#include "llvm/Analysis/AliasAnalysis.h" 34#include "llvm/Analysis/MemoryDependenceAnalysis.h" 35#include "llvm/Support/CFG.h" 36#include "llvm/Support/CommandLine.h" 37#include "llvm/Support/Compiler.h" 38#include "llvm/Support/Debug.h" 39#include "llvm/Transforms/Utils/BasicBlockUtils.h" 40#include <cstdio> 41using namespace llvm; 42 43STATISTIC(NumGVNInstr, "Number of instructions deleted"); 44STATISTIC(NumGVNLoad, "Number of loads deleted"); 45STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); 46STATISTIC(NumGVNBlocks, "Number of blocks merged"); 47STATISTIC(NumPRELoad, "Number of loads PRE'd"); 48 49static cl::opt<bool> EnablePRE("enable-pre", 50 cl::init(true), cl::Hidden); 51cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); 52 53//===----------------------------------------------------------------------===// 54// ValueTable Class 55//===----------------------------------------------------------------------===// 56 57/// This class holds the mapping between values and value numbers. It is used 58/// as an efficient mechanism to determine the expression-wise equivalence of 59/// two values. 60namespace { 61 struct VISIBILITY_HIDDEN Expression { 62 enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM, 63 FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, 64 ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, 65 ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, 66 FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, 67 FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, 68 FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, 69 SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, 70 FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, 71 PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, 72 EMPTY, TOMBSTONE }; 73 74 ExpressionOpcode opcode; 75 const Type* type; 76 uint32_t firstVN; 77 uint32_t secondVN; 78 uint32_t thirdVN; 79 SmallVector<uint32_t, 4> varargs; 80 Value* function; 81 82 Expression() { } 83 Expression(ExpressionOpcode o) : opcode(o) { } 84 85 bool operator==(const Expression &other) const { 86 if (opcode != other.opcode) 87 return false; 88 else if (opcode == EMPTY || opcode == TOMBSTONE) 89 return true; 90 else if (type != other.type) 91 return false; 92 else if (function != other.function) 93 return false; 94 else if (firstVN != other.firstVN) 95 return false; 96 else if (secondVN != other.secondVN) 97 return false; 98 else if (thirdVN != other.thirdVN) 99 return false; 100 else { 101 if (varargs.size() != other.varargs.size()) 102 return false; 103 104 for (size_t i = 0; i < varargs.size(); ++i) 105 if (varargs[i] != other.varargs[i]) 106 return false; 107 108 return true; 109 } 110 } 111 112 bool operator!=(const Expression &other) const { 113 return !(*this == other); 114 } 115 }; 116 117 class VISIBILITY_HIDDEN ValueTable { 118 private: 119 DenseMap<Value*, uint32_t> valueNumbering; 120 DenseMap<Expression, uint32_t> expressionNumbering; 121 AliasAnalysis* AA; 122 MemoryDependenceAnalysis* MD; 123 DominatorTree* DT; 124 125 uint32_t nextValueNumber; 126 127 Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); 128 Expression::ExpressionOpcode getOpcode(CmpInst* C); 129 Expression::ExpressionOpcode getOpcode(CastInst* C); 130 Expression create_expression(BinaryOperator* BO); 131 Expression create_expression(CmpInst* C); 132 Expression create_expression(ShuffleVectorInst* V); 133 Expression create_expression(ExtractElementInst* C); 134 Expression create_expression(InsertElementInst* V); 135 Expression create_expression(SelectInst* V); 136 Expression create_expression(CastInst* C); 137 Expression create_expression(GetElementPtrInst* G); 138 Expression create_expression(CallInst* C); 139 Expression create_expression(Constant* C); 140 public: 141 ValueTable() : nextValueNumber(1) { } 142 uint32_t lookup_or_add(Value* V); 143 uint32_t lookup(Value* V) const; 144 void add(Value* V, uint32_t num); 145 void clear(); 146 void erase(Value* v); 147 unsigned size(); 148 void setAliasAnalysis(AliasAnalysis* A) { AA = A; } 149 AliasAnalysis *getAliasAnalysis() const { return AA; } 150 void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } 151 void setDomTree(DominatorTree* D) { DT = D; } 152 uint32_t getNextUnusedValueNumber() { return nextValueNumber; } 153 void verifyRemoved(const Value *) const; 154 }; 155} 156 157namespace llvm { 158template <> struct DenseMapInfo<Expression> { 159 static inline Expression getEmptyKey() { 160 return Expression(Expression::EMPTY); 161 } 162 163 static inline Expression getTombstoneKey() { 164 return Expression(Expression::TOMBSTONE); 165 } 166 167 static unsigned getHashValue(const Expression e) { 168 unsigned hash = e.opcode; 169 170 hash = e.firstVN + hash * 37; 171 hash = e.secondVN + hash * 37; 172 hash = e.thirdVN + hash * 37; 173 174 hash = ((unsigned)((uintptr_t)e.type >> 4) ^ 175 (unsigned)((uintptr_t)e.type >> 9)) + 176 hash * 37; 177 178 for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), 179 E = e.varargs.end(); I != E; ++I) 180 hash = *I + hash * 37; 181 182 hash = ((unsigned)((uintptr_t)e.function >> 4) ^ 183 (unsigned)((uintptr_t)e.function >> 9)) + 184 hash * 37; 185 186 return hash; 187 } 188 static bool isEqual(const Expression &LHS, const Expression &RHS) { 189 return LHS == RHS; 190 } 191 static bool isPod() { return true; } 192}; 193} 194 195//===----------------------------------------------------------------------===// 196// ValueTable Internal Functions 197//===----------------------------------------------------------------------===// 198Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { 199 switch(BO->getOpcode()) { 200 default: // THIS SHOULD NEVER HAPPEN 201 assert(0 && "Binary operator with unknown opcode?"); 202 case Instruction::Add: return Expression::ADD; 203 case Instruction::Sub: return Expression::SUB; 204 case Instruction::Mul: return Expression::MUL; 205 case Instruction::UDiv: return Expression::UDIV; 206 case Instruction::SDiv: return Expression::SDIV; 207 case Instruction::FDiv: return Expression::FDIV; 208 case Instruction::URem: return Expression::UREM; 209 case Instruction::SRem: return Expression::SREM; 210 case Instruction::FRem: return Expression::FREM; 211 case Instruction::Shl: return Expression::SHL; 212 case Instruction::LShr: return Expression::LSHR; 213 case Instruction::AShr: return Expression::ASHR; 214 case Instruction::And: return Expression::AND; 215 case Instruction::Or: return Expression::OR; 216 case Instruction::Xor: return Expression::XOR; 217 } 218} 219 220Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { 221 if (isa<ICmpInst>(C) || isa<VICmpInst>(C)) { 222 switch (C->getPredicate()) { 223 default: // THIS SHOULD NEVER HAPPEN 224 assert(0 && "Comparison with unknown predicate?"); 225 case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; 226 case ICmpInst::ICMP_NE: return Expression::ICMPNE; 227 case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; 228 case ICmpInst::ICMP_UGE: return Expression::ICMPUGE; 229 case ICmpInst::ICMP_ULT: return Expression::ICMPULT; 230 case ICmpInst::ICMP_ULE: return Expression::ICMPULE; 231 case ICmpInst::ICMP_SGT: return Expression::ICMPSGT; 232 case ICmpInst::ICMP_SGE: return Expression::ICMPSGE; 233 case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; 234 case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; 235 } 236 } 237 assert((isa<FCmpInst>(C) || isa<VFCmpInst>(C)) && "Unknown compare"); 238 switch (C->getPredicate()) { 239 default: // THIS SHOULD NEVER HAPPEN 240 assert(0 && "Comparison with unknown predicate?"); 241 case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; 242 case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; 243 case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; 244 case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; 245 case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; 246 case FCmpInst::FCMP_ONE: return Expression::FCMPONE; 247 case FCmpInst::FCMP_ORD: return Expression::FCMPORD; 248 case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; 249 case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; 250 case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; 251 case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; 252 case FCmpInst::FCMP_ULT: return Expression::FCMPULT; 253 case FCmpInst::FCMP_ULE: return Expression::FCMPULE; 254 case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; 255 } 256} 257 258Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { 259 switch(C->getOpcode()) { 260 default: // THIS SHOULD NEVER HAPPEN 261 assert(0 && "Cast operator with unknown opcode?"); 262 case Instruction::Trunc: return Expression::TRUNC; 263 case Instruction::ZExt: return Expression::ZEXT; 264 case Instruction::SExt: return Expression::SEXT; 265 case Instruction::FPToUI: return Expression::FPTOUI; 266 case Instruction::FPToSI: return Expression::FPTOSI; 267 case Instruction::UIToFP: return Expression::UITOFP; 268 case Instruction::SIToFP: return Expression::SITOFP; 269 case Instruction::FPTrunc: return Expression::FPTRUNC; 270 case Instruction::FPExt: return Expression::FPEXT; 271 case Instruction::PtrToInt: return Expression::PTRTOINT; 272 case Instruction::IntToPtr: return Expression::INTTOPTR; 273 case Instruction::BitCast: return Expression::BITCAST; 274 } 275} 276 277Expression ValueTable::create_expression(CallInst* C) { 278 Expression e; 279 280 e.type = C->getType(); 281 e.firstVN = 0; 282 e.secondVN = 0; 283 e.thirdVN = 0; 284 e.function = C->getCalledFunction(); 285 e.opcode = Expression::CALL; 286 287 for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); 288 I != E; ++I) 289 e.varargs.push_back(lookup_or_add(*I)); 290 291 return e; 292} 293 294Expression ValueTable::create_expression(BinaryOperator* BO) { 295 Expression e; 296 297 e.firstVN = lookup_or_add(BO->getOperand(0)); 298 e.secondVN = lookup_or_add(BO->getOperand(1)); 299 e.thirdVN = 0; 300 e.function = 0; 301 e.type = BO->getType(); 302 e.opcode = getOpcode(BO); 303 304 return e; 305} 306 307Expression ValueTable::create_expression(CmpInst* C) { 308 Expression e; 309 310 e.firstVN = lookup_or_add(C->getOperand(0)); 311 e.secondVN = lookup_or_add(C->getOperand(1)); 312 e.thirdVN = 0; 313 e.function = 0; 314 e.type = C->getType(); 315 e.opcode = getOpcode(C); 316 317 return e; 318} 319 320Expression ValueTable::create_expression(CastInst* C) { 321 Expression e; 322 323 e.firstVN = lookup_or_add(C->getOperand(0)); 324 e.secondVN = 0; 325 e.thirdVN = 0; 326 e.function = 0; 327 e.type = C->getType(); 328 e.opcode = getOpcode(C); 329 330 return e; 331} 332 333Expression ValueTable::create_expression(ShuffleVectorInst* S) { 334 Expression e; 335 336 e.firstVN = lookup_or_add(S->getOperand(0)); 337 e.secondVN = lookup_or_add(S->getOperand(1)); 338 e.thirdVN = lookup_or_add(S->getOperand(2)); 339 e.function = 0; 340 e.type = S->getType(); 341 e.opcode = Expression::SHUFFLE; 342 343 return e; 344} 345 346Expression ValueTable::create_expression(ExtractElementInst* E) { 347 Expression e; 348 349 e.firstVN = lookup_or_add(E->getOperand(0)); 350 e.secondVN = lookup_or_add(E->getOperand(1)); 351 e.thirdVN = 0; 352 e.function = 0; 353 e.type = E->getType(); 354 e.opcode = Expression::EXTRACT; 355 356 return e; 357} 358 359Expression ValueTable::create_expression(InsertElementInst* I) { 360 Expression e; 361 362 e.firstVN = lookup_or_add(I->getOperand(0)); 363 e.secondVN = lookup_or_add(I->getOperand(1)); 364 e.thirdVN = lookup_or_add(I->getOperand(2)); 365 e.function = 0; 366 e.type = I->getType(); 367 e.opcode = Expression::INSERT; 368 369 return e; 370} 371 372Expression ValueTable::create_expression(SelectInst* I) { 373 Expression e; 374 375 e.firstVN = lookup_or_add(I->getCondition()); 376 e.secondVN = lookup_or_add(I->getTrueValue()); 377 e.thirdVN = lookup_or_add(I->getFalseValue()); 378 e.function = 0; 379 e.type = I->getType(); 380 e.opcode = Expression::SELECT; 381 382 return e; 383} 384 385Expression ValueTable::create_expression(GetElementPtrInst* G) { 386 Expression e; 387 388 e.firstVN = lookup_or_add(G->getPointerOperand()); 389 e.secondVN = 0; 390 e.thirdVN = 0; 391 e.function = 0; 392 e.type = G->getType(); 393 e.opcode = Expression::GEP; 394 395 for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); 396 I != E; ++I) 397 e.varargs.push_back(lookup_or_add(*I)); 398 399 return e; 400} 401 402//===----------------------------------------------------------------------===// 403// ValueTable External Functions 404//===----------------------------------------------------------------------===// 405 406/// add - Insert a value into the table with a specified value number. 407void ValueTable::add(Value* V, uint32_t num) { 408 valueNumbering.insert(std::make_pair(V, num)); 409} 410 411/// lookup_or_add - Returns the value number for the specified value, assigning 412/// it a new number if it did not have one before. 413uint32_t ValueTable::lookup_or_add(Value* V) { 414 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 415 if (VI != valueNumbering.end()) 416 return VI->second; 417 418 if (CallInst* C = dyn_cast<CallInst>(V)) { 419 if (AA->doesNotAccessMemory(C)) { 420 Expression e = create_expression(C); 421 422 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 423 if (EI != expressionNumbering.end()) { 424 valueNumbering.insert(std::make_pair(V, EI->second)); 425 return EI->second; 426 } else { 427 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 428 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 429 430 return nextValueNumber++; 431 } 432 } else if (AA->onlyReadsMemory(C)) { 433 Expression e = create_expression(C); 434 435 if (expressionNumbering.find(e) == expressionNumbering.end()) { 436 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 437 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 438 return nextValueNumber++; 439 } 440 441 MemDepResult local_dep = MD->getDependency(C); 442 443 if (!local_dep.isDef() && !local_dep.isNonLocal()) { 444 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 445 return nextValueNumber++; 446 } 447 448 if (local_dep.isDef()) { 449 CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); 450 451 if (local_cdep->getNumOperands() != C->getNumOperands()) { 452 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 453 return nextValueNumber++; 454 } 455 456 for (unsigned i = 1; i < C->getNumOperands(); ++i) { 457 uint32_t c_vn = lookup_or_add(C->getOperand(i)); 458 uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); 459 if (c_vn != cd_vn) { 460 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 461 return nextValueNumber++; 462 } 463 } 464 465 uint32_t v = lookup_or_add(local_cdep); 466 valueNumbering.insert(std::make_pair(V, v)); 467 return v; 468 } 469 470 // Non-local case. 471 const MemoryDependenceAnalysis::NonLocalDepInfo &deps = 472 MD->getNonLocalCallDependency(CallSite(C)); 473 // FIXME: call/call dependencies for readonly calls should return def, not 474 // clobber! Move the checking logic to MemDep! 475 CallInst* cdep = 0; 476 477 // Check to see if we have a single dominating call instruction that is 478 // identical to C. 479 for (unsigned i = 0, e = deps.size(); i != e; ++i) { 480 const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; 481 // Ignore non-local dependencies. 482 if (I->second.isNonLocal()) 483 continue; 484 485 // We don't handle non-depedencies. If we already have a call, reject 486 // instruction dependencies. 487 if (I->second.isClobber() || cdep != 0) { 488 cdep = 0; 489 break; 490 } 491 492 CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); 493 // FIXME: All duplicated with non-local case. 494 if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ 495 cdep = NonLocalDepCall; 496 continue; 497 } 498 499 cdep = 0; 500 break; 501 } 502 503 if (!cdep) { 504 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 505 return nextValueNumber++; 506 } 507 508 if (cdep->getNumOperands() != C->getNumOperands()) { 509 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 510 return nextValueNumber++; 511 } 512 for (unsigned i = 1; i < C->getNumOperands(); ++i) { 513 uint32_t c_vn = lookup_or_add(C->getOperand(i)); 514 uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); 515 if (c_vn != cd_vn) { 516 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 517 return nextValueNumber++; 518 } 519 } 520 521 uint32_t v = lookup_or_add(cdep); 522 valueNumbering.insert(std::make_pair(V, v)); 523 return v; 524 525 } else { 526 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 527 return nextValueNumber++; 528 } 529 } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) { 530 Expression e = create_expression(BO); 531 532 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 533 if (EI != expressionNumbering.end()) { 534 valueNumbering.insert(std::make_pair(V, EI->second)); 535 return EI->second; 536 } else { 537 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 538 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 539 540 return nextValueNumber++; 541 } 542 } else if (CmpInst* C = dyn_cast<CmpInst>(V)) { 543 Expression e = create_expression(C); 544 545 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 546 if (EI != expressionNumbering.end()) { 547 valueNumbering.insert(std::make_pair(V, EI->second)); 548 return EI->second; 549 } else { 550 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 551 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 552 553 return nextValueNumber++; 554 } 555 } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) { 556 Expression e = create_expression(U); 557 558 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 559 if (EI != expressionNumbering.end()) { 560 valueNumbering.insert(std::make_pair(V, EI->second)); 561 return EI->second; 562 } else { 563 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 564 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 565 566 return nextValueNumber++; 567 } 568 } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) { 569 Expression e = create_expression(U); 570 571 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 572 if (EI != expressionNumbering.end()) { 573 valueNumbering.insert(std::make_pair(V, EI->second)); 574 return EI->second; 575 } else { 576 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 577 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 578 579 return nextValueNumber++; 580 } 581 } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) { 582 Expression e = create_expression(U); 583 584 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 585 if (EI != expressionNumbering.end()) { 586 valueNumbering.insert(std::make_pair(V, EI->second)); 587 return EI->second; 588 } else { 589 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 590 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 591 592 return nextValueNumber++; 593 } 594 } else if (SelectInst* U = dyn_cast<SelectInst>(V)) { 595 Expression e = create_expression(U); 596 597 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 598 if (EI != expressionNumbering.end()) { 599 valueNumbering.insert(std::make_pair(V, EI->second)); 600 return EI->second; 601 } else { 602 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 603 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 604 605 return nextValueNumber++; 606 } 607 } else if (CastInst* U = dyn_cast<CastInst>(V)) { 608 Expression e = create_expression(U); 609 610 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 611 if (EI != expressionNumbering.end()) { 612 valueNumbering.insert(std::make_pair(V, EI->second)); 613 return EI->second; 614 } else { 615 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 616 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 617 618 return nextValueNumber++; 619 } 620 } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) { 621 Expression e = create_expression(U); 622 623 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 624 if (EI != expressionNumbering.end()) { 625 valueNumbering.insert(std::make_pair(V, EI->second)); 626 return EI->second; 627 } else { 628 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 629 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 630 631 return nextValueNumber++; 632 } 633 } else { 634 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 635 return nextValueNumber++; 636 } 637} 638 639/// lookup - Returns the value number of the specified value. Fails if 640/// the value has not yet been numbered. 641uint32_t ValueTable::lookup(Value* V) const { 642 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 643 assert(VI != valueNumbering.end() && "Value not numbered?"); 644 return VI->second; 645} 646 647/// clear - Remove all entries from the ValueTable 648void ValueTable::clear() { 649 valueNumbering.clear(); 650 expressionNumbering.clear(); 651 nextValueNumber = 1; 652} 653 654/// erase - Remove a value from the value numbering 655void ValueTable::erase(Value* V) { 656 valueNumbering.erase(V); 657} 658 659/// verifyRemoved - Verify that the value is removed from all internal data 660/// structures. 661void ValueTable::verifyRemoved(const Value *V) const { 662 for (DenseMap<Value*, uint32_t>::iterator 663 I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { 664 assert(I->first != V && "Inst still occurs in value numbering map!"); 665 } 666} 667 668//===----------------------------------------------------------------------===// 669// GVN Pass 670//===----------------------------------------------------------------------===// 671 672namespace { 673 struct VISIBILITY_HIDDEN ValueNumberScope { 674 ValueNumberScope* parent; 675 DenseMap<uint32_t, Value*> table; 676 677 ValueNumberScope(ValueNumberScope* p) : parent(p) { } 678 }; 679} 680 681namespace { 682 683 class VISIBILITY_HIDDEN GVN : public FunctionPass { 684 bool runOnFunction(Function &F); 685 public: 686 static char ID; // Pass identification, replacement for typeid 687 GVN() : FunctionPass(&ID) { } 688 689 private: 690 MemoryDependenceAnalysis *MD; 691 DominatorTree *DT; 692 693 ValueTable VN; 694 DenseMap<BasicBlock*, ValueNumberScope*> localAvail; 695 696 typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType; 697 PhiMapType phiMap; 698 699 700 // This transformation requires dominator postdominator info 701 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 702 AU.addRequired<DominatorTree>(); 703 AU.addRequired<MemoryDependenceAnalysis>(); 704 AU.addRequired<AliasAnalysis>(); 705 706 AU.addPreserved<DominatorTree>(); 707 AU.addPreserved<AliasAnalysis>(); 708 } 709 710 // Helper fuctions 711 // FIXME: eliminate or document these better 712 bool processLoad(LoadInst* L, 713 SmallVectorImpl<Instruction*> &toErase); 714 bool processInstruction(Instruction* I, 715 SmallVectorImpl<Instruction*> &toErase); 716 bool processNonLocalLoad(LoadInst* L, 717 SmallVectorImpl<Instruction*> &toErase); 718 bool processBlock(BasicBlock* BB); 719 Value *GetValueForBlock(BasicBlock *BB, Instruction* orig, 720 DenseMap<BasicBlock*, Value*> &Phis, 721 bool top_level = false); 722 void dump(DenseMap<uint32_t, Value*>& d); 723 bool iterateOnFunction(Function &F); 724 Value* CollapsePhi(PHINode* p); 725 bool isSafeReplacement(PHINode* p, Instruction* inst); 726 bool performPRE(Function& F); 727 Value* lookupNumber(BasicBlock* BB, uint32_t num); 728 bool mergeBlockIntoPredecessor(BasicBlock* BB); 729 Value* AttemptRedundancyElimination(Instruction* orig, unsigned valno); 730 void cleanupGlobalSets(); 731 void verifyRemoved(const Instruction *I) const; 732 }; 733 734 char GVN::ID = 0; 735} 736 737// createGVNPass - The public interface to this file... 738FunctionPass *llvm::createGVNPass() { return new GVN(); } 739 740static RegisterPass<GVN> X("gvn", 741 "Global Value Numbering"); 742 743void GVN::dump(DenseMap<uint32_t, Value*>& d) { 744 printf("{\n"); 745 for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), 746 E = d.end(); I != E; ++I) { 747 printf("%d\n", I->first); 748 I->second->dump(); 749 } 750 printf("}\n"); 751} 752 753Value* GVN::CollapsePhi(PHINode* p) { 754 Value* constVal = p->hasConstantValue(); 755 if (!constVal) return 0; 756 757 Instruction* inst = dyn_cast<Instruction>(constVal); 758 if (!inst) 759 return constVal; 760 761 if (DT->dominates(inst, p)) 762 if (isSafeReplacement(p, inst)) 763 return inst; 764 return 0; 765} 766 767bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) { 768 if (!isa<PHINode>(inst)) 769 return true; 770 771 for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); 772 UI != E; ++UI) 773 if (PHINode* use_phi = dyn_cast<PHINode>(UI)) 774 if (use_phi->getParent() == inst->getParent()) 775 return false; 776 777 return true; 778} 779 780/// GetValueForBlock - Get the value to use within the specified basic block. 781/// available values are in Phis. 782Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, 783 DenseMap<BasicBlock*, Value*> &Phis, 784 bool top_level) { 785 786 // If we have already computed this value, return the previously computed val. 787 DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB); 788 if (V != Phis.end() && !top_level) return V->second; 789 790 // If the block is unreachable, just return undef, since this path 791 // can't actually occur at runtime. 792 if (!DT->isReachableFromEntry(BB)) 793 return Phis[BB] = UndefValue::get(orig->getType()); 794 795 if (BasicBlock *Pred = BB->getSinglePredecessor()) { 796 Value *ret = GetValueForBlock(Pred, orig, Phis); 797 Phis[BB] = ret; 798 return ret; 799 } 800 801 // Get the number of predecessors of this block so we can reserve space later. 802 // If there is already a PHI in it, use the #preds from it, otherwise count. 803 // Getting it from the PHI is constant time. 804 unsigned NumPreds; 805 if (PHINode *ExistingPN = dyn_cast<PHINode>(BB->begin())) 806 NumPreds = ExistingPN->getNumIncomingValues(); 807 else 808 NumPreds = std::distance(pred_begin(BB), pred_end(BB)); 809 810 // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so 811 // now, then get values to fill in the incoming values for the PHI. 812 PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle", 813 BB->begin()); 814 PN->reserveOperandSpace(NumPreds); 815 816 Phis.insert(std::make_pair(BB, PN)); 817 818 // Fill in the incoming values for the block. 819 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 820 Value* val = GetValueForBlock(*PI, orig, Phis); 821 PN->addIncoming(val, *PI); 822 } 823 824 VN.getAliasAnalysis()->copyValue(orig, PN); 825 826 // Attempt to collapse PHI nodes that are trivially redundant 827 Value* v = CollapsePhi(PN); 828 if (!v) { 829 // Cache our phi construction results 830 if (LoadInst* L = dyn_cast<LoadInst>(orig)) 831 phiMap[L->getPointerOperand()].insert(PN); 832 else 833 phiMap[orig].insert(PN); 834 835 return PN; 836 } 837 838 PN->replaceAllUsesWith(v); 839 if (isa<PointerType>(v->getType())) 840 MD->invalidateCachedPointerInfo(v); 841 842 for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(), 843 E = Phis.end(); I != E; ++I) 844 if (I->second == PN) 845 I->second = v; 846 847 DEBUG(cerr << "GVN removed: " << *PN); 848 MD->removeInstruction(PN); 849 PN->eraseFromParent(); 850 DEBUG(verifyRemoved(PN)); 851 852 Phis[BB] = v; 853 return v; 854} 855 856/// IsValueFullyAvailableInBlock - Return true if we can prove that the value 857/// we're analyzing is fully available in the specified block. As we go, keep 858/// track of which blocks we know are fully alive in FullyAvailableBlocks. This 859/// map is actually a tri-state map with the following values: 860/// 0) we know the block *is not* fully available. 861/// 1) we know the block *is* fully available. 862/// 2) we do not know whether the block is fully available or not, but we are 863/// currently speculating that it will be. 864/// 3) we are speculating for this block and have used that to speculate for 865/// other blocks. 866static bool IsValueFullyAvailableInBlock(BasicBlock *BB, 867 DenseMap<BasicBlock*, char> &FullyAvailableBlocks) { 868 // Optimistically assume that the block is fully available and check to see 869 // if we already know about this block in one lookup. 870 std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV = 871 FullyAvailableBlocks.insert(std::make_pair(BB, 2)); 872 873 // If the entry already existed for this block, return the precomputed value. 874 if (!IV.second) { 875 // If this is a speculative "available" value, mark it as being used for 876 // speculation of other blocks. 877 if (IV.first->second == 2) 878 IV.first->second = 3; 879 return IV.first->second != 0; 880 } 881 882 // Otherwise, see if it is fully available in all predecessors. 883 pred_iterator PI = pred_begin(BB), PE = pred_end(BB); 884 885 // If this block has no predecessors, it isn't live-in here. 886 if (PI == PE) 887 goto SpeculationFailure; 888 889 for (; PI != PE; ++PI) 890 // If the value isn't fully available in one of our predecessors, then it 891 // isn't fully available in this block either. Undo our previous 892 // optimistic assumption and bail out. 893 if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) 894 goto SpeculationFailure; 895 896 return true; 897 898// SpeculationFailure - If we get here, we found out that this is not, after 899// all, a fully-available block. We have a problem if we speculated on this and 900// used the speculation to mark other blocks as available. 901SpeculationFailure: 902 char &BBVal = FullyAvailableBlocks[BB]; 903 904 // If we didn't speculate on this, just return with it set to false. 905 if (BBVal == 2) { 906 BBVal = 0; 907 return false; 908 } 909 910 // If we did speculate on this value, we could have blocks set to 1 that are 911 // incorrect. Walk the (transitive) successors of this block and mark them as 912 // 0 if set to one. 913 SmallVector<BasicBlock*, 32> BBWorklist; 914 BBWorklist.push_back(BB); 915 916 while (!BBWorklist.empty()) { 917 BasicBlock *Entry = BBWorklist.pop_back_val(); 918 // Note that this sets blocks to 0 (unavailable) if they happen to not 919 // already be in FullyAvailableBlocks. This is safe. 920 char &EntryVal = FullyAvailableBlocks[Entry]; 921 if (EntryVal == 0) continue; // Already unavailable. 922 923 // Mark as unavailable. 924 EntryVal = 0; 925 926 for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I) 927 BBWorklist.push_back(*I); 928 } 929 930 return false; 931} 932 933/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are 934/// non-local by performing PHI construction. 935bool GVN::processNonLocalLoad(LoadInst *LI, 936 SmallVectorImpl<Instruction*> &toErase) { 937 // Find the non-local dependencies of the load. 938 SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps; 939 MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), 940 Deps); 941 //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI); 942 943 // If we had to process more than one hundred blocks to find the 944 // dependencies, this load isn't worth worrying about. Optimizing 945 // it will be too expensive. 946 if (Deps.size() > 100) 947 return false; 948 949 // If we had a phi translation failure, we'll have a single entry which is a 950 // clobber in the current block. Reject this early. 951 if (Deps.size() == 1 && Deps[0].second.isClobber()) 952 return false; 953 954 // Filter out useless results (non-locals, etc). Keep track of the blocks 955 // where we have a value available in repl, also keep track of whether we see 956 // dependencies that produce an unknown value for the load (such as a call 957 // that could potentially clobber the load). 958 SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock; 959 SmallVector<BasicBlock*, 16> UnavailableBlocks; 960 961 for (unsigned i = 0, e = Deps.size(); i != e; ++i) { 962 BasicBlock *DepBB = Deps[i].first; 963 MemDepResult DepInfo = Deps[i].second; 964 965 if (DepInfo.isClobber()) { 966 UnavailableBlocks.push_back(DepBB); 967 continue; 968 } 969 970 Instruction *DepInst = DepInfo.getInst(); 971 972 // Loading the allocation -> undef. 973 if (isa<AllocationInst>(DepInst)) { 974 ValuesPerBlock.push_back(std::make_pair(DepBB, 975 UndefValue::get(LI->getType()))); 976 continue; 977 } 978 979 if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) { 980 // Reject loads and stores that are to the same address but are of 981 // different types. 982 // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because 983 // of bitfield access, it would be interesting to optimize for it at some 984 // point. 985 if (S->getOperand(0)->getType() != LI->getType()) { 986 UnavailableBlocks.push_back(DepBB); 987 continue; 988 } 989 990 ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0))); 991 992 } else if (LoadInst* LD = dyn_cast<LoadInst>(DepInst)) { 993 if (LD->getType() != LI->getType()) { 994 UnavailableBlocks.push_back(DepBB); 995 continue; 996 } 997 ValuesPerBlock.push_back(std::make_pair(DepBB, LD)); 998 } else { 999 UnavailableBlocks.push_back(DepBB); 1000 continue; 1001 } 1002 } 1003 1004 // If we have no predecessors that produce a known value for this load, exit 1005 // early. 1006 if (ValuesPerBlock.empty()) return false; 1007 1008 // If all of the instructions we depend on produce a known value for this 1009 // load, then it is fully redundant and we can use PHI insertion to compute 1010 // its value. Insert PHIs and remove the fully redundant value now. 1011 if (UnavailableBlocks.empty()) { 1012 // Use cached PHI construction information from previous runs 1013 SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()]; 1014 // FIXME: What does phiMap do? Are we positive it isn't getting invalidated? 1015 for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end(); 1016 I != E; ++I) { 1017 if ((*I)->getParent() == LI->getParent()) { 1018 DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI); 1019 LI->replaceAllUsesWith(*I); 1020 if (isa<PointerType>((*I)->getType())) 1021 MD->invalidateCachedPointerInfo(*I); 1022 toErase.push_back(LI); 1023 NumGVNLoad++; 1024 return true; 1025 } 1026 1027 ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); 1028 } 1029 1030 DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI); 1031 1032 DenseMap<BasicBlock*, Value*> BlockReplValues; 1033 BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); 1034 // Perform PHI construction. 1035 Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); 1036 LI->replaceAllUsesWith(v); 1037 1038 if (isa<PHINode>(v)) 1039 v->takeName(LI); 1040 if (isa<PointerType>(v->getType())) 1041 MD->invalidateCachedPointerInfo(v); 1042 toErase.push_back(LI); 1043 NumGVNLoad++; 1044 return true; 1045 } 1046 1047 if (!EnablePRE || !EnableLoadPRE) 1048 return false; 1049 1050 // Okay, we have *some* definitions of the value. This means that the value 1051 // is available in some of our (transitive) predecessors. Lets think about 1052 // doing PRE of this load. This will involve inserting a new load into the 1053 // predecessor when it's not available. We could do this in general, but 1054 // prefer to not increase code size. As such, we only do this when we know 1055 // that we only have to insert *one* load (which means we're basically moving 1056 // the load, not inserting a new one). 1057 1058 // Everything we do here is based on local predecessors of LI's block. If it 1059 // only has one predecessor, bail now. 1060 BasicBlock *LoadBB = LI->getParent(); 1061 if (LoadBB->getSinglePredecessor()) 1062 return false; 1063 1064 // If we have a repl set with LI itself in it, this means we have a loop where 1065 // at least one of the values is LI. Since this means that we won't be able 1066 // to eliminate LI even if we insert uses in the other predecessors, we will 1067 // end up increasing code size. Reject this by scanning for LI. 1068 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) 1069 if (ValuesPerBlock[i].second == LI) 1070 return false; 1071 1072 // Okay, we have some hope :). Check to see if the loaded value is fully 1073 // available in all but one predecessor. 1074 // FIXME: If we could restructure the CFG, we could make a common pred with 1075 // all the preds that don't have an available LI and insert a new load into 1076 // that one block. 1077 BasicBlock *UnavailablePred = 0; 1078 1079 DenseMap<BasicBlock*, char> FullyAvailableBlocks; 1080 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) 1081 FullyAvailableBlocks[ValuesPerBlock[i].first] = true; 1082 for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) 1083 FullyAvailableBlocks[UnavailableBlocks[i]] = false; 1084 1085 for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); 1086 PI != E; ++PI) { 1087 if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) 1088 continue; 1089 1090 // If this load is not available in multiple predecessors, reject it. 1091 if (UnavailablePred && UnavailablePred != *PI) 1092 return false; 1093 UnavailablePred = *PI; 1094 } 1095 1096 assert(UnavailablePred != 0 && 1097 "Fully available value should be eliminated above!"); 1098 1099 // If the loaded pointer is PHI node defined in this block, do PHI translation 1100 // to get its value in the predecessor. 1101 Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred); 1102 1103 // Make sure the value is live in the predecessor. If it was defined by a 1104 // non-PHI instruction in this block, we don't know how to recompute it above. 1105 if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr)) 1106 if (!DT->dominates(LPInst->getParent(), UnavailablePred)) { 1107 DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " 1108 << *LPInst << *LI << "\n"); 1109 return false; 1110 } 1111 1112 // We don't currently handle critical edges :( 1113 if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { 1114 DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" 1115 << UnavailablePred->getName() << "': " << *LI); 1116 return false; 1117 } 1118 1119 // Okay, we can eliminate this load by inserting a reload in the predecessor 1120 // and using PHI construction to get the value in the other predecessors, do 1121 // it. 1122 DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI); 1123 1124 Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, 1125 LI->getAlignment(), 1126 UnavailablePred->getTerminator()); 1127 1128 SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()]; 1129 for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end(); 1130 I != E; ++I) 1131 ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); 1132 1133 DenseMap<BasicBlock*, Value*> BlockReplValues; 1134 BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); 1135 BlockReplValues[UnavailablePred] = NewLoad; 1136 1137 // Perform PHI construction. 1138 Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); 1139 LI->replaceAllUsesWith(v); 1140 if (isa<PHINode>(v)) 1141 v->takeName(LI); 1142 if (isa<PointerType>(v->getType())) 1143 MD->invalidateCachedPointerInfo(v); 1144 toErase.push_back(LI); 1145 NumPRELoad++; 1146 return true; 1147} 1148 1149/// processLoad - Attempt to eliminate a load, first by eliminating it 1150/// locally, and then attempting non-local elimination if that fails. 1151bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { 1152 if (L->isVolatile()) 1153 return false; 1154 1155 Value* pointer = L->getPointerOperand(); 1156 1157 // ... to a pointer that has been loaded from before... 1158 MemDepResult dep = MD->getDependency(L); 1159 1160 // If the value isn't available, don't do anything! 1161 if (dep.isClobber()) { 1162 DEBUG( 1163 // fast print dep, using operator<< on instruction would be too slow 1164 DOUT << "GVN: load "; 1165 WriteAsOperand(*DOUT.stream(), L); 1166 Instruction *I = dep.getInst(); 1167 DOUT << " is clobbered by " << *I; 1168 ); 1169 return false; 1170 } 1171 1172 // If it is defined in another block, try harder. 1173 if (dep.isNonLocal()) 1174 return processNonLocalLoad(L, toErase); 1175 1176 Instruction *DepInst = dep.getInst(); 1177 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { 1178 // Only forward substitute stores to loads of the same type. 1179 // FIXME: Could do better! 1180 if (DepSI->getPointerOperand()->getType() != pointer->getType()) 1181 return false; 1182 1183 // Remove it! 1184 L->replaceAllUsesWith(DepSI->getOperand(0)); 1185 if (isa<PointerType>(DepSI->getOperand(0)->getType())) 1186 MD->invalidateCachedPointerInfo(DepSI->getOperand(0)); 1187 toErase.push_back(L); 1188 NumGVNLoad++; 1189 return true; 1190 } 1191 1192 if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) { 1193 // Only forward substitute stores to loads of the same type. 1194 // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian. 1195 if (DepLI->getType() != L->getType()) 1196 return false; 1197 1198 // Remove it! 1199 L->replaceAllUsesWith(DepLI); 1200 if (isa<PointerType>(DepLI->getType())) 1201 MD->invalidateCachedPointerInfo(DepLI); 1202 toErase.push_back(L); 1203 NumGVNLoad++; 1204 return true; 1205 } 1206 1207 // If this load really doesn't depend on anything, then we must be loading an 1208 // undef value. This can happen when loading for a fresh allocation with no 1209 // intervening stores, for example. 1210 if (isa<AllocationInst>(DepInst)) { 1211 L->replaceAllUsesWith(UndefValue::get(L->getType())); 1212 toErase.push_back(L); 1213 NumGVNLoad++; 1214 return true; 1215 } 1216 1217 return false; 1218} 1219 1220Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) { 1221 DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB); 1222 if (I == localAvail.end()) 1223 return 0; 1224 1225 ValueNumberScope* locals = I->second; 1226 1227 while (locals) { 1228 DenseMap<uint32_t, Value*>::iterator I = locals->table.find(num); 1229 if (I != locals->table.end()) 1230 return I->second; 1231 else 1232 locals = locals->parent; 1233 } 1234 1235 return 0; 1236} 1237 1238/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination 1239/// by inheritance from the dominator fails, see if we can perform phi 1240/// construction to eliminate the redundancy. 1241Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) { 1242 BasicBlock* BaseBlock = orig->getParent(); 1243 1244 SmallPtrSet<BasicBlock*, 4> Visited; 1245 SmallVector<BasicBlock*, 8> Stack; 1246 Stack.push_back(BaseBlock); 1247 1248 DenseMap<BasicBlock*, Value*> Results; 1249 1250 // Walk backwards through our predecessors, looking for instances of the 1251 // value number we're looking for. Instances are recorded in the Results 1252 // map, which is then used to perform phi construction. 1253 while (!Stack.empty()) { 1254 BasicBlock* Current = Stack.back(); 1255 Stack.pop_back(); 1256 1257 // If we've walked all the way to a proper dominator, then give up. Cases 1258 // where the instance is in the dominator will have been caught by the fast 1259 // path, and any cases that require phi construction further than this are 1260 // probably not worth it anyways. Note that this is a SIGNIFICANT compile 1261 // time improvement. 1262 if (DT->properlyDominates(Current, orig->getParent())) return 0; 1263 1264 DenseMap<BasicBlock*, ValueNumberScope*>::iterator LA = 1265 localAvail.find(Current); 1266 if (LA == localAvail.end()) return 0; 1267 DenseMap<uint32_t, Value*>::iterator V = LA->second->table.find(valno); 1268 1269 if (V != LA->second->table.end()) { 1270 // Found an instance, record it. 1271 Results.insert(std::make_pair(Current, V->second)); 1272 continue; 1273 } 1274 1275 // If we reach the beginning of the function, then give up. 1276 if (pred_begin(Current) == pred_end(Current)) 1277 return 0; 1278 1279 for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current); 1280 PI != PE; ++PI) 1281 if (Visited.insert(*PI)) 1282 Stack.push_back(*PI); 1283 } 1284 1285 // If we didn't find instances, give up. Otherwise, perform phi construction. 1286 if (Results.size() == 0) 1287 return 0; 1288 else 1289 return GetValueForBlock(BaseBlock, orig, Results, true); 1290} 1291 1292/// processInstruction - When calculating availability, handle an instruction 1293/// by inserting it into the appropriate sets 1294bool GVN::processInstruction(Instruction *I, 1295 SmallVectorImpl<Instruction*> &toErase) { 1296 if (LoadInst* L = dyn_cast<LoadInst>(I)) { 1297 bool changed = processLoad(L, toErase); 1298 1299 if (!changed) { 1300 unsigned num = VN.lookup_or_add(L); 1301 localAvail[I->getParent()]->table.insert(std::make_pair(num, L)); 1302 } 1303 1304 return changed; 1305 } 1306 1307 uint32_t nextNum = VN.getNextUnusedValueNumber(); 1308 unsigned num = VN.lookup_or_add(I); 1309 1310 if (BranchInst* BI = dyn_cast<BranchInst>(I)) { 1311 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1312 1313 if (!BI->isConditional() || isa<Constant>(BI->getCondition())) 1314 return false; 1315 1316 Value* branchCond = BI->getCondition(); 1317 uint32_t condVN = VN.lookup_or_add(branchCond); 1318 1319 BasicBlock* trueSucc = BI->getSuccessor(0); 1320 BasicBlock* falseSucc = BI->getSuccessor(1); 1321 1322 if (trueSucc->getSinglePredecessor()) 1323 localAvail[trueSucc]->table[condVN] = ConstantInt::getTrue(); 1324 if (falseSucc->getSinglePredecessor()) 1325 localAvail[falseSucc]->table[condVN] = ConstantInt::getFalse(); 1326 1327 return false; 1328 1329 // Allocations are always uniquely numbered, so we can save time and memory 1330 // by fast failing them. 1331 } else if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) { 1332 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1333 return false; 1334 } 1335 1336 // Collapse PHI nodes 1337 if (PHINode* p = dyn_cast<PHINode>(I)) { 1338 Value* constVal = CollapsePhi(p); 1339 1340 if (constVal) { 1341 for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end(); 1342 PI != PE; ++PI) 1343 PI->second.erase(p); 1344 1345 p->replaceAllUsesWith(constVal); 1346 if (isa<PointerType>(constVal->getType())) 1347 MD->invalidateCachedPointerInfo(constVal); 1348 VN.erase(p); 1349 1350 toErase.push_back(p); 1351 } else { 1352 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1353 } 1354 1355 // If the number we were assigned was a brand new VN, then we don't 1356 // need to do a lookup to see if the number already exists 1357 // somewhere in the domtree: it can't! 1358 } else if (num == nextNum) { 1359 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1360 1361 // Perform fast-path value-number based elimination of values inherited from 1362 // dominators. 1363 } else if (Value* repl = lookupNumber(I->getParent(), num)) { 1364 // Remove it! 1365 VN.erase(I); 1366 I->replaceAllUsesWith(repl); 1367 if (isa<PointerType>(repl->getType())) 1368 MD->invalidateCachedPointerInfo(repl); 1369 toErase.push_back(I); 1370 return true; 1371 1372#if 0 1373 // Perform slow-pathvalue-number based elimination with phi construction. 1374 } else if (Value* repl = AttemptRedundancyElimination(I, num)) { 1375 // Remove it! 1376 VN.erase(I); 1377 I->replaceAllUsesWith(repl); 1378 if (isa<PointerType>(repl->getType())) 1379 MD->invalidateCachedPointerInfo(repl); 1380 toErase.push_back(I); 1381 return true; 1382#endif 1383 } else { 1384 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1385 } 1386 1387 return false; 1388} 1389 1390/// runOnFunction - This is the main transformation entry point for a function. 1391bool GVN::runOnFunction(Function& F) { 1392 MD = &getAnalysis<MemoryDependenceAnalysis>(); 1393 DT = &getAnalysis<DominatorTree>(); 1394 VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); 1395 VN.setMemDep(MD); 1396 VN.setDomTree(DT); 1397 1398 bool changed = false; 1399 bool shouldContinue = true; 1400 1401 // Merge unconditional branches, allowing PRE to catch more 1402 // optimization opportunities. 1403 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { 1404 BasicBlock* BB = FI; 1405 ++FI; 1406 bool removedBlock = MergeBlockIntoPredecessor(BB, this); 1407 if (removedBlock) NumGVNBlocks++; 1408 1409 changed |= removedBlock; 1410 } 1411 1412 unsigned Iteration = 0; 1413 1414 while (shouldContinue) { 1415 DEBUG(cerr << "GVN iteration: " << Iteration << "\n"); 1416 shouldContinue = iterateOnFunction(F); 1417 changed |= shouldContinue; 1418 ++Iteration; 1419 } 1420 1421 if (EnablePRE) { 1422 bool PREChanged = true; 1423 while (PREChanged) { 1424 PREChanged = performPRE(F); 1425 changed |= PREChanged; 1426 } 1427 } 1428 // FIXME: Should perform GVN again after PRE does something. PRE can move 1429 // computations into blocks where they become fully redundant. Note that 1430 // we can't do this until PRE's critical edge splitting updates memdep. 1431 // Actually, when this happens, we should just fully integrate PRE into GVN. 1432 1433 cleanupGlobalSets(); 1434 1435 return changed; 1436} 1437 1438 1439bool GVN::processBlock(BasicBlock* BB) { 1440 // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and 1441 // incrementing BI before processing an instruction). 1442 SmallVector<Instruction*, 8> toErase; 1443 bool changed_function = false; 1444 1445 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 1446 BI != BE;) { 1447 changed_function |= processInstruction(BI, toErase); 1448 if (toErase.empty()) { 1449 ++BI; 1450 continue; 1451 } 1452 1453 // If we need some instructions deleted, do it now. 1454 NumGVNInstr += toErase.size(); 1455 1456 // Avoid iterator invalidation. 1457 bool AtStart = BI == BB->begin(); 1458 if (!AtStart) 1459 --BI; 1460 1461 for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), 1462 E = toErase.end(); I != E; ++I) { 1463 DEBUG(cerr << "GVN removed: " << **I); 1464 MD->removeInstruction(*I); 1465 (*I)->eraseFromParent(); 1466 DEBUG(verifyRemoved(*I)); 1467 } 1468 toErase.clear(); 1469 1470 if (AtStart) 1471 BI = BB->begin(); 1472 else 1473 ++BI; 1474 } 1475 1476 return changed_function; 1477} 1478 1479/// performPRE - Perform a purely local form of PRE that looks for diamond 1480/// control flow patterns and attempts to perform simple PRE at the join point. 1481bool GVN::performPRE(Function& F) { 1482 bool Changed = false; 1483 SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit; 1484 DenseMap<BasicBlock*, Value*> predMap; 1485 for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), 1486 DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { 1487 BasicBlock* CurrentBlock = *DI; 1488 1489 // Nothing to PRE in the entry block. 1490 if (CurrentBlock == &F.getEntryBlock()) continue; 1491 1492 for (BasicBlock::iterator BI = CurrentBlock->begin(), 1493 BE = CurrentBlock->end(); BI != BE; ) { 1494 Instruction *CurInst = BI++; 1495 1496 if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) || 1497 isa<PHINode>(CurInst) || (CurInst->getType() == Type::VoidTy) || 1498 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || 1499 isa<DbgInfoIntrinsic>(CurInst)) 1500 continue; 1501 1502 uint32_t valno = VN.lookup(CurInst); 1503 1504 // Look for the predecessors for PRE opportunities. We're 1505 // only trying to solve the basic diamond case, where 1506 // a value is computed in the successor and one predecessor, 1507 // but not the other. We also explicitly disallow cases 1508 // where the successor is its own predecessor, because they're 1509 // more complicated to get right. 1510 unsigned numWith = 0; 1511 unsigned numWithout = 0; 1512 BasicBlock* PREPred = 0; 1513 predMap.clear(); 1514 1515 for (pred_iterator PI = pred_begin(CurrentBlock), 1516 PE = pred_end(CurrentBlock); PI != PE; ++PI) { 1517 // We're not interested in PRE where the block is its 1518 // own predecessor, on in blocks with predecessors 1519 // that are not reachable. 1520 if (*PI == CurrentBlock) { 1521 numWithout = 2; 1522 break; 1523 } else if (!localAvail.count(*PI)) { 1524 numWithout = 2; 1525 break; 1526 } 1527 1528 DenseMap<uint32_t, Value*>::iterator predV = 1529 localAvail[*PI]->table.find(valno); 1530 if (predV == localAvail[*PI]->table.end()) { 1531 PREPred = *PI; 1532 numWithout++; 1533 } else if (predV->second == CurInst) { 1534 numWithout = 2; 1535 } else { 1536 predMap[*PI] = predV->second; 1537 numWith++; 1538 } 1539 } 1540 1541 // Don't do PRE when it might increase code size, i.e. when 1542 // we would need to insert instructions in more than one pred. 1543 if (numWithout != 1 || numWith == 0) 1544 continue; 1545 1546 // We can't do PRE safely on a critical edge, so instead we schedule 1547 // the edge to be split and perform the PRE the next time we iterate 1548 // on the function. 1549 unsigned succNum = 0; 1550 for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors(); 1551 i != e; ++i) 1552 if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) { 1553 succNum = i; 1554 break; 1555 } 1556 1557 if (isCriticalEdge(PREPred->getTerminator(), succNum)) { 1558 toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum)); 1559 continue; 1560 } 1561 1562 // Instantiate the expression the in predecessor that lacked it. 1563 // Because we are going top-down through the block, all value numbers 1564 // will be available in the predecessor by the time we need them. Any 1565 // that weren't original present will have been instantiated earlier 1566 // in this loop. 1567 Instruction* PREInstr = CurInst->clone(); 1568 bool success = true; 1569 for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { 1570 Value *Op = PREInstr->getOperand(i); 1571 if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) 1572 continue; 1573 1574 if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) { 1575 PREInstr->setOperand(i, V); 1576 } else { 1577 success = false; 1578 break; 1579 } 1580 } 1581 1582 // Fail out if we encounter an operand that is not available in 1583 // the PRE predecessor. This is typically because of loads which 1584 // are not value numbered precisely. 1585 if (!success) { 1586 delete PREInstr; 1587 DEBUG(verifyRemoved(PREInstr)); 1588 continue; 1589 } 1590 1591 PREInstr->insertBefore(PREPred->getTerminator()); 1592 PREInstr->setName(CurInst->getName() + ".pre"); 1593 predMap[PREPred] = PREInstr; 1594 VN.add(PREInstr, valno); 1595 NumGVNPRE++; 1596 1597 // Update the availability map to include the new instruction. 1598 localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr)); 1599 1600 // Create a PHI to make the value available in this block. 1601 PHINode* Phi = PHINode::Create(CurInst->getType(), 1602 CurInst->getName() + ".pre-phi", 1603 CurrentBlock->begin()); 1604 for (pred_iterator PI = pred_begin(CurrentBlock), 1605 PE = pred_end(CurrentBlock); PI != PE; ++PI) 1606 Phi->addIncoming(predMap[*PI], *PI); 1607 1608 VN.add(Phi, valno); 1609 localAvail[CurrentBlock]->table[valno] = Phi; 1610 1611 CurInst->replaceAllUsesWith(Phi); 1612 if (isa<PointerType>(Phi->getType())) 1613 MD->invalidateCachedPointerInfo(Phi); 1614 VN.erase(CurInst); 1615 1616 DEBUG(cerr << "GVN PRE removed: " << *CurInst); 1617 MD->removeInstruction(CurInst); 1618 CurInst->eraseFromParent(); 1619 DEBUG(verifyRemoved(CurInst)); 1620 Changed = true; 1621 } 1622 } 1623 1624 for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator 1625 I = toSplit.begin(), E = toSplit.end(); I != E; ++I) 1626 SplitCriticalEdge(I->first, I->second, this); 1627 1628 return Changed || toSplit.size(); 1629} 1630 1631/// iterateOnFunction - Executes one iteration of GVN 1632bool GVN::iterateOnFunction(Function &F) { 1633 cleanupGlobalSets(); 1634 1635 for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), 1636 DE = df_end(DT->getRootNode()); DI != DE; ++DI) { 1637 if (DI->getIDom()) 1638 localAvail[DI->getBlock()] = 1639 new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]); 1640 else 1641 localAvail[DI->getBlock()] = new ValueNumberScope(0); 1642 } 1643 1644 // Top-down walk of the dominator tree 1645 bool changed = false; 1646#if 0 1647 // Needed for value numbering with phi construction to work. 1648 ReversePostOrderTraversal<Function*> RPOT(&F); 1649 for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(), 1650 RE = RPOT.end(); RI != RE; ++RI) 1651 changed |= processBlock(*RI); 1652#else 1653 for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), 1654 DE = df_end(DT->getRootNode()); DI != DE; ++DI) 1655 changed |= processBlock(DI->getBlock()); 1656#endif 1657 1658 return changed; 1659} 1660 1661void GVN::cleanupGlobalSets() { 1662 VN.clear(); 1663 phiMap.clear(); 1664 1665 for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator 1666 I = localAvail.begin(), E = localAvail.end(); I != E; ++I) 1667 delete I->second; 1668 localAvail.clear(); 1669} 1670 1671/// verifyRemoved - Verify that the specified instruction does not occur in our 1672/// internal data structures. 1673void GVN::verifyRemoved(const Instruction *Inst) const { 1674 VN.verifyRemoved(Inst); 1675 1676 // Walk through the PHI map to make sure the instruction isn't hiding in there 1677 // somewhere. 1678 for (PhiMapType::iterator 1679 I = phiMap.begin(), E = phiMap.end(); I != E; ++I) { 1680 assert(I->first != Inst && "Inst is still a key in PHI map!"); 1681 1682 for (SmallPtrSet<Instruction*, 4>::iterator 1683 II = I->second.begin(), IE = I->second.end(); II != IE; ++II) { 1684 assert(*II != Inst && "Inst is still a value in PHI map!"); 1685 } 1686 } 1687 1688 // Walk through the value number scope to make sure the instruction isn't 1689 // ferreted away in it. 1690 for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator 1691 I = localAvail.begin(), E = localAvail.end(); I != E; ++I) { 1692 const ValueNumberScope *VNS = I->second; 1693 1694 while (VNS) { 1695 for (DenseMap<uint32_t, Value*>::iterator 1696 II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) { 1697 assert(II->second != Inst && "Inst still in value numbering scope!"); 1698 } 1699 1700 VNS = VNS->parent; 1701 } 1702 } 1703} 1704