GVN.cpp revision f4177aa0193d9bedc4f919b9322464bf3dee83ab
//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions.  It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include <list>
using namespace llvm;

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
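
// Both flags default to on.  They can be toggled from the opt driver when
// isolating a miscompile, e.g. (illustrative invocation, not from this file):
//   opt -gvn -enable-pre=false -enable-load-pre=false foo.bc -o foo.opt.bc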

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers.  It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
namespace {
  struct Expression {
    enum ExpressionOpcode {
      ADD = Instruction::Add,
      FADD = Instruction::FAdd,
      SUB = Instruction::Sub,
      FSUB = Instruction::FSub,
      MUL = Instruction::Mul,
      FMUL = Instruction::FMul,
      UDIV = Instruction::UDiv,
      SDIV = Instruction::SDiv,
      FDIV = Instruction::FDiv,
      UREM = Instruction::URem,
      SREM = Instruction::SRem,
      FREM = Instruction::FRem,
      SHL = Instruction::Shl,
      LSHR = Instruction::LShr,
      ASHR = Instruction::AShr,
      AND = Instruction::And,
      OR = Instruction::Or,
      XOR = Instruction::Xor,
      TRUNC = Instruction::Trunc,
      ZEXT = Instruction::ZExt,
      SEXT = Instruction::SExt,
      FPTOUI = Instruction::FPToUI,
      FPTOSI = Instruction::FPToSI,
      UITOFP = Instruction::UIToFP,
      SITOFP = Instruction::SIToFP,
      FPTRUNC = Instruction::FPTrunc,
      FPEXT = Instruction::FPExt,
      PTRTOINT = Instruction::PtrToInt,
      INTTOPTR = Instruction::IntToPtr,
      BITCAST = Instruction::BitCast,
      ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
      ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
      FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
      FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
      FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
      SHUFFLE, SELECT, GEP, CALL, CONSTANT,
      INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };

    ExpressionOpcode opcode;
    const Type* type;
    SmallVector<uint32_t, 4> varargs;
    Value *function;

    Expression() { }
    Expression(ExpressionOpcode o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == EMPTY || opcode == TOMBSTONE)
        return true;
      else if (type != other.type)
        return false;
      else if (function != other.function)
        return false;
      else {
        if (varargs.size() != other.varargs.size())
          return false;

        for (size_t i = 0; i < varargs.size(); ++i)
          if (varargs[i] != other.varargs[i])
            return false;

        return true;
      }
    }

    /*bool operator!=(const Expression &other) const {
      return !(*this == other);
    }*/
  };

  class ValueTable {
  private:
    DenseMap<Value*, uint32_t> valueNumbering;
    DenseMap<Expression, uint32_t> expressionNumbering;
    AliasAnalysis* AA;
    MemoryDependenceAnalysis* MD;
    DominatorTree* DT;

    uint32_t nextValueNumber;

    Expression::ExpressionOpcode getOpcode(CmpInst* C);
    Expression create_expression(BinaryOperator* BO);
    Expression create_expression(CmpInst* C);
    Expression create_expression(ShuffleVectorInst* V);
    Expression create_expression(ExtractElementInst* C);
    Expression create_expression(InsertElementInst* V);
    Expression create_expression(SelectInst* V);
    Expression create_expression(CastInst* C);
    Expression create_expression(GetElementPtrInst* G);
    Expression create_expression(CallInst* C);
    Expression create_expression(ExtractValueInst* C);
    Expression create_expression(InsertValueInst* C);

    uint32_t lookup_or_add_call(CallInst* C);
  public:
    ValueTable() : nextValueNumber(1) { }
    uint32_t lookup_or_add(Value *V);
    uint32_t lookup(Value *V) const;
    void add(Value *V, uint32_t num);
    void clear();
    void erase(Value *v);
    void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
    AliasAnalysis *getAliasAnalysis() const { return AA; }
    void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
    void setDomTree(DominatorTree* D) { DT = D; }
    uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
    void verifyRemoved(const Value *) const;
  };
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return Expression(Expression::EMPTY);
  }

  static inline Expression getTombstoneKey() {
    return Expression(Expression::TOMBSTONE);
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9));

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
            (unsigned)((uintptr_t)e.function >> 9)) +
           hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

template <>
struct isPodLike<Expression> { static const bool value = true; };

}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
  if (isa<ICmpInst>(C)) {
    switch (C->getPredicate()) {
    default:  // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
    }
  } else {
    switch (C->getPredicate()) {
    default: // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
    }
  }
}

Expression ValueTable::create_expression(CallInst* C) {
  Expression e;

  e.type = C->getType();
  e.function = C->getCalledFunction();
  e.opcode = Expression::CALL;

  CallSite CS(C);
  for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}
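
// Illustrative example (not part of the pass): given
//   %x = add i32 %a, %b
//   %y = add i32 %a, %b
// both instructions are turned into an Expression with opcode ADD, type i32,
// and varargs { VN(%a), VN(%b) }, so expressionNumbering hands them the same
// value number and GVN can later replace %y with %x.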

Expression ValueTable::create_expression(BinaryOperator* BO) {
  Expression e;
  e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
  e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
  e.function = 0;
  e.type = BO->getType();
  e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());

  return e;
}

Expression ValueTable::create_expression(CmpInst* C) {
  Expression e;

  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
  e.varargs.push_back(lookup_or_add(C->getOperand(1)));
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(CastInst* C) {
  Expression e;

  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
  e.function = 0;
  e.type = C->getType();
  e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());

  return e;
}

Expression ValueTable::create_expression(ShuffleVectorInst* S) {
  Expression e;

  e.varargs.push_back(lookup_or_add(S->getOperand(0)));
  e.varargs.push_back(lookup_or_add(S->getOperand(1)));
  e.varargs.push_back(lookup_or_add(S->getOperand(2)));
  e.function = 0;
  e.type = S->getType();
  e.opcode = Expression::SHUFFLE;

  return e;
}

Expression ValueTable::create_expression(ExtractElementInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getOperand(0)));
  e.varargs.push_back(lookup_or_add(E->getOperand(1)));
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACT;

  return e;
}

Expression ValueTable::create_expression(InsertElementInst* I) {
  Expression e;

  e.varargs.push_back(lookup_or_add(I->getOperand(0)));
  e.varargs.push_back(lookup_or_add(I->getOperand(1)));
  e.varargs.push_back(lookup_or_add(I->getOperand(2)));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::INSERT;

  return e;
}

Expression ValueTable::create_expression(SelectInst* I) {
  Expression e;

  e.varargs.push_back(lookup_or_add(I->getCondition()));
  e.varargs.push_back(lookup_or_add(I->getTrueValue()));
  e.varargs.push_back(lookup_or_add(I->getFalseValue()));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::SELECT;

  return e;
}

Expression ValueTable::create_expression(GetElementPtrInst* G) {
  Expression e;

  e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
  e.function = 0;
  e.type = G->getType();
  e.opcode = Expression::GEP;

  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

Expression ValueTable::create_expression(ExtractValueInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
  for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACTVALUE;

  return e;
}

Expression ValueTable::create_expression(InsertValueInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
  e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
  for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::INSERTVALUE;

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: call/call dependencies for readonly calls should return def, not
    // clobber!  Move the checking logic to MemDep!
    CallInst* cdep = 0;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      // Ignore non-local dependencies.
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-dependencies.  If we already have a call, reject
      // instruction dependencies.
      if (I->getResult().isClobber() || cdep != 0) {
        cdep = 0;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = 0;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;

  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
      exp = create_expression(cast<BinaryOperator>(I));
      break;
    case Instruction::ICmp:
    case Instruction::FCmp:
      exp = create_expression(cast<CmpInst>(I));
      break;
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
      exp = create_expression(cast<CastInst>(I));
      break;
    case Instruction::Select:
      exp = create_expression(cast<SelectInst>(I));
      break;
    case Instruction::ExtractElement:
      exp = create_expression(cast<ExtractElementInst>(I));
      break;
    case Instruction::InsertElement:
      exp = create_expression(cast<InsertElementInst>(I));
      break;
    case Instruction::ShuffleVector:
      exp = create_expression(cast<ShuffleVectorInst>(I));
      break;
    case Instruction::ExtractValue:
      exp = create_expression(cast<ExtractValueInst>(I));
      break;
    case Instruction::InsertValue:
      exp = create_expression(cast<InsertValueInst>(I));
      break;
    case Instruction::GetElementPtr:
      exp = create_expression(cast<GetElementPtrInst>(I));
      break;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}

/// lookup - Returns the value number of the specified value.  Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {
  struct ValueNumberScope {
    ValueNumberScope* parent;
    DenseMap<uint32_t, Value*> table;

    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
  };
}

namespace {

  class GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool noloads = false)
        : FunctionPass(ID), NoLoads(noloads), MD(0) {
      initializeGVNPass(*PassRegistry::getPassRegistry());
    }

  private:
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;
    const TargetData* TD;

    ValueTable VN;

    /// NumberTable - A mapping from value numbers to lists of Value*'s that
    /// have that value number.  Use lookupNumber to query it.
    DenseMap<uint32_t, std::pair<Value*, void*> > NumberTable;
    BumpPtrAllocator TableAllocator;

    /// insert_table - Push a new Value to the NumberTable onto the list for
    /// its value number.
    void insert_table(uint32_t N, Value *V) {
      std::pair<Value*, void*>& Curr = NumberTable[N];
      if (!Curr.first) {
        Curr.first = V;
        return;
      }

      std::pair<Value*, void*>* Node =
        TableAllocator.Allocate<std::pair<Value*, void*> >();
      Node->first = V;
      Node->second = Curr.second;
      Curr.second = Node;
    }

    /// erase_table - Scan the list of values corresponding to a given value
    /// number, and remove the given value if encountered.
    void erase_table(uint32_t N, Value *V) {
      std::pair<Value*, void*>* Prev = 0;
      std::pair<Value*, void*>* Curr = &NumberTable[N];

      while (Curr->first != V) {
        Prev = Curr;
        Curr = static_cast<std::pair<Value*, void*>*>(Curr->second);
      }

      if (Prev) {
        Prev->second = Curr->second;
      } else {
        if (!Curr->second) {
          Curr->first = 0;
        } else {
          std::pair<Value*, void*>* Next =
            static_cast<std::pair<Value*, void*>*>(Curr->second);
          Curr->first = Next->first;
          Curr->second = Next->second;
        }
      }
    }
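
    // Layout note (illustrative): each NumberTable entry is the head of a
    // singly linked list packed into pair<Value*, void*> nodes, where 'first'
    // holds a Value with this number and 'second' points to the next node (or
    // is null).  insert_table keeps the head node in place and splices new
    // values in directly behind it, so inserting V1, V2, V3 under the same
    // number yields the list V1 -> V3 -> V2.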

    // List of critical edges to be split between iterations.
    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;

    // This transformation requires dominator info.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTree>();
      AU.addPreserved<AliasAnalysis>();
    }

    // Helper functions.
    // FIXME: eliminate or document these better
    bool processLoad(LoadInst* L,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction *I,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*>& d);
    bool iterateOnFunction(Function &F);
    bool performPRE(Function& F);
    Value *lookupNumber(BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
    bool splitCriticalEdges();
  };

  char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
  return new GVN(NoLoads);
}

INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)

void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    errs() << I->first << "\n";
    I->second->dump();
  }
  errs() << "}\n";
}

/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block.  As we go, keep
/// track of which blocks we know are fully available in FullyAvailableBlocks.
/// This map is actually a four-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either.  Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block.  We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect.  Walk the (transitive) successors of this block and mark them as
  // 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  do {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks.  This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue;  // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
      BBWorklist.push_back(*I);
  } while (!BBWorklist.empty());

  return false;
}


/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            const Type *LoadTy,
                                            const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
      StoredVal->getType()->isStructTy() ||
      StoredVal->getType()->isArrayTy())
    return false;

  // The store has to be at least as big as the load.
  if (TD.getTypeSizeInBits(StoredVal->getType()) <
      TD.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}

/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value.  LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             const Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const TargetData &TD) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
    return 0;

  const Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
      // Pointer to Pointer -> use bitcast.
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
    }

    // Convert source pointers to integers, which can be bitcast.
    if (StoredValTy->isPointerTy()) {
      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    const Type *TypeToCastTo = LoadedTy;
    if (TypeToCastTo->isPointerTy())
      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (LoadedTy->isPointerTy())
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it.  If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPointerTy()) {
    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (TD.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (LoadedTy->isPointerTy())
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
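
// Illustrative example: after
//   store i64 %v, i64* %p
//   %x = load i32* %q     ; where %q must-aliases %p
// the i32 load can be fed from %v.  On a little-endian target the coercion
// emits "trunc i64 %v to i32"; on a big-endian target it first shifts the
// wanted bytes down with "lshr i64 %v, 32" and then truncates.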

/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove).  This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up.  This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias.  AA must have gotten confused.
  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
  // to a load from the base of the memset.
#if 0
  if (LoadOffset == StoreOffset) {
    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
    abort();
  }
#endif

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load.  In this case, they really don't alias at all, AA
  // must have gotten confused.
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
  LoadSize >>= 3;


  bool isAAFailure = false;
  if (StoreOffset < LoadOffset)
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  else
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;

  if (isAAFailure) {
#if 0
    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
    abort();
#endif
    return -1;
  }

  // If the Load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it.  We could do something crazy in the future
  // (issue a smaller load then merge the bits in) but this seems unlikely to be
  // valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation.  Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}

/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
                                          StoreInst *DepSI,
                                          const TargetData &TD) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepSI->getValueOperand()->getType()->isStructTy() ||
      DepSI->getValueOperand()->getType()->isArrayTy())
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                        StorePtr, StoreSize, TD);
}
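
// Illustrative example: with a store of an i32 to (%base + 0) clobbering
//   %x = load i8* (%base + 2)
// the loaded byte lies entirely inside the four stored bytes, so the analysis
// returns byte offset 2; a load at (%base + 4) would not overlap the store at
// all and returns -1 (the AA-failure case above).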

static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
                                            MemIntrinsic *MI,
                                            const TargetData &TD) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (SizeCst == 0) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, TD);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory.  In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (Src == 0) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src));
  if (GV == 0 || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                              MI->getDest(), MemSizeInBits, TD);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  if (ConstantFoldLoadFromConstPtr(Src, &TD))
    return Offset;
  return -1;
}


/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.  This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias.  Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   const Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load.  Convert
  // to an integer type to start with.
  if (SrcVal->getType()->isPointerTy())
    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                                   "tmp");

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (TD.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
                                 "tmp");

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
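
// Illustrative example (little-endian): extracting bytes 4..7 of
//   store i64 %v, i64* %p
// for a 4-byte load at byte offset 4 produces
//   %s = lshr i64 %v, 32
//   %t = trunc i64 %s to i32
// ShiftAmt is Offset*8 = 32 here; a big-endian target would instead use
// (StoreSize-LoadSize-Offset)*8 = 0.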

/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     const Type *LoadTy, Instruction *InsertPt,
                                     const TargetData &TD){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  return ConstantFoldLoadFromConstPtr(Src, &TD);
}
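
// Illustrative example: a memset of byte %c feeding a 4-byte load zero-extends
// %c to i32 and splats it by doubling with shl/or:
//   0x000000cc -> 0x0000cccc -> 0xcccccccc
// which is correct for any load offset inside the memset region.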

namespace {

struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;
  enum ValType {
    SimpleVal,  // A simple offsetted value that is accessed.
    MemIntrin   // A memory intrinsic which is loaded from.
  };

  /// V - The value that is live out of the block.
  PointerIntPair<Value *, 1, ValType> Val;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset;

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(V);
    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                     unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(MI);
    Res.Val.setInt(MemIntrin);
    Res.Offset = Offset;
    return Res;
  }

  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val.getPointer();
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(!isSimpleValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val.getPointer());
  }

  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
  /// defined here to the specified type.  This handles various coercion cases.
  Value *MaterializeAdjustedValue(const Type *LoadTy,
                                  const TargetData *TD) const {
    Value *Res;
    if (isSimpleValue()) {
      Res = getSimpleValue();
      if (Res->getType() != LoadTy) {
        assert(TD && "Need target data to handle type mismatch case");
        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
                                   *TD);

        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
                     << *getSimpleValue() << '\n'
                     << *Res << '\n' << "\n\n\n");
      }
    } else {
      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
                                   LoadTy, BB->getTerminator(), *TD);
      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                   << " " << *getMemIntrinValue() << '\n'
                   << *Res << '\n' << "\n\n\n");
    }
    return Res;
  }
};

}

/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI.  This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                        SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     const TargetData *TD,
                                     const DominatorTree &DT,
                                     AliasAnalysis *AA) {
  // Check for the fully redundant, dominating load case.  In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());

  const Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (V->getType()->isPointerTy())
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

  return V;
}
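
// Illustrative example: if the loaded value is available as %s1 in
// predecessor BB1 and as %s2 in predecessor BB2, SSAUpdater materializes
//   %phi = phi i32 [ %s1, %BB1 ], [ %s2, %BB2 ]
// in the load's block, and the load is then replaced by %phi.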

static bool isLifetimeStart(const Instruction *Inst) {
  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}

/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
                              SmallVectorImpl<Instruction*> &toErase) {
  // Find the non-local dependencies of the load.
  SmallVector<NonLocalDepResult, 64> Deps;
  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
  //             << Deps.size() << *LI << '\n');

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about.  Optimizing
  // it will be too expensive.
  if (Deps.size() > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block.  Reject this early.
  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
    DEBUG(
      dbgs() << "GVN: non-local load ";
      WriteAsOperand(dbgs(), LI);
      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
    );
    return false;
  }

  // Filter out useless results (non-locals, etc).  Keep track of the blocks
  // where we have a value available in repl, also keep track of whether we see
  // dependencies that produce an unknown value for the load (such as a call
  // that could potentially clobber the load).
  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
  SmallVector<BasicBlock*, 16> UnavailableBlocks;

  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
    BasicBlock *DepBB = Deps[i].getBB();
    MemDepResult DepInfo = Deps[i].getResult();

    if (DepInfo.isClobber()) {
      // The address being loaded in this non-local block may not be the same
      // as the pointer operand of the load if PHI translation occurs.  Make
      // sure to consider the right address.
      Value *Address = Deps[i].getAddress();

      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from
      // the stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
                                                      DepSI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                      DepSI->getValueOperand(),
                                                                Offset));
            continue;
          }
        }
      }

      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                        DepMI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                  Offset));
            continue;
          }
        }
      }

      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
        // Loading immediately after lifetime begin -> undef.
        isLifetimeStart(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }

    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getValueOperand()->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
                                                        LI->getType(), *TD)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                        S->getValueOperand()));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the load.
      if (LD->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
    continue;
  }

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty()) return false;

  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value.  Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                      VN.getAliasAnalysis());
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(LI);
    toErase.push_back(LI);
    ++NumGVNLoad;
    return true;
  }

  if (!EnablePRE || !EnableLoadPRE)
    return false;

  // Okay, we have *some* definitions of the value.  This means that the value
  // is available in some of our (transitive) predecessors.  Let's think about
  // doing PRE of this load.  This will involve inserting a new load into the
  // predecessor when it's not available.  We could do this in general, but
  // prefer to not increase code size.  As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).

  SmallPtrSet<BasicBlock *, 4> Blockers;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    Blockers.insert(UnavailableBlocks[i]);

  // Let's find the first basic block with more than one predecessor.  Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = LI->getParent();
  BasicBlock *TmpBB = LoadBB;

  bool isSinglePred = false;
  bool allSingleSucc = true;
  while (TmpBB->getSinglePredecessor()) {
    isSinglePred = true;
    TmpBB = TmpBB->getSinglePredecessor();
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;

    // If any of these blocks has more than one successor (i.e. if the edge we
    // just traversed was critical), then there are other paths through this
    // block along which the load may not be anticipated.  Hoisting the load
    // above this block would be adding the load to execution paths along
    // which it was not previously executed.
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      return false;
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // FIXME: It is extremely unclear what this loop is doing, other than
  // artificially restricting loadpre.
  if (isSinglePred) {
    bool isHot = false;
    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
      const AvailableValueInBlock &AV = ValuesPerBlock[i];
      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
          if (DT->dominates(LI, I)) {
            isHot = true;
            break;
          }
    }

    // We are interested only in "hot" instructions.  We don't want to do any
    // mis-optimizations here.
    if (!isHot)
      return false;
  }

  // Check to see how many predecessors have the loaded value fully
  // available.
  DenseMap<BasicBlock*, Value*> PredLoads;
  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    FullyAvailableBlocks[UnavailableBlocks[i]] = false;

  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
      continue;
    }
    PredLoads[Pred] = 0;

    if (Pred->getTerminator()->getNumSuccessors() != 1) {
      if (isa<IndirectBrInst>(Pred->getTerminator())) {
        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
              << Pred->getName() << "': " << *LI << '\n');
        return false;
      }
      unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
      NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
    }
  }
  if (!NeedToSplit.empty()) {
    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
    return false;
  }

  // Decide whether PRE is profitable for this load.
  unsigned NumUnavailablePreds = PredLoads.size();
  assert(NumUnavailablePreds != 0 &&
         "Fully available value should be eliminated above!");

  // If this load is unavailable in multiple predecessors, reject it.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available LI and insert a new load into
  // that one block.
  if (NumUnavailablePreds != 1)
    return false;

  // Check if the load can safely be moved to all the unavailable predecessors.
  bool CanDoPRE = true;
  SmallVector<Instruction*, 8> NewInsts;
  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;

    // Do PHI translation to get its value in the predecessor if necessary.  The
    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.

    // If all preds have a single successor, then we know it is safe to insert
    // the load on the pred (?!?), so we can insert code to materialize the
    // pointer if it is not available.
    PHITransAddr Address(LI->getPointerOperand(), TD);
    Value *LoadPtr = 0;
    if (allSingleSucc) {
      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
                                                  *DT, NewInsts);
    } else {
      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
      LoadPtr = Address.getAddr();
    }

    // If we couldn't find or insert a computation of this phi translated
    // value, we fail PRE.
    if (LoadPtr == 0) {
      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
            << *LI->getPointerOperand() << "\n");
      CanDoPRE = false;
      break;
    }

    // Make sure it is valid to move this load here.  We have to watch out for:
    //  @1 = getelementptr (i8* p, ...
    //  test %p and branch if == 0
    //  load %1
    // It is valid to have the getelementptr before the test, even if %p can
    // be 0, as getelementptr only does address arithmetic.
    // If we are not pushing the value through any multiple-successor blocks
    // we do not have this case.  Otherwise, check that the load is safe to
    // put anywhere; this can be improved, but should be conservatively safe.
    if (!allSingleSucc &&
        // FIXME: REEVALUATE THIS.
        !isSafeToLoadUnconditionally(LoadPtr,
                                     UnavailablePred->getTerminator(),
                                     LI->getAlignment(), TD)) {
      CanDoPRE = false;
      break;
    }

    I->second = LoadPtr;
  }

  if (!CanDoPRE) {
    while (!NewInsts.empty())
      NewInsts.pop_back_val()->eraseFromParent();
    return false;
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors,
  // do it.
  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
  DEBUG(if (!NewInsts.empty())
          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
                 << *NewInsts.back() << '\n');

  // Assign value numbers to the new instructions.
  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map.  However, in doing so, we risk getting into
    // ordering issues.  If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookup_or_add(NewInsts[i]);
  }

  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;
    Value *LoadPtr = I->second;

    Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                        LI->getAlignment(),
                                        UnavailablePred->getTerminator());

    // Transfer the old load's TBAA tag to the new load.
    if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
      NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);

    // Add the newly created load.
    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
                                                        NewLoad));
    MD->invalidateCachedPointerInfo(LoadPtr);
    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
  }

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                    VN.getAliasAnalysis());
  LI->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(LI);
  if (V->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(V);
  VN.erase(LI);
  toErase.push_back(LI);
  ++NumPRELoad;
  return true;
}

/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
  if (!MD)
    return false;

  if (L->isVolatile())
    return false;

  // Find what this load depends on: the instruction that most recently may
  // have written the memory it reads.
  MemDepResult Dep = MD->getDependency(L);

  // If the value isn't available, don't do anything!
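  // (A "clobber" dependency means memdep found an instruction that may write
  //  the queried memory without being a simple must-aliased definition of it,
  //  e.g. a wider store covering the loaded bytes; a "def" dependency is an
  //  exact definition whose value we can forward directly.)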
  if (Dep.isClobber()) {
    // Check to see if we have something like this:
    //   store i32 123, i32* %P
    //   %A = bitcast i32* %P to i8*
    //   %B = gep i8* %A, i32 1
    //   %C = load i8* %B
    //
    // We can handle this by recognizing that the clobbering addresses are
    // obviously a common base + constant offset apart, and that the previous
    // store (or memset) completely covers this load.  This sort of thing can
    // happen in bitfield access code.
    Value *AvailVal = 0;
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
      if (TD) {
        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
                                                    L->getPointerOperand(),
                                                    DepSI, *TD);
        if (Offset != -1)
          AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
                                          L->getType(), L, *TD);
      }

    // If the clobbering value is a memset/memcpy/memmove, see if we can
    // forward a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
      if (TD) {
        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
                                                      L->getPointerOperand(),
                                                      DepMI, *TD);
        if (Offset != -1)
          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,
                                            *TD);
      }
    }

    if (AvailVal) {
      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
                   << *AvailVal << '\n' << *L << "\n\n\n");

      // Replace the load!
      L->replaceAllUsesWith(AvailVal);
      if (AvailVal->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(AvailVal);
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }

    DEBUG(
      // Fast-print the dep; using operator<< on the instruction would be too
      // slow.
      dbgs() << "GVN: load ";
      WriteAsOperand(dbgs(), L);
      Instruction *I = Dep.getInst();
      dbgs() << " is clobbered by " << *I << '\n';
    );
    return false;
  }

  // If it is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(L, toErase);

  Instruction *DepInst = Dep.getInst();
  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
    Value *StoredVal = DepSI->getValueOperand();

    // The store and load are to a must-aliased pointer, but they may not
    // actually have the same type.  See if we know how to reuse the stored
    // value (depending on its type).
    if (StoredVal->getType() != L->getType()) {
      if (TD) {
        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                   L, *TD);
        if (StoredVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
                     << '\n' << *L << "\n\n\n");
      }
      else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(StoredVal);
    if (StoredVal->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(StoredVal);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
    Value *AvailableVal = DepLI;

    // The loads are of a must-aliased pointer, but they may not actually have
    // the same type.  See if we know how to reuse the previously loaded value
    // (depending on its type).
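    // For example (an illustrative sketch, assuming i32 and float have the
    // same store size under the current TargetData):
    //   %a = load i32* %P          ; DepLI
    //   %b = load float* %Q        ; L, where %Q must-aliases %P
    // %b can then reuse %a through a bitcast inserted by
    // CoerceAvailableValueToLoadType.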
    if (DepLI->getType() != L->getType()) {
      if (TD) {
        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
                                                      L, *TD);
        if (AvailableVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n"
                     << *AvailableVal << "\n" << *L << "\n\n\n");
      }
      else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(AvailableVal);
    if (DepLI->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(DepLI);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load really doesn't depend on anything, then we must be loading
  // an undef value.  This can happen when loading from a fresh allocation
  // with no intervening stores, for example.
  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load occurs right after a lifetime.start intrinsic, the loaded
  // value is undefined.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }
  }

  return false;
}

// lookupNumber - In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that
// number, and then scan the list to find one whose block dominates the block
// in question.  This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
  std::pair<Value*, void*> Vals = NumberTable[num];
  if (!Vals.first) return 0;
  Instruction *Inst = dyn_cast<Instruction>(Vals.first);
  if (!Inst) return Vals.first;
  BasicBlock *Parent = Inst->getParent();
  if (DT->dominates(Parent, BB))
    return Inst;

  // Additional leaders for this number are chained off Vals.second as a
  // singly linked list of (Value*, next) pairs.
  std::pair<Value*, void*> *Next =
    static_cast<std::pair<Value*, void*>*>(Vals.second);
  while (Next) {
    Instruction *CurrInst = dyn_cast<Instruction>(Next->first);
    if (!CurrInst) return Next->first;

    BasicBlock *Parent = CurrInst->getParent();
    if (DT->dominates(Parent, BB))
      return CurrInst;

    Next = static_cast<std::pair<Value*, void*>*>(Next->second);
  }

  return 0;
}


/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
bool GVN::processInstruction(Instruction *I,
                             SmallVectorImpl<Instruction*> &toErase) {
  // Ignore dbg info intrinsics.
  if (isa<DbgInfoIntrinsic>(I))
    return false;

  // If the instruction can be easily simplified then do so now in preference
  // to value numbering it.  Value numbering often exposes redundancies, for
  // example if it determines that %y is equal to %x then the instruction
  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
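  // For instance (illustrative), "%z = add i32 %x, 0" comes back from
  // SimplifyInstruction as plain %x, and %z is erased below without ever
  // being assigned a value number.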
  if (Value *V = SimplifyInstruction(I, TD, DT)) {
    I->replaceAllUsesWith(V);
    if (MD && V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(I);
    toErase.push_back(I);
    return true;
  }

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    bool Changed = processLoad(LI, toErase);

    if (!Changed) {
      unsigned Num = VN.lookup_or_add(LI);
      insert_table(Num, LI);
    }

    return Changed;
  }

  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookup_or_add(I);

  // Allocations and terminators are always uniquely numbered, so we can save
  // time and memory by fast-failing them.
  if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
    insert_table(Num, I);
    return false;
  }

  if (isa<PHINode>(I)) {
    insert_table(Num, I);

    // If the number we were assigned was a brand new VN, then we don't
    // need to do a lookup to see if the number already exists
    // somewhere in the domtree: it can't!
  } else if (Num == NextNum) {
    insert_table(Num, I);

    // Perform fast-path value-number based elimination of values inherited
    // from dominators.
  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
    // Remove it!
    VN.erase(I);
    I->replaceAllUsesWith(repl);
    if (MD && repl->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(repl);
    toErase.push_back(I);
    return true;

  } else {
    insert_table(Num, I);
  }

  return false;
}

/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function& F) {
  if (!NoLoads)
    MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  TD = getAnalysisIfAvailable<TargetData>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);

  bool Changed = false;
  bool ShouldContinue = true;

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock *BB = FI;
    ++FI;
    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) ++NumGVNBlocks;

    Changed |= removedBlock;
  }

  unsigned Iteration = 0;

  while (ShouldContinue) {
    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    if (splitCriticalEdges())
      ShouldContinue = true;
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (EnablePRE) {
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }
  // FIXME: Should perform GVN again after PRE does something.  PRE can move
  // computations into blocks where they become fully redundant.  Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();

  return Changed;
}


bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
  // incrementing BI before processing an instruction).
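  // (One possible shape for that helper, sketched: advance BI past the dead
  //  instruction first, then call MD->removeInstruction and eraseFromParent
  //  on it, so no deferred toErase list or iterator back-up is needed.)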
  SmallVector<Instruction*, 8> toErase;
  bool ChangedFunction = false;

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE;) {
    ChangedFunction |= processInstruction(BI, toErase);
    if (toErase.empty()) {
      ++BI;
      continue;
    }

    // If we need some instructions deleted, do it now.
    NumGVNInstr += toErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;

    for (SmallVector<Instruction*, 8>::iterator I = toErase.begin(),
         E = toErase.end(); I != E; ++I) {
      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
      if (MD) MD->removeInstruction(*I);
      (*I)->eraseFromParent();
      DEBUG(verifyRemoved(*I));
    }
    toErase.clear();

    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}

/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  DenseMap<BasicBlock*, Value*> predMap;
  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock *CurrentBlock = *DI;

    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;

      if (isa<AllocaInst>(CurInst) ||
          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
          CurInst->getType()->isVoidTy() ||
          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
          isa<DbgInfoIntrinsic>(CurInst))
        continue;

      // We don't currently value number ANY inline asm calls.
      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
        if (CallI->isInlineAsm())
          continue;

      uint32_t ValNo = VN.lookup(CurInst);

      // Look at the predecessors for PRE opportunities.  We're only trying
      // to solve the basic diamond case, where a value is computed in the
      // successor and one predecessor, but not the other.  We also
      // explicitly disallow cases where the successor is its own
      // predecessor, because they're more complicated to get right.
      unsigned NumWith = 0;
      unsigned NumWithout = 0;
      BasicBlock *PREPred = 0;
      predMap.clear();

      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        // We're not interested in PRE where the block is its
        // own predecessor, or in blocks with predecessors
        // that are not reachable.
        if (P == CurrentBlock) {
          NumWithout = 2;
          break;
        } else if (!DT->dominates(&F.getEntryBlock(), P)) {
          NumWithout = 2;
          break;
        }

        Value *predV = lookupNumber(P, ValNo);
        if (predV == 0) {
          PREPred = P;
          ++NumWithout;
        } else if (predV == CurInst) {
          NumWithout = 2;
        } else {
          predMap[P] = predV;
          ++NumWith;
        }
      }

      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
      if (NumWithout != 1 || NumWith == 0)
        continue;

      // Don't do PRE across indirect branches.
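      // (Edges leaving an indirectbr cannot be split, so there would be no
      //  safe place to materialize the expression for that predecessor.)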
      if (isa<IndirectBrInst>(PREPred->getTerminator()))
        continue;

      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
        continue;
      }

      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      Instruction *PREInstr = CurInst->clone();
      bool success = true;
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;

        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
        }
      }

      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor.  This is typically because of loads which
      // are not value numbered precisely.
      if (!success) {
        delete PREInstr;
        DEBUG(verifyRemoved(PREInstr));
        continue;
      }

      PREInstr->insertBefore(PREPred->getTerminator());
      PREInstr->setName(CurInst->getName() + ".pre");
      predMap[PREPred] = PREInstr;
      VN.add(PREInstr, ValNo);
      ++NumGVNPRE;

      // Update the availability map to include the new instruction.
      insert_table(ValNo, PREInstr);

      // Create a PHI to make the value available in this block.
      PHINode *Phi = PHINode::Create(CurInst->getType(),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        Phi->addIncoming(predMap[P], P);
      }

      VN.add(Phi, ValNo);
      insert_table(ValNo, Phi);

      CurInst->replaceAllUsesWith(Phi);
      if (MD && Phi->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(Phi);
      VN.erase(CurInst);
      erase_table(ValNo, CurInst);

      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
      if (MD) MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      DEBUG(verifyRemoved(CurInst));
      Changed = true;
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}

/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
  if (toSplit.empty())
    return false;
  do {
    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
    SplitCriticalEdge(Edge.first, Edge.second, this);
  } while (!toSplit.empty());
  if (MD) MD->invalidateCachedPredecessors();
  return true;
}

/// iterateOnFunction - Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Top-down walk of the dominator tree.
  bool Changed = false;
#if 0
  // Needed for value numbering with phi construction to work.
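  // (RPO visits a block only after all of its non-back-edge CFG
  //  predecessors, which is the property phi construction needs; the
  //  dominator-tree DFS below only guarantees that a block's idom is
  //  visited first.)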
  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
       RE = RPOT.end(); RI != RE; ++RI)
    Changed |= processBlock(*RI);
#else
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    Changed |= processBlock(DI->getBlock());
#endif

  return Changed;
}

void GVN::cleanupGlobalSets() {
  VN.clear();
  NumberTable.clear();
  TableAllocator.Reset();
}

/// verifyRemoved - Verify that the specified instruction does not occur in our
/// internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<uint32_t, std::pair<Value*, void*> >::const_iterator
       I = NumberTable.begin(), E = NumberTable.end(); I != E; ++I) {
    std::pair<Value*, void*> const * Node = &I->second;
    assert(Node->first != Inst && "Inst still in value numbering scope!");

    while (Node->second) {
      Node = static_cast<std::pair<Value*, void*>*>(Node->second);
      assert(Node->first != Inst && "Inst still in value numbering scope!");
    }
  }
}