GVN.cpp revision 0ee443d169786017d151034b8bd34225bc144c98
//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions. It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers. It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
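///
/// For illustration: two syntactically identical instructions such as
///   %a = add i32 %x, %y
///   %b = add i32 %x, %y
/// receive the same value number, so one can stand in for the other.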
namespace {
  struct Expression {
    enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
                            UDIV, SDIV, FDIV, UREM, SREM,
                            FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
                            ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
                            ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
                            FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
                            FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
                            FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
                            SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
                            FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
                            PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,
                            INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };

    ExpressionOpcode opcode;
    const Type* type;
    SmallVector<uint32_t, 4> varargs;
    Value *function;

    Expression() { }
    Expression(ExpressionOpcode o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == EMPTY || opcode == TOMBSTONE)
        return true;
      else if (type != other.type)
        return false;
      else if (function != other.function)
        return false;
      else {
        if (varargs.size() != other.varargs.size())
          return false;

        for (size_t i = 0; i < varargs.size(); ++i)
          if (varargs[i] != other.varargs[i])
            return false;

        return true;
      }
    }

    bool operator!=(const Expression &other) const {
      return !(*this == other);
    }
  };

  class ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;
      DenseMap<Expression, uint32_t> expressionNumbering;
      AliasAnalysis* AA;
      MemoryDependenceAnalysis* MD;
      DominatorTree* DT;

      uint32_t nextValueNumber;

      Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
      Expression::ExpressionOpcode getOpcode(CmpInst* C);
      Expression::ExpressionOpcode getOpcode(CastInst* C);
      Expression create_expression(BinaryOperator* BO);
      Expression create_expression(CmpInst* C);
      Expression create_expression(ShuffleVectorInst* V);
      Expression create_expression(ExtractElementInst* C);
      Expression create_expression(InsertElementInst* V);
      Expression create_expression(SelectInst* V);
      Expression create_expression(CastInst* C);
      Expression create_expression(GetElementPtrInst* G);
      Expression create_expression(CallInst* C);
      Expression create_expression(Constant* C);
      Expression create_expression(ExtractValueInst* C);
      Expression create_expression(InsertValueInst* C);

      uint32_t lookup_or_add_call(CallInst* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value *V);
      uint32_t lookup(Value *V) const;
      void add(Value *V, uint32_t num);
      void clear();
      void erase(Value *v);
      unsigned size();
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      AliasAnalysis *getAliasAnalysis() const { return AA; }
      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
      void setDomTree(DominatorTree* D) { DT = D; }
      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
      void verifyRemoved(const Value *) const;
  };
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return Expression(Expression::EMPTY);
  }

  static inline Expression getTombstoneKey() {
    return Expression(Expression::TOMBSTONE);
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9));
    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
            (unsigned)((uintptr_t)e.function >> 9)) +
           hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

template <>
struct isPodLike<Expression> { static const bool value = true; };

}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//
Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
  switch(BO->getOpcode()) {
  default: // THIS SHOULD NEVER HAPPEN
    llvm_unreachable("Binary operator with unknown opcode?");
  case Instruction::Add:  return Expression::ADD;
  case Instruction::FAdd: return Expression::FADD;
  case Instruction::Sub:  return Expression::SUB;
  case Instruction::FSub: return Expression::FSUB;
  case Instruction::Mul:  return Expression::MUL;
  case Instruction::FMul: return Expression::FMUL;
  case Instruction::UDiv: return Expression::UDIV;
  case Instruction::SDiv: return Expression::SDIV;
  case Instruction::FDiv: return Expression::FDIV;
  case Instruction::URem: return Expression::UREM;
  case Instruction::SRem: return Expression::SREM;
  case Instruction::FRem: return Expression::FREM;
  case Instruction::Shl:  return Expression::SHL;
  case Instruction::LShr: return Expression::LSHR;
  case Instruction::AShr: return Expression::ASHR;
  case Instruction::And:  return Expression::AND;
  case Instruction::Or:   return Expression::OR;
  case Instruction::Xor:  return Expression::XOR;
  }
}

Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
  if (isa<ICmpInst>(C)) {
    switch (C->getPredicate()) {
    default: // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
    }
  } else {
    switch (C->getPredicate()) {
    default: // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
    }
  }
}

Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
  switch(C->getOpcode()) {
  default: // THIS SHOULD NEVER HAPPEN
    llvm_unreachable("Cast operator with unknown opcode?");
  case Instruction::Trunc:    return Expression::TRUNC;
  case Instruction::ZExt:     return Expression::ZEXT;
  case Instruction::SExt:     return Expression::SEXT;
  case Instruction::FPToUI:   return Expression::FPTOUI;
  case Instruction::FPToSI:   return Expression::FPTOSI;
  case Instruction::UIToFP:   return Expression::UITOFP;
  case Instruction::SIToFP:   return Expression::SITOFP;
  case Instruction::FPTrunc:  return Expression::FPTRUNC;
  case Instruction::FPExt:    return Expression::FPEXT;
  case Instruction::PtrToInt: return Expression::PTRTOINT;
  case Instruction::IntToPtr: return Expression::INTTOPTR;
  case Instruction::BitCast:  return Expression::BITCAST;
  }
}

Expression ValueTable::create_expression(CallInst* C) {
  Expression e;

  e.type = C->getType();
  e.function = C->getCalledFunction();
  e.opcode = Expression::CALL;

  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

Expression ValueTable::create_expression(BinaryOperator* BO) {
  Expression e;
  e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
  e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
  e.function = 0;
  e.type = BO->getType();
  e.opcode = getOpcode(BO);

  return e;
}

Expression ValueTable::create_expression(CmpInst* C) {
  Expression e;

  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
  e.varargs.push_back(lookup_or_add(C->getOperand(1)));
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(CastInst* C) {
  Expression e;

  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(ShuffleVectorInst* S) {
  Expression e;

  e.varargs.push_back(lookup_or_add(S->getOperand(0)));
  e.varargs.push_back(lookup_or_add(S->getOperand(1)));
  e.varargs.push_back(lookup_or_add(S->getOperand(2)));
  e.function = 0;
  e.type = S->getType();
  e.opcode = Expression::SHUFFLE;

  return e;
}

Expression ValueTable::create_expression(ExtractElementInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getOperand(0)));
  e.varargs.push_back(lookup_or_add(E->getOperand(1)));
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACT;

  return e;
}

Expression ValueTable::create_expression(InsertElementInst* I) {
  Expression e;

  e.varargs.push_back(lookup_or_add(I->getOperand(0)));
  e.varargs.push_back(lookup_or_add(I->getOperand(1)));
  e.varargs.push_back(lookup_or_add(I->getOperand(2)));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::INSERT;

  return e;
}

Expression ValueTable::create_expression(SelectInst* I) {
  Expression e;

  e.varargs.push_back(lookup_or_add(I->getCondition()));
  e.varargs.push_back(lookup_or_add(I->getTrueValue()));
  e.varargs.push_back(lookup_or_add(I->getFalseValue()));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::SELECT;

  return e;
}

Expression ValueTable::create_expression(GetElementPtrInst* G) {
  Expression e;

  e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
  e.function = 0;
  e.type = G->getType();
  e.opcode = Expression::GEP;

  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

Expression ValueTable::create_expression(ExtractValueInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
  for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACTVALUE;

  return e;
}

Expression ValueTable::create_expression(InsertValueInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
  e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
  for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::INSERTVALUE;

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumOperands() != C->getNumOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 1; i < C->getNumOperands(); ++i) {
        uint32_t c_vn = lookup_or_add(C->getOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: call/call dependencies for readonly calls should return def, not
    // clobber! Move the checking logic to MemDep!
    CallInst* cdep = 0;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      // Ignore non-local dependencies.
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-dependencies. If we already have a call, reject
      // instruction dependencies.
      if (I->getResult().isClobber() || cdep != 0) {
        cdep = 0;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = 0;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumOperands() != C->getNumOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 1; i < C->getNumOperands(); ++i) {
      uint32_t c_vn = lookup_or_add(C->getOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;

  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
  case Instruction::Call:
    return lookup_or_add_call(cast<CallInst>(I));
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or :
  case Instruction::Xor:
    exp = create_expression(cast<BinaryOperator>(I));
    break;
  case Instruction::ICmp:
  case Instruction::FCmp:
    exp = create_expression(cast<CmpInst>(I));
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::BitCast:
    exp = create_expression(cast<CastInst>(I));
    break;
  case Instruction::Select:
    exp = create_expression(cast<SelectInst>(I));
    break;
  case Instruction::ExtractElement:
    exp = create_expression(cast<ExtractElementInst>(I));
    break;
  case Instruction::InsertElement:
    exp = create_expression(cast<InsertElementInst>(I));
    break;
  case Instruction::ShuffleVector:
    exp = create_expression(cast<ShuffleVectorInst>(I));
    break;
  case Instruction::ExtractValue:
    exp = create_expression(cast<ExtractValueInst>(I));
    break;
  case Instruction::InsertValue:
    exp = create_expression(cast<InsertValueInst>(I));
    break;
  case Instruction::GetElementPtr:
    exp = create_expression(cast<GetElementPtrInst>(I));
    break;
  default:
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}

/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {
  struct ValueNumberScope {
    ValueNumberScope* parent;
    DenseMap<uint32_t, Value*> table;

    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
  };
}

namespace {

  class GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool nopre = false, bool noloads = false)
      : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { }

  private:
    bool NoPRE;
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;

    ValueTable VN;
    DenseMap<BasicBlock*, ValueNumberScope*> localAvail;

    // This transformation requires dominator info.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTree>();
      AU.addPreserved<AliasAnalysis>();
    }

    // Helper functions
    // FIXME: eliminate or document these better
    bool processLoad(LoadInst* L,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction *I,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*>& d);
    bool iterateOnFunction(Function &F);
    Value *CollapsePhi(PHINode* p);
    bool performPRE(Function& F);
    Value *lookupNumber(BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
  };

  char GVN::ID = 0;
}
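// (For reference: the pass is registered below under the name "gvn", so it
// can be scheduled with, e.g., "opt -gvn"; createGVNPass() is the
// programmatic entry point.)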
// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) {
  return new GVN(NoPRE, NoLoads);
}

static RegisterPass<GVN> X("gvn",
                           "Global Value Numbering");

void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    errs() << I->first << "\n";
    I->second->dump();
  }
  errs() << "}\n";
}

static bool isSafeReplacement(PHINode* p, Instruction *inst) {
  if (!isa<PHINode>(inst))
    return true;

  for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
       UI != E; ++UI)
    if (PHINode* use_phi = dyn_cast<PHINode>(UI))
      if (use_phi->getParent() == inst->getParent())
        return false;

  return true;
}

Value *GVN::CollapsePhi(PHINode *PN) {
  Value *ConstVal = PN->hasConstantValue(DT);
  if (!ConstVal) return 0;

  Instruction *Inst = dyn_cast<Instruction>(ConstVal);
  if (!Inst)
    return ConstVal;

  if (DT->dominates(Inst, PN))
    if (isSafeReplacement(PN, Inst))
      return Inst;
  return 0;
}

/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully available in FullyAvailableBlocks.
/// This map holds one of four states per block:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either. Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block. We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect. Walk the (transitive) successors of this block and mark them as
  // 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  while (!BBWorklist.empty()) {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks. This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue; // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
      BBWorklist.push_back(*I);
  }

  return false;
}


/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            const Type *LoadTy,
                                            const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy) ||
      isa<StructType>(StoredVal->getType()) ||
      isa<ArrayType>(StoredVal->getType()))
    return false;

  // The store has to be at least as big as the load.
  if (TD.getTypeSizeInBits(StoredVal->getType()) <
        TD.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}


/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             const Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const TargetData &TD) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
    return 0;

  const Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) {
      // Pointer to Pointer -> use bitcast.
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
    }

    // Convert source pointers to integers, which can be bitcast.
    if (isa<PointerType>(StoredValTy)) {
      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    const Type *TypeToCastTo = LoadedTy;
    if (isa<PointerType>(TypeToCastTo))
      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (isa<PointerType>(LoadedTy))
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it. If the available value is too small, then we
  // can't do anything.
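  //
  // For example (illustrative IR):
  //   store i32 %x, i32* %p
  //   %v = load i16* %p
  // Here the two bytes that %v needs are extracted from %x with a shift (on
  // big-endian targets) and a truncate, as below.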
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (isa<PointerType>(StoredValTy)) {
    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!isa<IntegerType>(StoredValTy)) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (TD.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (isa<PointerType>(LoadedTy))
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}

/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
/// be expressed as a base pointer plus a constant offset. Return the base and
/// offset to the caller.
static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
                                        const TargetData &TD) {
  Operator *PtrOp = dyn_cast<Operator>(Ptr);
  if (PtrOp == 0) return Ptr;

  // Just look through bitcasts.
  if (PtrOp->getOpcode() == Instruction::BitCast)
    return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);

  // If this is a GEP with constant indices, we can look through it.
  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;

  gep_type_iterator GTI = gep_type_begin(GEP);
  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
       ++I, ++GTI) {
    ConstantInt *OpC = cast<ConstantInt>(*I);
    if (OpC->isZero()) continue;

    // Handle struct and array indices, which add their offset to the pointer.
    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
    } else {
      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
      Offset += OpC->getSExtValue()*Size;
    }
  }

  // Re-sign extend from the pointer size if needed to get overflow edge cases
  // right.
  unsigned PtrSize = TD.getPointerSizeInBits();
  if (PtrSize < 64)
    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);

  return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
}
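
// For example (illustrative): given
//   %f = getelementptr {i32, i32}* %base, i32 0, i32 1
// this returns %base and adds 4 to Offset (assuming i32 occupies 4 bytes in
// the target's layout).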

/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove). This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't must-alias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy))
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
  Value *LoadBase =
    GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias. AA must have gotten confused.
  // FIXME: Study to see if/when this happens.
  if (LoadOffset == StoreOffset) {
#if 0
    errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
    abort();
#endif
    return -1;
  }

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load. In this case, they really don't alias at all, AA
  // must have gotten confused.
  // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
  // remove this check, as it is duplicated with what we have below.
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
  LoadSize >>= 3;


  bool isAAFailure = false;
  if (StoreOffset < LoadOffset) {
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  } else {
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
  }
  if (isAAFailure) {
#if 0
    errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
    abort();
#endif
    return -1;
  }

  // If the Load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it. We could do something crazy in the future
  // (issue a smaller load and then merge the bits in) but this seems unlikely
  // to be valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation. Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}

/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
                                          StoreInst *DepSI,
                                          const TargetData &TD) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (isa<StructType>(DepSI->getOperand(0)->getType()) ||
      isa<ArrayType>(DepSI->getOperand(0)->getType()))
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                        StorePtr, StoreSize, TD);
}

static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
                                            MemIntrinsic *MI,
                                            const TargetData &TD) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (SizeCst == 0) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, TD);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory. In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (Src == 0) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
  if (GV == 0 || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                              MI->getDest(), MemSizeInBits, TD);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  if (ConstantFoldLoadFromConstPtr(Src, &TD))
    return Offset;
  return -1;
}


/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't must-alias. Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   const Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load. Convert
  // to an integer type to start with.
  if (isa<PointerType>(SrcVal->getType()))
    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
  if (!isa<IntegerType>(SrcVal->getType()))
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                                   "tmp");

  // Shift the bits to the least significant depending on endianness.
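  //
  // For example (illustrative): with StoreSize = 4 bytes, LoadSize = 2 bytes,
  // and Offset = 2, a little-endian target shifts right by 2*8 = 16 bits,
  // while a big-endian target shifts right by (4-2-2)*8 = 0 bits.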
  unsigned ShiftAmt;
  if (TD.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
                                 "tmp");

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}

/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     const Type *LoadTy, Instruction *InsertPt,
                                     const TargetData &TD){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  return ConstantFoldLoadFromConstPtr(Src, &TD);
}



struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;
  enum ValType {
    SimpleVal,  // A simple offsetted value that is accessed.
    MemIntrin   // A memory intrinsic which is loaded from.
  };

  /// V - The value that is live out of the block.
  PointerIntPair<Value *, 1, ValType> Val;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset;

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(V);
    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                     unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(MI);
    Res.Val.setInt(MemIntrin);
    Res.Offset = Offset;
    return Res;
  }

  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val.getPointer();
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(!isSimpleValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val.getPointer());
  }

  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
  /// defined here to the specified type. This handles various coercion cases.
  Value *MaterializeAdjustedValue(const Type *LoadTy,
                                  const TargetData *TD) const {
    Value *Res;
    if (isSimpleValue()) {
      Res = getSimpleValue();
      if (Res->getType() != LoadTy) {
        assert(TD && "Need target data to handle type mismatch case");
        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
                                   *TD);

        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
                     << *getSimpleValue() << '\n'
                     << *Res << '\n' << "\n\n\n");
      }
    } else {
      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
                                   LoadTy, BB->getTerminator(), *TD);
      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                   << " " << *getMemIntrinValue() << '\n'
                   << *Res << '\n' << "\n\n\n");
    }
    return Res;
  }
};

/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                        SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     const TargetData *TD,
                                     const DominatorTree &DT,
                                     AliasAnalysis *AA) {
  // Check for the fully redundant, dominating load case. In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI);

  const Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (isa<PointerType>(V->getType()))
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

  return V;
}

static bool isLifetimeStart(Instruction *Inst) {
  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}

/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
                              SmallVectorImpl<Instruction*> &toErase) {
  // Find the non-local dependencies of the load.
  SmallVector<NonLocalDepResult, 64> Deps;
  MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
                                   Deps);
  //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
  //             << Deps.size() << *LI << '\n');

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about. Optimizing
  // it will be too expensive.
  if (Deps.size() > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block. Reject this early.
  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
    DEBUG(
      errs() << "GVN: non-local load ";
      WriteAsOperand(errs(), LI);
      errs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
    );
    return false;
  }

  // Filter out useless results (non-locals, etc). Keep track of the blocks
  // where we have a value available in ValuesPerBlock, and also keep track of
  // whether we see dependencies that produce an unknown value for the load
  // (such as a call that could potentially clobber the load).
  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
  SmallVector<BasicBlock*, 16> UnavailableBlocks;

  const TargetData *TD = 0;

  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
    BasicBlock *DepBB = Deps[i].getBB();
    MemDepResult DepInfo = Deps[i].getResult();

    if (DepInfo.isClobber()) {
      // The address being loaded in this non-local block may not be the same
      // as the pointer operand of the load if PHI translation occurs. Make
      // sure to consider the right address.
      Value *Address = Deps[i].getAddress();

      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from
      // the stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
                                                      DepSI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                       DepSI->getOperand(0),
                                                                Offset));
            continue;
          }
        }
      }

      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
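      //
      // For example (illustrative): if the clobber is
      //   memcpy(%dst, @G, 64)   ; @G a constant global
      // the needed bytes can be read directly out of @G's initializer.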
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                        DepMI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                  Offset));
            continue;
          }
        }
      }

      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
        // Loading immediately after lifetime begin -> undef.
        isLifetimeStart(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }

    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getOperand(0)->getType() != LI->getType()) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();

        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
                                                        LI->getType(), *TD)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                          S->getOperand(0)));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the
      // load.
      if (LD->getType() != LI->getType()) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();

        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
    continue;
  }

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty()) return false;

  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value. Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                      VN.getAliasAnalysis());
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (isa<PointerType>(V->getType()))
      MD->invalidateCachedPointerInfo(V);
    toErase.push_back(LI);
    NumGVNLoad++;
    return true;
  }

  if (!EnablePRE || !EnableLoadPRE)
    return false;

  // Okay, we have *some* definitions of the value. This means that the value
  // is available in some of our (transitive) predecessors. Let's think about
  // doing PRE of this load. This will involve inserting a new load into the
  // predecessor when it's not available. We could do this in general, but
  // prefer to not increase code size.
  // As such, we only do this when we know that we only have to insert *one*
  // load (which means we're basically moving the load, not inserting a new
  // one).

  SmallPtrSet<BasicBlock *, 4> Blockers;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    Blockers.insert(UnavailableBlocks[i]);

  // Let's find the first basic block with more than one predecessor. Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = LI->getParent();
  BasicBlock *TmpBB = LoadBB;

  bool isSinglePred = false;
  bool allSingleSucc = true;
  while (TmpBB->getSinglePredecessor()) {
    isSinglePred = true;
    TmpBB = TmpBB->getSinglePredecessor();
    if (!TmpBB) // If we haven't found one, bail now.
      return false;
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      allSingleSucc = false;
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // If we have a repl set with LI itself in it, this means we have a loop where
  // at least one of the values is LI. Since this means that we won't be able
  // to eliminate LI even if we insert uses in the other predecessors, we will
  // end up increasing code size. Reject this by scanning for LI.
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    if (ValuesPerBlock[i].isSimpleValue() &&
        ValuesPerBlock[i].getSimpleValue() == LI)
      return false;

  // FIXME: It is extremely unclear what this loop is doing, other than
  // artificially restricting loadpre.
  if (isSinglePred) {
    bool isHot = false;
    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
      const AvailableValueInBlock &AV = ValuesPerBlock[i];
      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
          if (DT->dominates(LI, I)) {
            isHot = true;
            break;
          }
    }

    // We are interested only in "hot" instructions. We don't want to do any
    // mis-optimizations here.
    if (!isHot)
      return false;
  }

  // Okay, we have some hope :). Check to see if the loaded value is fully
  // available in all but one predecessor.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available LI and insert a new load into
  // that one block.
  BasicBlock *UnavailablePred = 0;

  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    FullyAvailableBlocks[UnavailableBlocks[i]] = false;

  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      continue;

    // If this load is not available in multiple predecessors, reject it.

  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      continue;

    // If the load is unavailable in more than one predecessor, reject it: we
    // would have to insert more than one new load.
    if (UnavailablePred && UnavailablePred != *PI)
      return false;
    UnavailablePred = *PI;
  }

  assert(UnavailablePred != 0 &&
         "Fully available value should be eliminated above!");

  // We don't currently handle critical edges :(
  if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
    DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
                 << UnavailablePred->getName() << "': " << *LI << '\n');
    return false;
  }

  // Do PHI translation to get the address's value in the predecessor if
  // necessary.  The returned pointer (if non-null) is guaranteed to dominate
  // UnavailablePred.
  SmallVector<Instruction*, 8> NewInsts;

  // If all preds have a single successor, then we know it is safe to insert
  // the load on the pred (?!?), so we can insert code to materialize the
  // pointer if it is not available.
  PHITransAddr Address(LI->getOperand(0), TD);
  Value *LoadPtr = 0;
  if (allSingleSucc) {
    LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
                                                *DT, NewInsts);
  } else {
    Address.PHITranslateValue(LoadBB, UnavailablePred);
    LoadPtr = Address.getAddr();

    // Make sure the value is live in the predecessor.
    if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr))
      if (!DT->dominates(Inst->getParent(), UnavailablePred))
        LoadPtr = 0;
  }

  // If we couldn't find or insert a computation of this PHI-translated value,
  // we fail PRE.
  if (LoadPtr == 0) {
    assert(NewInsts.empty() && "Shouldn't insert insts on failure");
    DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
                 << *LI->getOperand(0) << "\n");
    return false;
  }

  // Assign value numbers to these new instructions.
  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map.  However, in doing so, we risk getting into
    // ordering issues.  If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookup_or_add(NewInsts[i]);
  }
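
  // As a sketch of what PHI translation does here (illustrative IR only; the
  // names are hypothetical): if the address in LoadBB is
  //     %a = phi i8* [ %p, %pred1 ], [ %q, %pred2 ]
  //     %addr = getelementptr i8* %a, i32 4
  // and %pred2 is the unavailable predecessor, the translated address there
  // is "getelementptr i8* %q, i32 4".  Any instructions materialized to
  // compute it land in NewInsts and are value numbered above.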

  // Make sure it is valid to move this load here.  We have to watch out for:
  //  @1 = getelementptr (i8* p, ...
  //  test p and branch if == 0
  //  load @1
  // It is valid to have the getelementptr before the test, even if p can be 0,
  // as getelementptr only does address arithmetic.
  // If we are not pushing the value through any multiple-successor blocks
  // we do not have this case.  Otherwise, check that the load is safe to
  // put anywhere; this can be improved, but should be conservatively safe.
  if (!allSingleSucc &&
      // FIXME: REEVALUATE THIS.
      !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) {
    assert(NewInsts.empty() && "Should not have inserted instructions");
    return false;
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors.
  // Do it.
  DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
  DEBUG(if (!NewInsts.empty())
          errs() << "INSERTED " << NewInsts.size() << " INSTS: "
                 << *NewInsts.back() << '\n');

  Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                LI->getAlignment(),
                                UnavailablePred->getTerminator());

  // Add the newly created load.
  ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
                                                      NewLoad));

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                    VN.getAliasAnalysis());
  LI->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(LI);
  if (isa<PointerType>(V->getType()))
    MD->invalidateCachedPointerInfo(V);
  toErase.push_back(LI);
  NumPRELoad++;
  return true;
}

/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
  if (!MD)
    return false;

  if (L->isVolatile())
    return false;

  // Find the local dependency of this load: the nearest preceding instruction
  // in this block that defines or clobbers the loaded memory.
  MemDepResult Dep = MD->getDependency(L);

  // If the value isn't available, don't do anything!
  if (Dep.isClobber()) {
    // Check to see if we have something like this:
    //   store i32 123, i32* %P
    //   %A = bitcast i32* %P to i8*
    //   %B = gep i8* %A, i32 1
    //   %C = load i8* %B
    //
    // We could do that by recognizing if the clobber instructions are obviously
    // a common base + constant offset, and if the previous store (or memset)
    // completely covers this load.  This sort of thing can happen in bitfield
    // access code.
    Value *AvailVal = 0;
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
                                                    L->getPointerOperand(),
                                                    DepSI, *TD);
        if (Offset != -1)
          AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
                                          L->getType(), L, *TD);
      }

    // If the clobbering value is a memset/memcpy/memmove, see if we can
    // forward a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
                                                      L->getPointerOperand(),
                                                      DepMI, *TD);
        if (Offset != -1)
          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(),
                                            L, *TD);
      }
    }

    if (AvailVal) {
      DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
                   << *AvailVal << '\n' << *L << "\n\n\n");

      // Replace the load!
      L->replaceAllUsesWith(AvailVal);
      if (isa<PointerType>(AvailVal->getType()))
        MD->invalidateCachedPointerInfo(AvailVal);
      toErase.push_back(L);
      NumGVNLoad++;
      return true;
    }

    DEBUG(
      // Fast-print the dep; using operator<< on the instruction would be too
      // slow.
      errs() << "GVN: load ";
      WriteAsOperand(errs(), L);
      Instruction *I = Dep.getInst();
      errs() << " is clobbered by " << *I << '\n';
    );
    return false;
  }
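
  // To sketch the clobber coercion above (illustrative IR; hypothetical
  // names): given
  //     call void @llvm.memset.i64(i8* %p, i8 1, i64 100, i32 1)
  //     %v = load i32* %p
  // AnalyzeLoadFromClobberingMemInst can report offset 0 even though the
  // memset formally "clobbers" the pointer, and the forwarded value is the
  // stored byte widened to i32 (0x01010101), so the load never touches
  // memory.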

  // If the dependency is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(L, toErase);

  Instruction *DepInst = Dep.getInst();
  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
    Value *StoredVal = DepSI->getOperand(0);

    // The store and load are to a must-aliased pointer, but they may not
    // actually have the same type.  See if we know how to reuse the stored
    // value (depending on its type).
    const TargetData *TD = 0;
    if (StoredVal->getType() != L->getType()) {
      if ((TD = getAnalysisIfAvailable<TargetData>())) {
        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                   L, *TD);
        if (StoredVal == 0)
          return false;

        DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
                     << '\n' << *L << "\n\n\n");
      } else {
        return false;
      }
    }

    // Remove it!
    L->replaceAllUsesWith(StoredVal);
    if (isa<PointerType>(StoredVal->getType()))
      MD->invalidateCachedPointerInfo(StoredVal);
    toErase.push_back(L);
    NumGVNLoad++;
    return true;
  }

  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
    Value *AvailableVal = DepLI;

    // The loads are of a must-aliased pointer, but they may not actually have
    // the same type.  See if we know how to reuse the previously loaded value
    // (depending on its type).
    const TargetData *TD = 0;
    if (DepLI->getType() != L->getType()) {
      if ((TD = getAnalysisIfAvailable<TargetData>())) {
        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
                                                      L, *TD);
        if (AvailableVal == 0)
          return false;

        DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n"
                     << *AvailableVal << "\n" << *L << "\n\n\n");
      } else {
        return false;
      }
    }

    // Remove it!
    L->replaceAllUsesWith(AvailableVal);
    if (isa<PointerType>(DepLI->getType()))
      MD->invalidateCachedPointerInfo(DepLI);
    toErase.push_back(L);
    NumGVNLoad++;
    return true;
  }

  // If this load really doesn't depend on anything, then we must be loading
  // an undef value.  This can happen when loading from a fresh allocation
  // with no intervening stores, for example.
  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    toErase.push_back(L);
    NumGVNLoad++;
    return true;
  }
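
  // For instance (hypothetical IR): in
  //     %m = call i8* @malloc(i32 4)
  //     %p = bitcast i8* %m to i32*
  //     %v = load i32* %p
  // nothing has written to %p between the allocation and the load, so %v can
  // be folded to undef and the load deleted.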

  // If this load occurs right after a lifetime begin, then the loaded value
  // is undefined.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      toErase.push_back(L);
      NumGVNLoad++;
      return true;
    }
  }

  return false;
}

Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
  DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
  if (I == localAvail.end())
    return 0;

  // Walk up the chain of dominating scopes looking for the value number.
  ValueNumberScope *Locals = I->second;
  while (Locals) {
    DenseMap<uint32_t, Value*>::iterator TI = Locals->table.find(num);
    if (TI != Locals->table.end())
      return TI->second;
    Locals = Locals->parent;
  }

  return 0;
}


/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
bool GVN::processInstruction(Instruction *I,
                             SmallVectorImpl<Instruction*> &toErase) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    bool Changed = processLoad(LI, toErase);

    if (!Changed) {
      unsigned Num = VN.lookup_or_add(LI);
      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
    }

    return Changed;
  }

  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookup_or_add(I);

  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));

    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
      return false;

    // Record that the condition is known true in the true successor and known
    // false in the false successor, when that successor has no other
    // predecessors.
    Value *BranchCond = BI->getCondition();
    uint32_t CondVN = VN.lookup_or_add(BranchCond);

    BasicBlock *TrueSucc = BI->getSuccessor(0);
    BasicBlock *FalseSucc = BI->getSuccessor(1);

    if (TrueSucc->getSinglePredecessor())
      localAvail[TrueSucc]->table[CondVN] =
        ConstantInt::getTrue(TrueSucc->getContext());
    if (FalseSucc->getSinglePredecessor())
      localAvail[FalseSucc]->table[CondVN] =
        ConstantInt::getFalse(FalseSucc->getContext());

    return false;

  // Allocations are always uniquely numbered, so we can save time and memory
  // by fast-failing them.
  } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
    return false;
  }
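
  // The branch handling above lets dominated users fold the condition.
  // Sketch (hypothetical IR):
  //     %cmp = icmp eq i32 %x, 0
  //     br i1 %cmp, label %T, label %F
  // If %T has no other predecessor, %cmp's value number maps to "true"
  // inside %T, so a re-use of %cmp there is replaced by the constant via
  // lookupNumber.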

  // Collapse PHI nodes.
  if (PHINode *p = dyn_cast<PHINode>(I)) {
    Value *constVal = CollapsePhi(p);

    if (constVal) {
      p->replaceAllUsesWith(constVal);
      if (MD && isa<PointerType>(constVal->getType()))
        MD->invalidateCachedPointerInfo(constVal);
      VN.erase(p);

      toErase.push_back(p);
    } else {
      localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
    }

  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
  } else if (Num == NextNum) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));

  // Perform fast-path value-number-based elimination of values inherited
  // from dominators.
  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
    // Remove it!
    VN.erase(I);
    I->replaceAllUsesWith(repl);
    if (MD && isa<PointerType>(repl->getType()))
      MD->invalidateCachedPointerInfo(repl);
    toErase.push_back(I);
    return true;

  } else {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
  }

  return false;
}

/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function &F) {
  if (!NoLoads)
    MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);

  bool Changed = false;
  bool ShouldContinue = true;

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock *BB = FI;
    ++FI;
    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) NumGVNBlocks++;

    Changed |= removedBlock;
  }

  // Iterate GVN to a fixed point: each pass can expose redundancies for the
  // next one.
  unsigned Iteration = 0;

  while (ShouldContinue) {
    DEBUG(errs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (EnablePRE) {
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }
  // FIXME: Should perform GVN again after PRE does something.  PRE can move
  // computations into blocks where they become fully redundant.  Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();

  return Changed;
}


bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
  // incrementing BI before processing an instruction).
  SmallVector<Instruction*, 8> toErase;
  bool ChangedFunction = false;

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE;) {
    ChangedFunction |= processInstruction(BI, toErase);
    if (toErase.empty()) {
      ++BI;
      continue;
    }

    // If we need to delete some instructions, do it now.
    NumGVNInstr += toErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;

    for (SmallVectorImpl<Instruction*>::iterator I = toErase.begin(),
         E = toErase.end(); I != E; ++I) {
      DEBUG(errs() << "GVN removed: " << **I << '\n');
      if (MD) MD->removeInstruction(*I);
      (*I)->eraseFromParent();
      DEBUG(verifyRemoved(*I));
    }
    toErase.clear();

    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}

/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
  DenseMap<BasicBlock*, Value*> predMap;
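
  // The shape performPRE targets, sketched on hypothetical IR:
  //
  //     pred1:
  //       %a1 = add i32 %x, %y
  //       br label %join
  //     pred2:
  //       br label %join            ; no add on this path
  //     join:
  //       %a2 = add i32 %x, %y
  //
  // %a2 is partially redundant: available from pred1, missing from pred2.
  // The walk below clones the add into pred2, creates
  //     %a = phi i32 [ %a1, %pred1 ], [ %a2.pre, %pred2 ]
  // at the top of join, and deletes %a2.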

  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock *CurrentBlock = *DI;

    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;

      if (isa<AllocaInst>(CurInst) ||
          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
          CurInst->getType()->isVoidTy() ||
          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
          isa<DbgInfoIntrinsic>(CurInst))
        continue;

      uint32_t ValNo = VN.lookup(CurInst);

      // Scan the predecessors for PRE opportunities.  We're only trying to
      // solve the basic diamond case, where a value is computed in the
      // successor and one predecessor, but not the other.  We also explicitly
      // disallow cases where the successor is its own predecessor, because
      // they're more complicated to get right.
      unsigned NumWith = 0;
      unsigned NumWithout = 0;
      BasicBlock *PREPred = 0;
      predMap.clear();

      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        // We're not interested in PRE where the block is its own predecessor,
        // or in blocks with predecessors that are not reachable.
        if (*PI == CurrentBlock) {
          NumWithout = 2;
          break;
        } else if (!localAvail.count(*PI)) {
          NumWithout = 2;
          break;
        }

        DenseMap<uint32_t, Value*>::iterator predV =
          localAvail[*PI]->table.find(ValNo);
        if (predV == localAvail[*PI]->table.end()) {
          PREPred = *PI;
          NumWithout++;
        } else if (predV->second == CurInst) {
          NumWithout = 2;
        } else {
          predMap[*PI] = predV->second;
          NumWith++;
        }
      }

      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
      if (NumWithout != 1 || NumWith == 0)
        continue;

      // Don't do PRE across an indirect branch.
      if (isa<IndirectBrInst>(PREPred->getTerminator()))
        continue;

      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
      unsigned SuccNum = 0;
      for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
           i != e; ++i)
        if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
          SuccNum = i;
          break;
        }

      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
        continue;
      }

      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      Instruction *PREInstr = CurInst->clone();
      bool success = true;
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;

        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
        }
      }
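
      // For example (hypothetical names): cloning "%res = add i32 %s, %t"
      // into PREPred requires that %s and %t each have some value available
      // in PREPred's scope chain.  If %s came from a load that memdep could
      // not number precisely, lookupNumber fails and we give up below.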

      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor.  This is typically because of loads which
      // are not value numbered precisely.
      if (!success) {
        DEBUG(verifyRemoved(PREInstr));
        delete PREInstr;
        continue;
      }

      PREInstr->insertBefore(PREPred->getTerminator());
      PREInstr->setName(CurInst->getName() + ".pre");
      predMap[PREPred] = PREInstr;
      VN.add(PREInstr, ValNo);
      NumGVNPRE++;

      // Update the availability map to include the new instruction.
      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));

      // Create a PHI to make the value available in this block.
      PHINode *Phi = PHINode::Create(CurInst->getType(),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI)
        Phi->addIncoming(predMap[*PI], *PI);

      VN.add(Phi, ValNo);
      localAvail[CurrentBlock]->table[ValNo] = Phi;

      CurInst->replaceAllUsesWith(Phi);
      if (MD && isa<PointerType>(Phi->getType()))
        MD->invalidateCachedPointerInfo(Phi);
      VN.erase(CurInst);

      DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
      if (MD) MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      DEBUG(verifyRemoved(CurInst));
      Changed = true;
    }
  }

  for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator
       I = toSplit.begin(), E = toSplit.end(); I != E; ++I)
    SplitCriticalEdge(I->first, I->second, this);

  return Changed || !toSplit.empty();
}

/// iterateOnFunction - Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Build one ValueNumberScope per block, chained to the scope of the block's
  // immediate dominator, so availability lookups naturally walk up the
  // dominator tree.
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    if (DI->getIDom())
      localAvail[DI->getBlock()] =
        new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
    else
      localAvail[DI->getBlock()] = new ValueNumberScope(0);
  }

  // Top-down walk of the dominator tree.
  bool Changed = false;
#if 0
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
       RE = RPOT.end(); RI != RE; ++RI)
    Changed |= processBlock(*RI);
#else
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    Changed |= processBlock(DI->getBlock());
#endif

  return Changed;
}

void GVN::cleanupGlobalSets() {
  VN.clear();

  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
       I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
    delete I->second;
  localAvail.clear();
}

/// verifyRemoved - Verify that the specified instruction does not occur in our
/// internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scopes to make sure the instruction isn't
  // ferreted away in one of them.
  for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
       I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
    const ValueNumberScope *VNS = I->second;

    while (VNS) {
      for (DenseMap<uint32_t, Value*>::const_iterator
           II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
        assert(II->second != Inst && "Inst still in value numbering scope!");
      }

      VNS = VNS->parent;
    }
  }
}