GVN.cpp revision b017c9e89cdd971a992a414e29caad0232b89a0d
1//===- GVN.cpp - Eliminate redundant values and loads ------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs global value numbering to eliminate fully redundant 11// instructions. It also performs simple dead load elimination. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "gvn" 16#include "llvm/Transforms/Scalar.h" 17#include "llvm/BasicBlock.h" 18#include "llvm/Constants.h" 19#include "llvm/DerivedTypes.h" 20#include "llvm/Function.h" 21#include "llvm/IntrinsicInst.h" 22#include "llvm/Instructions.h" 23#include "llvm/ParameterAttributes.h" 24#include "llvm/Value.h" 25#include "llvm/ADT/BitVector.h" 26#include "llvm/ADT/DenseMap.h" 27#include "llvm/ADT/DepthFirstIterator.h" 28#include "llvm/ADT/SmallPtrSet.h" 29#include "llvm/ADT/SmallVector.h" 30#include "llvm/ADT/Statistic.h" 31#include "llvm/Analysis/Dominators.h" 32#include "llvm/Analysis/AliasAnalysis.h" 33#include "llvm/Analysis/MemoryDependenceAnalysis.h" 34#include "llvm/Support/CFG.h" 35#include "llvm/Support/Compiler.h" 36#include "llvm/Support/GetElementPtrTypeIterator.h" 37#include "llvm/Target/TargetData.h" 38using namespace llvm; 39 40STATISTIC(NumGVNInstr, "Number of instructions deleted"); 41STATISTIC(NumGVNLoad, "Number of loads deleted"); 42STATISTIC(NumMemSetInfer, "Number of memsets inferred"); 43 44 45//===----------------------------------------------------------------------===// 46// ValueTable Class 47//===----------------------------------------------------------------------===// 48 49/// This class holds the mapping between values and value numbers. It is used 50/// as an efficient mechanism to determine the expression-wise equivalence of 51/// two values. 
namespace {
  /// Expression - A flattened, hashable description of an instruction:
  /// an opcode, a result type, up to three operand value numbers, an
  /// optional variable-length operand list (calls, GEPs), and the callee
  /// for call expressions.  Two instructions with equal Expressions are
  /// considered equivalent by the value table.
  struct VISIBILITY_HIDDEN Expression {
    enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM,
                            FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
                            ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
                            ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
                            FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
                            FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
                            FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
                            SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
                            FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
                            PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, EMPTY,
                            TOMBSTONE };  // EMPTY/TOMBSTONE are DenseMap sentinels.

    ExpressionOpcode opcode;
    const Type* type;                 // Result type of the expression.
    uint32_t firstVN;                 // Value numbers of up to three operands;
    uint32_t secondVN;                // 0 when the slot is unused.
    uint32_t thirdVN;
    SmallVector<uint32_t, 4> varargs; // Extra operand VNs (call args, GEP indices).
    Value* function;                  // Callee for CALL expressions, else 0.

    // Default ctor leaves fields uninitialized; only used as a target for
    // assignment from a fully-built Expression.
    Expression() { }
    // Sentinel ctor: only `opcode` is meaningful (see operator== below,
    // which compares EMPTY/TOMBSTONE keys by opcode alone).
    Expression(ExpressionOpcode o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == EMPTY || opcode == TOMBSTONE)
        // Sentinel keys have no other valid fields; opcode equality suffices.
        return true;
      else if (type != other.type)
        return false;
      else if (function != other.function)
        return false;
      else if (firstVN != other.firstVN)
        return false;
      else if (secondVN != other.secondVN)
        return false;
      else if (thirdVN != other.thirdVN)
        return false;
      else {
        if (varargs.size() != other.varargs.size())
          return false;

        for (size_t i = 0; i < varargs.size(); ++i)
          if (varargs[i] != other.varargs[i])
            return false;

        return true;
      }
    }

    // Exact logical negation of operator==, spelled out field by field.
    bool operator!=(const Expression &other) const {
      if (opcode != other.opcode)
        return true;
      else if (opcode == EMPTY || opcode == TOMBSTONE)
        return false;
      else if (type != other.type)
        return true;
      else if (function != other.function)
        return true;
      else if (firstVN != other.firstVN)
        return true;
      else if (secondVN != other.secondVN)
        return true;
      else if (thirdVN != other.thirdVN)
        return true;
      else {
        if (varargs.size() != other.varargs.size())
          return true;

        for (size_t i = 0; i < varargs.size(); ++i)
          if (varargs[i] != other.varargs[i])
            return true;

        return false;
      }
    }
  };

  /// ValueTable - Maps Values to small integer value numbers, and
  /// Expressions to the value number of the first Value that produced
  /// them, so structurally identical instructions share a number.
  class VISIBILITY_HIDDEN ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;       // Value -> VN.
      DenseMap<Expression, uint32_t> expressionNumbering; // Expression -> VN.
      AliasAnalysis* AA;  // Used to decide whether calls are hashable.

      uint32_t nextValueNumber;  // Next unassigned VN; 0 is never handed out.

      Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
      Expression::ExpressionOpcode getOpcode(CmpInst* C);
      Expression::ExpressionOpcode getOpcode(CastInst* C);
      Expression create_expression(BinaryOperator* BO);
      Expression create_expression(CmpInst* C);
      Expression create_expression(ShuffleVectorInst* V);
      Expression create_expression(ExtractElementInst* C);
      Expression create_expression(InsertElementInst* V);
      Expression create_expression(SelectInst* V);
      Expression create_expression(CastInst* C);
      Expression create_expression(GetElementPtrInst* G);
      Expression create_expression(CallInst* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value* V);
      uint32_t lookup(Value* V) const;
      void add(Value* V, uint32_t num);
      void clear();
      void erase(Value* v);
      unsigned size();  // NOTE(review): declared but no definition is visible here.
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      uint32_t hash_operand(Value* v);
  };
}

namespace llvm {
// DenseMap traits so Expression can be used as a DenseMap key.
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return Expression(Expression::EMPTY);
  }

  static inline Expression getTombstoneKey() {
    return Expression(Expression::TOMBSTONE);
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    hash = e.firstVN + hash * 37;
    hash = e.secondVN + hash * 37;
    hash = e.thirdVN + hash * 37;

    // Mix pointer bits; low bits are dropped since pointers are aligned.
    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9)) +
           hash * 37;

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
            (unsigned)((uintptr_t)e.function >> 9)) +
           hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
  static bool isPod() { return true; }
};
}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

/// getOpcode - Translate a BinaryOperator opcode into its Expression opcode.
Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
  switch(BO->getOpcode()) {
  default: // THIS SHOULD NEVER HAPPEN
    assert(0 && "Binary operator with unknown opcode?");
  case Instruction::Add:  return Expression::ADD;
  case Instruction::Sub:  return Expression::SUB;
  case Instruction::Mul:  return Expression::MUL;
  case Instruction::UDiv: return Expression::UDIV;
  case Instruction::SDiv: return Expression::SDIV;
  case Instruction::FDiv: return Expression::FDIV;
  case Instruction::URem: return Expression::UREM;
  case Instruction::SRem: return Expression::SREM;
  case Instruction::FRem: return Expression::FREM;
  case Instruction::Shl:  return Expression::SHL;
  case Instruction::LShr: return Expression::LSHR;
  case Instruction::AShr: return Expression::ASHR;
  case Instruction::And:  return Expression::AND;
  case Instruction::Or:   return Expression::OR;
  case Instruction::Xor:  return Expression::XOR;
  }
}

/// getOpcode - Translate a CmpInst predicate (integer or FP) into its
/// Expression opcode.
Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
  if (isa<ICmpInst>(C)) {
    switch (C->getPredicate()) {
    default:  // THIS SHOULD NEVER HAPPEN
      assert(0 && "Comparison with unknown predicate?");
    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
    }
  }
  assert(isa<FCmpInst>(C) && "Unknown compare");
  switch (C->getPredicate()) {
  default: // THIS SHOULD NEVER HAPPEN
    assert(0 && "Comparison with unknown predicate?");
  case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
  case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
  case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
  case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
  case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
  case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
  case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
  case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
  case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
  case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
  case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
  case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
  case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
  case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
  }
}

/// getOpcode - Translate a CastInst opcode into its Expression opcode.
Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
  switch(C->getOpcode()) {
  default: // THIS SHOULD NEVER HAPPEN
    assert(0 && "Cast operator with unknown opcode?");
  case Instruction::Trunc:    return Expression::TRUNC;
  case Instruction::ZExt:     return Expression::ZEXT;
  case Instruction::SExt:     return Expression::SEXT;
  case Instruction::FPToUI:   return Expression::FPTOUI;
  case Instruction::FPToSI:   return Expression::FPTOSI;
  case Instruction::UIToFP:   return Expression::UITOFP;
  case Instruction::SIToFP:   return Expression::SITOFP;
  case Instruction::FPTrunc:  return Expression::FPTRUNC;
  case Instruction::FPExt:    return Expression::FPEXT;
  case Instruction::PtrToInt: return Expression::PTRTOINT;
  case Instruction::IntToPtr: return Expression::INTTOPTR;
  case Instruction::BitCast:  return Expression::BITCAST;
  }
}

/// hash_operand - Return the value number to use for an operand.  Calls that
/// may write memory get a fresh number every time so they never compare equal
/// to anything; everything else is numbered normally.
uint32_t ValueTable::hash_operand(Value* v) {
  if (CallInst* CI = dyn_cast<CallInst>(v))
    if (!AA->doesNotAccessMemory(CI))
      return nextValueNumber++;

  return lookup_or_add(v);
}

Expression ValueTable::create_expression(CallInst* C) {
  Expression e;

  e.type = C->getType();
  e.firstVN = 0;
  e.secondVN = 0;
  e.thirdVN = 0;
  e.function = C->getCalledFunction();
  e.opcode = Expression::CALL;

  // op 0 is the callee; actual arguments start at op 1.
  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
       I != E; ++I)
    e.varargs.push_back(hash_operand(*I));

  return e;
}

Expression ValueTable::create_expression(BinaryOperator* BO) {
  Expression e;

  e.firstVN = hash_operand(BO->getOperand(0));
  e.secondVN = hash_operand(BO->getOperand(1));
  e.thirdVN = 0;
  e.function = 0;
  e.type = BO->getType();
  e.opcode = getOpcode(BO);

  return e;
}

Expression ValueTable::create_expression(CmpInst* C) {
  Expression e;

  e.firstVN = hash_operand(C->getOperand(0));
  e.secondVN = hash_operand(C->getOperand(1));
  e.thirdVN = 0;
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(CastInst* C) {
  Expression e;

  e.firstVN = hash_operand(C->getOperand(0));
  e.secondVN = 0;
  e.thirdVN = 0;
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(ShuffleVectorInst* S) {
  Expression e;

  e.firstVN = hash_operand(S->getOperand(0));
  e.secondVN = hash_operand(S->getOperand(1));
  e.thirdVN = hash_operand(S->getOperand(2));
  e.function = 0;
  e.type = S->getType();
  e.opcode = Expression::SHUFFLE;

  return e;
}

Expression ValueTable::create_expression(ExtractElementInst* E) {
  Expression e;

  e.firstVN = hash_operand(E->getOperand(0));
  e.secondVN = hash_operand(E->getOperand(1));
  e.thirdVN = 0;
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACT;

  return e;
}

Expression ValueTable::create_expression(InsertElementInst* I) {
  Expression e;

  e.firstVN = hash_operand(I->getOperand(0));
  e.secondVN = hash_operand(I->getOperand(1));
  e.thirdVN = hash_operand(I->getOperand(2));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::INSERT;

  return e;
}

Expression ValueTable::create_expression(SelectInst* I) {
  Expression e;

  e.firstVN = hash_operand(I->getCondition());
  e.secondVN = hash_operand(I->getTrueValue());
  e.thirdVN = hash_operand(I->getFalseValue());
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::SELECT;

  return e;
}

Expression ValueTable::create_expression(GetElementPtrInst* G) {
  Expression e;

  e.firstVN = hash_operand(G->getPointerOperand());
  e.secondVN = 0;
  e.thirdVN = 0;
  e.function = 0;
  e.type = G->getType();
  e.opcode = Expression::GEP;

  // All indices go in the variable-length operand list.
  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
       I != E; ++I)
    e.varargs.push_back(hash_operand(*I));

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
424uint32_t ValueTable::lookup_or_add(Value* V) { 425 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 426 if (VI != valueNumbering.end()) 427 return VI->second; 428 429 if (CallInst* C = dyn_cast<CallInst>(V)) { 430 if (AA->onlyReadsMemory(C)) { // includes doesNotAccessMemory 431 Expression e = create_expression(C); 432 433 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 434 if (EI != expressionNumbering.end()) { 435 valueNumbering.insert(std::make_pair(V, EI->second)); 436 return EI->second; 437 } else { 438 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 439 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 440 441 return nextValueNumber++; 442 } 443 } else { 444 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 445 return nextValueNumber++; 446 } 447 } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) { 448 Expression e = create_expression(BO); 449 450 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 451 if (EI != expressionNumbering.end()) { 452 valueNumbering.insert(std::make_pair(V, EI->second)); 453 return EI->second; 454 } else { 455 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 456 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 457 458 return nextValueNumber++; 459 } 460 } else if (CmpInst* C = dyn_cast<CmpInst>(V)) { 461 Expression e = create_expression(C); 462 463 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 464 if (EI != expressionNumbering.end()) { 465 valueNumbering.insert(std::make_pair(V, EI->second)); 466 return EI->second; 467 } else { 468 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 469 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 470 471 return nextValueNumber++; 472 } 473 } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) { 474 Expression e = create_expression(U); 475 476 DenseMap<Expression, uint32_t>::iterator EI = 
expressionNumbering.find(e); 477 if (EI != expressionNumbering.end()) { 478 valueNumbering.insert(std::make_pair(V, EI->second)); 479 return EI->second; 480 } else { 481 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 482 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 483 484 return nextValueNumber++; 485 } 486 } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) { 487 Expression e = create_expression(U); 488 489 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 490 if (EI != expressionNumbering.end()) { 491 valueNumbering.insert(std::make_pair(V, EI->second)); 492 return EI->second; 493 } else { 494 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 495 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 496 497 return nextValueNumber++; 498 } 499 } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) { 500 Expression e = create_expression(U); 501 502 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 503 if (EI != expressionNumbering.end()) { 504 valueNumbering.insert(std::make_pair(V, EI->second)); 505 return EI->second; 506 } else { 507 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 508 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 509 510 return nextValueNumber++; 511 } 512 } else if (SelectInst* U = dyn_cast<SelectInst>(V)) { 513 Expression e = create_expression(U); 514 515 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 516 if (EI != expressionNumbering.end()) { 517 valueNumbering.insert(std::make_pair(V, EI->second)); 518 return EI->second; 519 } else { 520 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 521 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 522 523 return nextValueNumber++; 524 } 525 } else if (CastInst* U = dyn_cast<CastInst>(V)) { 526 Expression e = create_expression(U); 527 528 DenseMap<Expression, uint32_t>::iterator EI = 
expressionNumbering.find(e); 529 if (EI != expressionNumbering.end()) { 530 valueNumbering.insert(std::make_pair(V, EI->second)); 531 return EI->second; 532 } else { 533 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 534 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 535 536 return nextValueNumber++; 537 } 538 } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) { 539 Expression e = create_expression(U); 540 541 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 542 if (EI != expressionNumbering.end()) { 543 valueNumbering.insert(std::make_pair(V, EI->second)); 544 return EI->second; 545 } else { 546 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 547 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 548 549 return nextValueNumber++; 550 } 551 } else { 552 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 553 return nextValueNumber++; 554 } 555} 556 557/// lookup - Returns the value number of the specified value. Fails if 558/// the value has not yet been numbered. 
uint32_t ValueTable::lookup(Value* V) const {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;  // Restart numbering; 1 is the first valid VN.
}

/// erase - Remove a value from the value numbering
/// Note: only the Value->VN map is cleaned; the expression side keeps its
/// entry so the number can still be reused by equivalent expressions.
void ValueTable::erase(Value* V) {
  valueNumbering.erase(V);
}

//===----------------------------------------------------------------------===//
//                       ValueNumberedSet Class
//===----------------------------------------------------------------------===//
namespace {
/// ValueNumberedSet - A set of Values paired with a bit vector indexed by
/// value number, so "is some value with VN n available?" is a single bit
/// test instead of a set scan.  Callers are responsible for keeping the
/// bit vector and the contents set in sync (see set()/reset()/insert()).
class VISIBILITY_HIDDEN ValueNumberedSet {
  private:
    SmallPtrSet<Value*, 8> contents;  // The values themselves.
    BitVector numbers;                // Bit i set => a value with VN i is present.
  public:
    ValueNumberedSet() { numbers.resize(1); }
    ValueNumberedSet(const ValueNumberedSet& other) {
      numbers = other.numbers;
      contents = other.contents;
    }

    typedef SmallPtrSet<Value*, 8>::iterator iterator;

    iterator begin() { return contents.begin(); }
    iterator end() { return contents.end(); }

    // Returns true if v was newly inserted.
    bool insert(Value* v) { return contents.insert(v); }
    void insert(iterator I, iterator E) { contents.insert(I, E); }
    void erase(Value* v) { contents.erase(v); }
    unsigned count(Value* v) { return contents.count(v); }
    size_t size() { return contents.size(); }

    // Mark value number i as present, growing the bit vector on demand.
    void set(unsigned i) {
      if (i >= numbers.size())
        numbers.resize(i+1);

      numbers.set(i);
    }

    void operator=(const ValueNumberedSet& other) {
      contents = other.contents;
      numbers = other.numbers;
    }

    // Clear bit i; out-of-range indices are silently ignored.
    void reset(unsigned i) {
      if (i < numbers.size())
        numbers.reset(i);
    }

    // Test bit i; out-of-range indices read as "not present".
    bool test(unsigned i) {
      if (i >= numbers.size())
        return false;

      return numbers.test(i);
    }

    void clear() {
      contents.clear();
      numbers.clear();
    }
};
632} 633 634//===----------------------------------------------------------------------===// 635// GVN Pass 636//===----------------------------------------------------------------------===// 637 638namespace { 639 640 class VISIBILITY_HIDDEN GVN : public FunctionPass { 641 bool runOnFunction(Function &F); 642 public: 643 static char ID; // Pass identification, replacement for typeid 644 GVN() : FunctionPass((intptr_t)&ID) { } 645 646 private: 647 ValueTable VN; 648 649 DenseMap<BasicBlock*, ValueNumberedSet> availableOut; 650 651 typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType; 652 PhiMapType phiMap; 653 654 655 // This transformation requires dominator postdominator info 656 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 657 AU.setPreservesCFG(); 658 AU.addRequired<DominatorTree>(); 659 AU.addRequired<MemoryDependenceAnalysis>(); 660 AU.addRequired<AliasAnalysis>(); 661 AU.addRequired<TargetData>(); 662 AU.addPreserved<AliasAnalysis>(); 663 AU.addPreserved<MemoryDependenceAnalysis>(); 664 AU.addPreserved<TargetData>(); 665 } 666 667 // Helper fuctions 668 // FIXME: eliminate or document these better 669 Value* find_leader(ValueNumberedSet& vals, uint32_t v) ; 670 void val_insert(ValueNumberedSet& s, Value* v); 671 bool processLoad(LoadInst* L, 672 DenseMap<Value*, LoadInst*> &lastLoad, 673 SmallVectorImpl<Instruction*> &toErase); 674 bool processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase); 675 bool processInstruction(Instruction* I, 676 ValueNumberedSet& currAvail, 677 DenseMap<Value*, LoadInst*>& lastSeenLoad, 678 SmallVectorImpl<Instruction*> &toErase); 679 bool processNonLocalLoad(LoadInst* L, 680 SmallVectorImpl<Instruction*> &toErase); 681 bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep, 682 SmallVectorImpl<Instruction*> &toErase); 683 bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C, 684 SmallVectorImpl<Instruction*> &toErase); 685 Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig, 686 
DenseMap<BasicBlock*, Value*> &Phis, 687 bool top_level = false); 688 void dump(DenseMap<BasicBlock*, Value*>& d); 689 bool iterateOnFunction(Function &F); 690 Value* CollapsePhi(PHINode* p); 691 bool isSafeReplacement(PHINode* p, Instruction* inst); 692 }; 693 694 char GVN::ID = 0; 695} 696 697// createGVNPass - The public interface to this file... 698FunctionPass *llvm::createGVNPass() { return new GVN(); } 699 700static RegisterPass<GVN> X("gvn", 701 "Global Value Numbering"); 702 703/// find_leader - Given a set and a value number, return the first 704/// element of the set with that value number, or 0 if no such element 705/// is present 706Value* GVN::find_leader(ValueNumberedSet& vals, uint32_t v) { 707 if (!vals.test(v)) 708 return 0; 709 710 for (ValueNumberedSet::iterator I = vals.begin(), E = vals.end(); 711 I != E; ++I) 712 if (v == VN.lookup(*I)) 713 return *I; 714 715 assert(0 && "No leader found, but present bit is set?"); 716 return 0; 717} 718 719/// val_insert - Insert a value into a set only if there is not a value 720/// with the same value number already in the set 721void GVN::val_insert(ValueNumberedSet& s, Value* v) { 722 uint32_t num = VN.lookup(v); 723 if (!s.test(num)) 724 s.insert(v); 725} 726 727void GVN::dump(DenseMap<BasicBlock*, Value*>& d) { 728 printf("{\n"); 729 for (DenseMap<BasicBlock*, Value*>::iterator I = d.begin(), 730 E = d.end(); I != E; ++I) { 731 if (I->second == MemoryDependenceAnalysis::None) 732 printf("None\n"); 733 else 734 I->second->dump(); 735 } 736 printf("}\n"); 737} 738 739Value* GVN::CollapsePhi(PHINode* p) { 740 DominatorTree &DT = getAnalysis<DominatorTree>(); 741 Value* constVal = p->hasConstantValue(); 742 743 if (!constVal) return 0; 744 745 Instruction* inst = dyn_cast<Instruction>(constVal); 746 if (!inst) 747 return constVal; 748 749 if (DT.dominates(inst, p)) 750 if (isSafeReplacement(p, inst)) 751 return inst; 752 return 0; 753} 754 755bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) { 756 
if (!isa<PHINode>(inst)) 757 return true; 758 759 for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); 760 UI != E; ++UI) 761 if (PHINode* use_phi = dyn_cast<PHINode>(UI)) 762 if (use_phi->getParent() == inst->getParent()) 763 return false; 764 765 return true; 766} 767 768/// GetValueForBlock - Get the value to use within the specified basic block. 769/// available values are in Phis. 770Value *GVN::GetValueForBlock(BasicBlock *BB, LoadInst* orig, 771 DenseMap<BasicBlock*, Value*> &Phis, 772 bool top_level) { 773 774 // If we have already computed this value, return the previously computed val. 775 DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB); 776 if (V != Phis.end() && !top_level) return V->second; 777 778 BasicBlock* singlePred = BB->getSinglePredecessor(); 779 if (singlePred) { 780 Value *ret = GetValueForBlock(singlePred, orig, Phis); 781 Phis[BB] = ret; 782 return ret; 783 } 784 785 // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so 786 // now, then get values to fill in the incoming values for the PHI. 787 PHINode *PN = new PHINode(orig->getType(), orig->getName()+".rle", 788 BB->begin()); 789 PN->reserveOperandSpace(std::distance(pred_begin(BB), pred_end(BB))); 790 791 if (Phis.count(BB) == 0) 792 Phis.insert(std::make_pair(BB, PN)); 793 794 // Fill in the incoming values for the block. 
795 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 796 Value* val = GetValueForBlock(*PI, orig, Phis); 797 PN->addIncoming(val, *PI); 798 } 799 800 AliasAnalysis& AA = getAnalysis<AliasAnalysis>(); 801 AA.copyValue(orig, PN); 802 803 // Attempt to collapse PHI nodes that are trivially redundant 804 Value* v = CollapsePhi(PN); 805 if (!v) { 806 // Cache our phi construction results 807 phiMap[orig->getPointerOperand()].insert(PN); 808 return PN; 809 } 810 811 MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>(); 812 813 MD.removeInstruction(PN); 814 PN->replaceAllUsesWith(v); 815 816 for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(), 817 E = Phis.end(); I != E; ++I) 818 if (I->second == PN) 819 I->second = v; 820 821 PN->eraseFromParent(); 822 823 Phis[BB] = v; 824 return v; 825} 826 827/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are 828/// non-local by performing PHI construction. 829bool GVN::processNonLocalLoad(LoadInst* L, 830 SmallVectorImpl<Instruction*> &toErase) { 831 MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>(); 832 833 // Find the non-local dependencies of the load 834 DenseMap<BasicBlock*, Value*> deps; 835 MD.getNonLocalDependency(L, deps); 836 837 DenseMap<BasicBlock*, Value*> repl; 838 839 // Filter out useless results (non-locals, etc) 840 for (DenseMap<BasicBlock*, Value*>::iterator I = deps.begin(), E = deps.end(); 841 I != E; ++I) { 842 if (I->second == MemoryDependenceAnalysis::None) 843 return false; 844 845 if (I->second == MemoryDependenceAnalysis::NonLocal) 846 continue; 847 848 if (StoreInst* S = dyn_cast<StoreInst>(I->second)) { 849 if (S->getPointerOperand() != L->getPointerOperand()) 850 return false; 851 repl[I->first] = S->getOperand(0); 852 } else if (LoadInst* LD = dyn_cast<LoadInst>(I->second)) { 853 if (LD->getPointerOperand() != L->getPointerOperand()) 854 return false; 855 repl[I->first] = LD; 856 } else { 857 return 
false; 858 } 859 } 860 861 // Use cached PHI construction information from previous runs 862 SmallPtrSet<Instruction*, 4>& p = phiMap[L->getPointerOperand()]; 863 for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end(); 864 I != E; ++I) { 865 if ((*I)->getParent() == L->getParent()) { 866 MD.removeInstruction(L); 867 L->replaceAllUsesWith(*I); 868 toErase.push_back(L); 869 NumGVNLoad++; 870 return true; 871 } 872 873 repl.insert(std::make_pair((*I)->getParent(), *I)); 874 } 875 876 // Perform PHI construction 877 SmallPtrSet<BasicBlock*, 4> visited; 878 Value* v = GetValueForBlock(L->getParent(), L, repl, true); 879 880 MD.removeInstruction(L); 881 L->replaceAllUsesWith(v); 882 toErase.push_back(L); 883 NumGVNLoad++; 884 885 return true; 886} 887 888/// processLoad - Attempt to eliminate a load, first by eliminating it 889/// locally, and then attempting non-local elimination if that fails. 890bool GVN::processLoad(LoadInst *L, DenseMap<Value*, LoadInst*> &lastLoad, 891 SmallVectorImpl<Instruction*> &toErase) { 892 if (L->isVolatile()) { 893 lastLoad[L->getPointerOperand()] = L; 894 return false; 895 } 896 897 Value* pointer = L->getPointerOperand(); 898 LoadInst*& last = lastLoad[pointer]; 899 900 // ... to a pointer that has been loaded from before... 
901 MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>(); 902 bool removedNonLocal = false; 903 Instruction* dep = MD.getDependency(L); 904 if (dep == MemoryDependenceAnalysis::NonLocal && 905 L->getParent() != &L->getParent()->getParent()->getEntryBlock()) { 906 removedNonLocal = processNonLocalLoad(L, toErase); 907 908 if (!removedNonLocal) 909 last = L; 910 911 return removedNonLocal; 912 } 913 914 915 bool deletedLoad = false; 916 917 // Walk up the dependency chain until we either find 918 // a dependency we can use, or we can't walk any further 919 while (dep != MemoryDependenceAnalysis::None && 920 dep != MemoryDependenceAnalysis::NonLocal && 921 (isa<LoadInst>(dep) || isa<StoreInst>(dep))) { 922 // ... that depends on a store ... 923 if (StoreInst* S = dyn_cast<StoreInst>(dep)) { 924 if (S->getPointerOperand() == pointer) { 925 // Remove it! 926 MD.removeInstruction(L); 927 928 L->replaceAllUsesWith(S->getOperand(0)); 929 toErase.push_back(L); 930 deletedLoad = true; 931 NumGVNLoad++; 932 } 933 934 // Whether we removed it or not, we can't 935 // go any further 936 break; 937 } else if (!last) { 938 // If we don't depend on a store, and we haven't 939 // been loaded before, bail. 940 break; 941 } else if (dep == last) { 942 // Remove it! 
943 MD.removeInstruction(L); 944 945 L->replaceAllUsesWith(last); 946 toErase.push_back(L); 947 deletedLoad = true; 948 NumGVNLoad++; 949 950 break; 951 } else { 952 dep = MD.getDependency(L, dep); 953 } 954 } 955 956 if (dep != MemoryDependenceAnalysis::None && 957 dep != MemoryDependenceAnalysis::NonLocal && 958 isa<AllocationInst>(dep)) { 959 // Check that this load is actually from the 960 // allocation we found 961 Value* v = L->getOperand(0); 962 while (true) { 963 if (BitCastInst *BC = dyn_cast<BitCastInst>(v)) 964 v = BC->getOperand(0); 965 else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(v)) 966 v = GEP->getOperand(0); 967 else 968 break; 969 } 970 if (v == dep) { 971 // If this load depends directly on an allocation, there isn't 972 // anything stored there; therefore, we can optimize this load 973 // to undef. 974 MD.removeInstruction(L); 975 976 L->replaceAllUsesWith(UndefValue::get(L->getType())); 977 toErase.push_back(L); 978 deletedLoad = true; 979 NumGVNLoad++; 980 } 981 } 982 983 if (!deletedLoad) 984 last = L; 985 986 return deletedLoad; 987} 988 989/// isBytewiseValue - If the specified value can be set by repeating the same 990/// byte in memory, return the i8 value that it is represented with. This is 991/// true for all i8 values obviously, but is also true for i32 0, i32 -1, 992/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated 993/// byte store (e.g. i16 0x1234), return null. 994static Value *isBytewiseValue(Value *V) { 995 // All byte-wide stores are splatable, even of arbitrary variables. 996 if (V->getType() == Type::Int8Ty) return V; 997 998 // Constant float and double values can be handled as integer values if the 999 // corresponding integer value is "byteable". An important case is 0.0. 
1000 if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { 1001 if (CFP->getType() == Type::FloatTy) 1002 V = ConstantExpr::getBitCast(CFP, Type::Int32Ty); 1003 if (CFP->getType() == Type::DoubleTy) 1004 V = ConstantExpr::getBitCast(CFP, Type::Int64Ty); 1005 // Don't handle long double formats, which have strange constraints. 1006 } 1007 1008 // We can handle constant integers that are power of two in size and a 1009 // multiple of 8 bits. 1010 if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { 1011 unsigned Width = CI->getBitWidth(); 1012 if (isPowerOf2_32(Width) && Width > 8) { 1013 // We can handle this value if the recursive binary decomposition is the 1014 // same at all levels. 1015 APInt Val = CI->getValue(); 1016 APInt Val2; 1017 while (Val.getBitWidth() != 8) { 1018 unsigned NextWidth = Val.getBitWidth()/2; 1019 Val2 = Val.lshr(NextWidth); 1020 Val2.trunc(Val.getBitWidth()/2); 1021 Val.trunc(Val.getBitWidth()/2); 1022 1023 // If the top/bottom halves aren't the same, reject it. 1024 if (Val != Val2) 1025 return 0; 1026 } 1027 return ConstantInt::get(Val); 1028 } 1029 } 1030 1031 // Conceptually, we could handle things like: 1032 // %a = zext i8 %X to i16 1033 // %b = shl i16 %a, 8 1034 // %c = or i16 %a, %b 1035 // but until there is an example that actually needs this, it doesn't seem 1036 // worth worrying about. 1037 return 0; 1038} 1039 1040static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, 1041 bool &VariableIdxFound, TargetData &TD) { 1042 // Skip over the first indices. 1043 gep_type_iterator GTI = gep_type_begin(GEP); 1044 for (unsigned i = 1; i != Idx; ++i, ++GTI) 1045 /*skip along*/; 1046 1047 // Compute the offset implied by the rest of the indices. 1048 int64_t Offset = 0; 1049 for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { 1050 ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i)); 1051 if (OpC == 0) 1052 return VariableIdxFound = true; 1053 if (OpC->isZero()) continue; // No offset. 
1054 1055 // Handle struct indices, which add their field offset to the pointer. 1056 if (const StructType *STy = dyn_cast<StructType>(*GTI)) { 1057 Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); 1058 continue; 1059 } 1060 1061 // Otherwise, we have a sequential type like an array or vector. Multiply 1062 // the index by the ElementSize. 1063 uint64_t Size = TD.getABITypeSize(GTI.getIndexedType()); 1064 Offset += Size*OpC->getSExtValue(); 1065 } 1066 1067 return Offset; 1068} 1069 1070/// IsPointerAtOffset - Return true if Ptr1 is exactly provably equal to Ptr2 1071/// plus the specified constant offset. For example, Ptr1 might be &A[42], and 1072/// Ptr2 might be &A[40] and Offset might be 8. 1073static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset, 1074 TargetData &TD) { 1075 // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical 1076 // base. After that base, they may have some number of common (and 1077 // potentially variable) indices. After that they handle some constant 1078 // offset, which determines their offset from each other. At this point, we 1079 // handle no other case. 1080 GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1); 1081 GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2); 1082 if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) 1083 return false; 1084 1085 // Skip any common indices and track the GEP types. 
1086 unsigned Idx = 1; 1087 for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) 1088 if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) 1089 break; 1090 1091 bool VariableIdxFound = false; 1092 int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD); 1093 int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD); 1094 if (VariableIdxFound) return false; 1095 1096 return Offset1 == Offset2+(int64_t)Offset; 1097} 1098 1099 1100/// processStore - When GVN is scanning forward over instructions, we look for 1101/// some other patterns to fold away. In particular, this looks for stores to 1102/// neighboring locations of memory. If it sees enough consequtive ones 1103/// (currently 4) it attempts to merge them together into a memcpy/memset. 1104bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) { 1105 if (SI->isVolatile()) return false; 1106 1107 // There are two cases that are interesting for this code to handle: memcpy 1108 // and memset. Right now we only handle memset. 1109 1110 // Ensure that the value being stored is something that can be memset'able a 1111 // byte at a time like "0" or "-1" or any width, as well as things like 1112 // 0xA0A0A0A0 and 0.0. 1113 Value *ByteVal = isBytewiseValue(SI->getOperand(0)); 1114 if (!ByteVal) 1115 return false; 1116 1117 TargetData &TD = getAnalysis<TargetData>(); 1118 AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 1119 1120 // Okay, so we now have a single store that can be splatable. Try to 'grow' 1121 // this store by looking for neighboring stores to the immediate left or right 1122 // of the store we have so far. While we could in theory handle stores in 1123 // this order: A[0], A[2], A[1] 1124 // in practice, right now we only worry about cases where stores are 1125 // consequtive in increasing or decreasing address order. 
1126 uint64_t BytesSoFar = TD.getTypeStoreSize(SI->getOperand(0)->getType()); 1127 uint64_t BytesFromSI = 0; 1128 unsigned StartAlign = SI->getAlignment(); 1129 Value *StartPtr = SI->getPointerOperand(); 1130 SmallVector<StoreInst*, 16> Stores; 1131 Stores.push_back(SI); 1132 1133 BasicBlock::iterator BI = SI; 1134 for (++BI; !isa<TerminatorInst>(BI); ++BI) { 1135 if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { 1136 // If the call is readnone, ignore it, otherwise bail out. We don't even 1137 // allow readonly here because we don't want something like: 1138 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). 1139 if (AA.getModRefBehavior(CallSite::get(BI)) == 1140 AliasAnalysis::DoesNotAccessMemory) 1141 continue; 1142 1143 // TODO: If this is a memset, try to join it in. 1144 1145 break; 1146 } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI)) 1147 break; 1148 1149 // If this is a non-store instruction it is fine, ignore it. 1150 StoreInst *NextStore = dyn_cast<StoreInst>(BI); 1151 if (NextStore == 0) continue; 1152 1153 // If this is a store, see if we can merge it in. 1154 if (NextStore->isVolatile()) break; 1155 1156 // Check to see if this stored value is of the same byte-splattable value. 1157 if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) 1158 break; 1159 1160 Value *ThisPointer = NextStore->getPointerOperand(); 1161 unsigned AccessSize = TD.getTypeStoreSize(SI->getOperand(0)->getType()); 1162 1163 // If so, check to see if the store is before the current range or after it 1164 // in either case, extend the range, otherwise reject it. 1165 if (IsPointerAtOffset(ThisPointer, StartPtr, BytesSoFar, TD)) { 1166 // Okay, this extends the stored area on the end, just add to the bytes 1167 // so far and remember this store. 1168 BytesSoFar += AccessSize; 1169 Stores.push_back(NextStore); 1170 continue; 1171 } 1172 1173 if (IsPointerAtOffset(StartPtr, ThisPointer, AccessSize, TD)) { 1174 // Okay, the store is before the current range. 
Reset our start pointer 1175 // and get new alignment info etc. 1176 BytesSoFar += AccessSize; 1177 BytesFromSI += AccessSize; 1178 Stores.push_back(NextStore); 1179 StartPtr = ThisPointer; 1180 StartAlign = NextStore->getAlignment(); 1181 continue; 1182 } 1183 1184 // Otherwise, this store wasn't contiguous with our current range, bail out. 1185 break; 1186 } 1187 1188 // If we found less than 4 stores to merge, bail out, it isn't worth losing 1189 // type information in llvm IR to do the transformation. 1190 if (Stores.size() < 4) 1191 return false; 1192 1193 // Otherwise, we do want to transform this! Create a new memset. We put the 1194 // memset right after the first store that we found in this block. This 1195 // ensures that the caller will increment the iterator to the memset before 1196 // it deletes all the stores. 1197 BasicBlock::iterator InsertPt = SI; ++InsertPt; 1198 1199 Function *F = Intrinsic::getDeclaration(SI->getParent()->getParent() 1200 ->getParent(), Intrinsic::memset_i64); 1201 1202 // StartPtr may not dominate the starting point. Instead of using it, base 1203 // the destination pointer off the input to the first store in the block. 1204 StartPtr = SI->getPointerOperand(); 1205 1206 // Cast the start ptr to be i8* as memset requires. 1207 const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty); 1208 if (StartPtr->getType() != i8Ptr) 1209 StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(), 1210 InsertPt); 1211 1212 // Offset the pointer if needed. 1213 if (BytesFromSI) 1214 StartPtr = new GetElementPtrInst(StartPtr, ConstantInt::get(Type::Int64Ty, 1215 -BytesFromSI), 1216 "ptroffset", InsertPt); 1217 1218 Value *Ops[] = { 1219 StartPtr, ByteVal, // Start, value 1220 ConstantInt::get(Type::Int64Ty, BytesSoFar), // size 1221 ConstantInt::get(Type::Int32Ty, StartAlign) // align 1222 }; 1223 new CallInst(F, Ops, Ops+4, "", InsertPt); 1224 1225 // Zap all the stores. 
1226 toErase.append(Stores.begin(), Stores.end()); 1227 1228 ++NumMemSetInfer; 1229 return true; 1230} 1231 1232 1233/// performCallSlotOptzn - takes a memcpy and a call that it depends on, 1234/// and checks for the possibility of a call slot optimization by having 1235/// the call write its result directly into the destination of the memcpy. 1236bool GVN::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C, 1237 SmallVectorImpl<Instruction*> &toErase) { 1238 // The general transformation to keep in mind is 1239 // 1240 // call @func(..., src, ...) 1241 // memcpy(dest, src, ...) 1242 // 1243 // -> 1244 // 1245 // memcpy(dest, src, ...) 1246 // call @func(..., dest, ...) 1247 // 1248 // Since moving the memcpy is technically awkward, we additionally check that 1249 // src only holds uninitialized values at the moment of the call, meaning that 1250 // the memcpy can be discarded rather than moved. 1251 1252 // Deliberately get the source and destination with bitcasts stripped away, 1253 // because we'll need to do type comparisons based on the underlying type. 1254 Value* cpyDest = cpy->getDest(); 1255 Value* cpySrc = cpy->getSource(); 1256 CallSite CS = CallSite::get(C); 1257 1258 // We need to be able to reason about the size of the memcpy, so we require 1259 // that it be a constant. 1260 ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength()); 1261 if (!cpyLength) 1262 return false; 1263 1264 // Require that src be an alloca. This simplifies the reasoning considerably. 1265 AllocaInst* srcAlloca = dyn_cast<AllocaInst>(cpySrc); 1266 if (!srcAlloca) 1267 return false; 1268 1269 // Check that all of src is copied to dest. 
  // --- interior of GVN::performCallSlotOptzn; signature is above this chunk ---
  TargetData& TD = getAnalysis<TargetData>();

  // The alloca's element count must be constant so srcSize is computable.
  ConstantInt* srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  uint64_t srcSize = TD.getABITypeSize(srcAlloca->getAllocatedType()) *
    srcArraySize->getZExtValue();

  // The memcpy must cover the whole of src, otherwise part of src's contents
  // (written by the call) would be lost by dropping the memcpy.
  if (cpyLength->getZExtValue() < srcSize)
    return false;

  // Check that accessing the first srcSize bytes of dest will not cause a
  // trap.  Otherwise the transform is invalid since it might cause a trap
  // to occur earlier than it otherwise would.
  if (AllocaInst* A = dyn_cast<AllocaInst>(cpyDest)) {
    // The destination is an alloca.  Check it is larger than srcSize.
    ConstantInt* destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
    if (!destArraySize)
      return false;

    uint64_t destSize = TD.getABITypeSize(A->getAllocatedType()) *
      destArraySize->getZExtValue();

    if (destSize < srcSize)
      return false;
  } else if (Argument* A = dyn_cast<Argument>(cpyDest)) {
    // If the destination is an sret parameter then only accesses that are
    // outside of the returned struct type can trap.
    if (!A->hasStructRetAttr())
      return false;

    const Type* StructTy = cast<PointerType>(A->getType())->getElementType();
    uint64_t destSize = TD.getABITypeSize(StructTy);

    if (destSize < srcSize)
      return false;
  } else {
    // Any other kind of destination may trap; reject.
    return false;
  }

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the final
  // memcpy can be dropped), that it is not read or written between the call and
  // the memcpy, and that writing beyond the end of it is undefined.
  // The worklist transparently looks through GEPs and bitcasts of the alloca.
  SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
                                   srcAlloca->use_end());
  while (!srcUseList.empty()) {
    User* UI = srcUseList.back();
    srcUseList.pop_back();

    if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
           I != E; ++I)
        srcUseList.push_back(*I);
    } else if (UI != C && UI != cpy) {
      return false;
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree& DT = getAnalysis<DominatorTree>();
  if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest.  We rely on AA to figure this out for us.
  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
      AliasAnalysis::NoModRef)
    return false;

  // All the checks have passed, so do the transformation: replace every use
  // of src in the call's argument list with dest (casting if the pointer
  // types differ).
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i) == cpySrc) {
      if (cpySrc->getType() != cpyDest->getType())
        cpyDest = CastInst::createPointerCast(cpyDest, cpySrc->getType(),
                                              cpyDest->getName(), C);
      CS.setArgument(i, cpyDest);
    }

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
  MD.dropInstruction(C);

  // Remove the memcpy
  MD.removeInstruction(cpy);
  toErase.push_back(cpy);

  return true;
}

/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances).  This allows later passes to remove the first memcpy
/// altogether.
bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
                        SmallVectorImpl<Instruction*> &toErase) {
  // We can only transform memcpy's where the dest of one is the source of the
  // other.
  if (M->getSource() != MDep->getDest())
    return false;

  // Second, the length of the memcpy's must be the same, or the preceding one
  // must be larger than the following one.
  ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
  ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
  if (!C1 || !C2)
    return false;

  uint64_t DepSize = C1->getValue().getZExtValue();
  uint64_t CpySize = C2->getValue().getZExtValue();

  if (DepSize < CpySize)
    return false;

  // Finally, we have to make sure that the dest of the second does not
  // alias the source of the first.  The other two queries reject any overlap
  // within either memcpy, since memcpy operands must not overlap.
  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
  if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
      AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
           AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
           != AliasAnalysis::NoAlias)
    return false;

  // If all checks passed, then we can transform these memcpy's
  Function* MemCpyFun = Intrinsic::getDeclaration(
                                 M->getParent()->getParent()->getParent(),
                                 M->getIntrinsicID());

  std::vector<Value*> args;
  args.push_back(M->getRawDest());
  // The rewritten copy reads from the first memcpy's source, bypassing
  // MDep's destination entirely.
  args.push_back(MDep->getRawSource());
  args.push_back(M->getLength());
  // NOTE(review): the alignment operand is taken from M only; presumably the
  // minimum of M's and MDep's alignments would be the conservative choice --
  // confirm against the llvm.memcpy intrinsic's requirements.
  args.push_back(M->getAlignment());

  CallInst* C = new CallInst(MemCpyFun, args.begin(), args.end(), "", M);

  // Validate the rewrite by asking memdep about the new call: if it depends
  // on MDep, the rewrite is sound and the old memcpy M is now dead.
  // Otherwise, undo by discarding the call we just created.
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
  if (MD.getDependency(C) == MDep) {
    MD.dropInstruction(M);
    toErase.push_back(M);
    return true;
  }

  MD.removeInstruction(C);
  toErase.push_back(C);
  return false;
}

/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.  Returns true if the function
/// was changed (some instruction was made redundant and queued on toErase).
bool GVN::processInstruction(Instruction *I, ValueNumberedSet &currAvail,
                             DenseMap<Value*, LoadInst*> &lastSeenLoad,
                             SmallVectorImpl<Instruction*> &toErase) {
  if (LoadInst* L = dyn_cast<LoadInst>(I))
    return processLoad(L, lastSeenLoad, toErase);

  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return processStore(SI, toErase);

  if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();

    // There are two possible optimizations we can do for memcpy:
    //   a) memcpy-memcpy xform which exposes redundance for DSE
    //   b) call-memcpy xform for return slot optimization
    Instruction* dep = MD.getDependency(M);
    if (dep == MemoryDependenceAnalysis::None ||
        dep == MemoryDependenceAnalysis::NonLocal)
      return false;
    if (MemCpyInst *MemCpy = dyn_cast<MemCpyInst>(dep))
      return processMemCpy(M, MemCpy, toErase);
    if (CallInst* C = dyn_cast<CallInst>(dep))
      return performCallSlotOptzn(M, C, toErase);
    return false;
  }

  unsigned num = VN.lookup_or_add(I);

  // Collapse PHI nodes
  if (PHINode* p = dyn_cast<PHINode>(I)) {
    Value* constVal = CollapsePhi(p);

    if (constVal) {
      // Purge the collapsed PHI from the phi map before replacing it, so no
      // stale pointer to the soon-to-be-erased node remains.
      for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end();
           PI != PE; ++PI)
        if (PI->second.count(p))
          PI->second.erase(p);

      p->replaceAllUsesWith(constVal);
      toErase.push_back(p);
    }
  // Perform value-number based elimination
  } else if (currAvail.test(num)) {
    Value* repl = find_leader(currAvail, num);

    // A call that touches memory is only redundant with its leader if memdep
    // shows nothing relevant happened in between (same block, same
    // dependency).
    if (CallInst* CI = dyn_cast<CallInst>(I)) {
      AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
      if (!AA.doesNotAccessMemory(CI)) {
        MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
        if (cast<Instruction>(repl)->getParent() != CI->getParent() ||
            MD.getDependency(CI) != MD.getDependency(cast<CallInst>(repl))) {
          // There must be an intervening may-alias store, so nothing from
          // this point on will be able to be replaced with the preceding call.
          // Make this call the new leader.
          currAvail.erase(repl);
          currAvail.insert(I);

          return false;
        }
      }
    }

    // Remove it!
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
    MD.removeInstruction(I);

    VN.erase(I);
    I->replaceAllUsesWith(repl);
    toErase.push_back(I);
    return true;
  } else if (!I->isTerminator()) {
    // First occurrence of this value number: it becomes available from here.
    currAvail.set(num);
    currAvail.insert(I);
  }

  return false;
}

// GVN::runOnFunction - This is the main transformation entry point for a
// function.
1510// 1511bool GVN::runOnFunction(Function& F) { 1512 VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); 1513 1514 bool changed = false; 1515 bool shouldContinue = true; 1516 1517 while (shouldContinue) { 1518 shouldContinue = iterateOnFunction(F); 1519 changed |= shouldContinue; 1520 } 1521 1522 return changed; 1523} 1524 1525 1526// GVN::iterateOnFunction - Executes one iteration of GVN 1527bool GVN::iterateOnFunction(Function &F) { 1528 // Clean out global sets from any previous functions 1529 VN.clear(); 1530 availableOut.clear(); 1531 phiMap.clear(); 1532 1533 bool changed_function = false; 1534 1535 DominatorTree &DT = getAnalysis<DominatorTree>(); 1536 1537 SmallVector<Instruction*, 4> toErase; 1538 DenseMap<Value*, LoadInst*> lastSeenLoad; 1539 1540 // Top-down walk of the dominator tree 1541 for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()), 1542 E = df_end(DT.getRootNode()); DI != E; ++DI) { 1543 1544 // Get the set to update for this block 1545 ValueNumberedSet& currAvail = availableOut[DI->getBlock()]; 1546 lastSeenLoad.clear(); 1547 1548 BasicBlock* BB = DI->getBlock(); 1549 1550 // A block inherits AVAIL_OUT from its dominator 1551 if (DI->getIDom() != 0) 1552 currAvail = availableOut[DI->getIDom()->getBlock()]; 1553 1554 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 1555 BI != BE; ) { 1556 changed_function |= processInstruction(BI, currAvail, 1557 lastSeenLoad, toErase); 1558 1559 NumGVNInstr += toErase.size(); 1560 1561 // Avoid iterator invalidation 1562 ++BI; 1563 1564 for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), 1565 E = toErase.end(); I != E; ++I) 1566 (*I)->eraseFromParent(); 1567 1568 toErase.clear(); 1569 } 1570 } 1571 1572 return changed_function; 1573} 1574