GVN.cpp revision d22fe2b51f553f7eca200cd22b9e2247f9aea2ff
//===- GVN.cpp - Eliminate redundant values and loads ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions. It also performs simple dead load elimination.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
#include "llvm/ParameterAttributes.h"
#include "llvm/Value.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include <list>
using namespace llvm;

// Pass-wide statistics, reported with -stats.
STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");

namespace {
  // Command-line escape hatch for the store->memset transformation; on by
  // default, hidden from -help.
  cl::opt<bool>
  FormMemSet("form-memset-from-stores",
             cl::desc("Transform straight-line stores to memsets"),
             cl::init(true), cl::Hidden);
}

//===----------------------------------------------------------------------===//
//                         ValueTable Class
56//===----------------------------------------------------------------------===// 57 58/// This class holds the mapping between values and value numbers. It is used 59/// as an efficient mechanism to determine the expression-wise equivalence of 60/// two values. 61namespace { 62 struct VISIBILITY_HIDDEN Expression { 63 enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM, 64 FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, 65 ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, 66 ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, 67 FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, 68 FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, 69 FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, 70 SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, 71 FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, 72 PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, EMPTY, 73 TOMBSTONE }; 74 75 ExpressionOpcode opcode; 76 const Type* type; 77 uint32_t firstVN; 78 uint32_t secondVN; 79 uint32_t thirdVN; 80 SmallVector<uint32_t, 4> varargs; 81 Value* function; 82 83 Expression() { } 84 Expression(ExpressionOpcode o) : opcode(o) { } 85 86 bool operator==(const Expression &other) const { 87 if (opcode != other.opcode) 88 return false; 89 else if (opcode == EMPTY || opcode == TOMBSTONE) 90 return true; 91 else if (type != other.type) 92 return false; 93 else if (function != other.function) 94 return false; 95 else if (firstVN != other.firstVN) 96 return false; 97 else if (secondVN != other.secondVN) 98 return false; 99 else if (thirdVN != other.thirdVN) 100 return false; 101 else { 102 if (varargs.size() != other.varargs.size()) 103 return false; 104 105 for (size_t i = 0; i < varargs.size(); ++i) 106 if (varargs[i] != other.varargs[i]) 107 return false; 108 109 return true; 110 } 111 } 112 113 bool operator!=(const Expression &other) const { 114 if (opcode != other.opcode) 115 return true; 116 else if (opcode == EMPTY || opcode == TOMBSTONE) 117 return false; 118 else if (type != other.type) 119 return true; 120 else if (function != 
other.function) 121 return true; 122 else if (firstVN != other.firstVN) 123 return true; 124 else if (secondVN != other.secondVN) 125 return true; 126 else if (thirdVN != other.thirdVN) 127 return true; 128 else { 129 if (varargs.size() != other.varargs.size()) 130 return true; 131 132 for (size_t i = 0; i < varargs.size(); ++i) 133 if (varargs[i] != other.varargs[i]) 134 return true; 135 136 return false; 137 } 138 } 139 }; 140 141 class VISIBILITY_HIDDEN ValueTable { 142 private: 143 DenseMap<Value*, uint32_t> valueNumbering; 144 DenseMap<Expression, uint32_t> expressionNumbering; 145 AliasAnalysis* AA; 146 147 uint32_t nextValueNumber; 148 149 Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); 150 Expression::ExpressionOpcode getOpcode(CmpInst* C); 151 Expression::ExpressionOpcode getOpcode(CastInst* C); 152 Expression create_expression(BinaryOperator* BO); 153 Expression create_expression(CmpInst* C); 154 Expression create_expression(ShuffleVectorInst* V); 155 Expression create_expression(ExtractElementInst* C); 156 Expression create_expression(InsertElementInst* V); 157 Expression create_expression(SelectInst* V); 158 Expression create_expression(CastInst* C); 159 Expression create_expression(GetElementPtrInst* G); 160 Expression create_expression(CallInst* C); 161 public: 162 ValueTable() : nextValueNumber(1) { } 163 uint32_t lookup_or_add(Value* V); 164 uint32_t lookup(Value* V) const; 165 void add(Value* V, uint32_t num); 166 void clear(); 167 void erase(Value* v); 168 unsigned size(); 169 void setAliasAnalysis(AliasAnalysis* A) { AA = A; } 170 uint32_t hash_operand(Value* v); 171 }; 172} 173 174namespace llvm { 175template <> struct DenseMapInfo<Expression> { 176 static inline Expression getEmptyKey() { 177 return Expression(Expression::EMPTY); 178 } 179 180 static inline Expression getTombstoneKey() { 181 return Expression(Expression::TOMBSTONE); 182 } 183 184 static unsigned getHashValue(const Expression e) { 185 unsigned hash = e.opcode; 186 
187 hash = e.firstVN + hash * 37; 188 hash = e.secondVN + hash * 37; 189 hash = e.thirdVN + hash * 37; 190 191 hash = ((unsigned)((uintptr_t)e.type >> 4) ^ 192 (unsigned)((uintptr_t)e.type >> 9)) + 193 hash * 37; 194 195 for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), 196 E = e.varargs.end(); I != E; ++I) 197 hash = *I + hash * 37; 198 199 hash = ((unsigned)((uintptr_t)e.function >> 4) ^ 200 (unsigned)((uintptr_t)e.function >> 9)) + 201 hash * 37; 202 203 return hash; 204 } 205 static bool isEqual(const Expression &LHS, const Expression &RHS) { 206 return LHS == RHS; 207 } 208 static bool isPod() { return true; } 209}; 210} 211 212//===----------------------------------------------------------------------===// 213// ValueTable Internal Functions 214//===----------------------------------------------------------------------===// 215Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { 216 switch(BO->getOpcode()) { 217 default: // THIS SHOULD NEVER HAPPEN 218 assert(0 && "Binary operator with unknown opcode?"); 219 case Instruction::Add: return Expression::ADD; 220 case Instruction::Sub: return Expression::SUB; 221 case Instruction::Mul: return Expression::MUL; 222 case Instruction::UDiv: return Expression::UDIV; 223 case Instruction::SDiv: return Expression::SDIV; 224 case Instruction::FDiv: return Expression::FDIV; 225 case Instruction::URem: return Expression::UREM; 226 case Instruction::SRem: return Expression::SREM; 227 case Instruction::FRem: return Expression::FREM; 228 case Instruction::Shl: return Expression::SHL; 229 case Instruction::LShr: return Expression::LSHR; 230 case Instruction::AShr: return Expression::ASHR; 231 case Instruction::And: return Expression::AND; 232 case Instruction::Or: return Expression::OR; 233 case Instruction::Xor: return Expression::XOR; 234 } 235} 236 237Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { 238 if (isa<ICmpInst>(C)) { 239 switch (C->getPredicate()) { 240 
default: // THIS SHOULD NEVER HAPPEN 241 assert(0 && "Comparison with unknown predicate?"); 242 case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; 243 case ICmpInst::ICMP_NE: return Expression::ICMPNE; 244 case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; 245 case ICmpInst::ICMP_UGE: return Expression::ICMPUGE; 246 case ICmpInst::ICMP_ULT: return Expression::ICMPULT; 247 case ICmpInst::ICMP_ULE: return Expression::ICMPULE; 248 case ICmpInst::ICMP_SGT: return Expression::ICMPSGT; 249 case ICmpInst::ICMP_SGE: return Expression::ICMPSGE; 250 case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; 251 case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; 252 } 253 } 254 assert(isa<FCmpInst>(C) && "Unknown compare"); 255 switch (C->getPredicate()) { 256 default: // THIS SHOULD NEVER HAPPEN 257 assert(0 && "Comparison with unknown predicate?"); 258 case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; 259 case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; 260 case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; 261 case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; 262 case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; 263 case FCmpInst::FCMP_ONE: return Expression::FCMPONE; 264 case FCmpInst::FCMP_ORD: return Expression::FCMPORD; 265 case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; 266 case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; 267 case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; 268 case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; 269 case FCmpInst::FCMP_ULT: return Expression::FCMPULT; 270 case FCmpInst::FCMP_ULE: return Expression::FCMPULE; 271 case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; 272 } 273} 274 275Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { 276 switch(C->getOpcode()) { 277 default: // THIS SHOULD NEVER HAPPEN 278 assert(0 && "Cast operator with unknown opcode?"); 279 case Instruction::Trunc: return Expression::TRUNC; 280 case Instruction::ZExt: return Expression::ZEXT; 281 case Instruction::SExt: return 
Expression::SEXT; 282 case Instruction::FPToUI: return Expression::FPTOUI; 283 case Instruction::FPToSI: return Expression::FPTOSI; 284 case Instruction::UIToFP: return Expression::UITOFP; 285 case Instruction::SIToFP: return Expression::SITOFP; 286 case Instruction::FPTrunc: return Expression::FPTRUNC; 287 case Instruction::FPExt: return Expression::FPEXT; 288 case Instruction::PtrToInt: return Expression::PTRTOINT; 289 case Instruction::IntToPtr: return Expression::INTTOPTR; 290 case Instruction::BitCast: return Expression::BITCAST; 291 } 292} 293 294uint32_t ValueTable::hash_operand(Value* v) { 295 if (CallInst* CI = dyn_cast<CallInst>(v)) 296 if (!AA->doesNotAccessMemory(CI)) 297 return nextValueNumber++; 298 299 return lookup_or_add(v); 300} 301 302Expression ValueTable::create_expression(CallInst* C) { 303 Expression e; 304 305 e.type = C->getType(); 306 e.firstVN = 0; 307 e.secondVN = 0; 308 e.thirdVN = 0; 309 e.function = C->getCalledFunction(); 310 e.opcode = Expression::CALL; 311 312 for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); 313 I != E; ++I) 314 e.varargs.push_back(hash_operand(*I)); 315 316 return e; 317} 318 319Expression ValueTable::create_expression(BinaryOperator* BO) { 320 Expression e; 321 322 e.firstVN = hash_operand(BO->getOperand(0)); 323 e.secondVN = hash_operand(BO->getOperand(1)); 324 e.thirdVN = 0; 325 e.function = 0; 326 e.type = BO->getType(); 327 e.opcode = getOpcode(BO); 328 329 return e; 330} 331 332Expression ValueTable::create_expression(CmpInst* C) { 333 Expression e; 334 335 e.firstVN = hash_operand(C->getOperand(0)); 336 e.secondVN = hash_operand(C->getOperand(1)); 337 e.thirdVN = 0; 338 e.function = 0; 339 e.type = C->getType(); 340 e.opcode = getOpcode(C); 341 342 return e; 343} 344 345Expression ValueTable::create_expression(CastInst* C) { 346 Expression e; 347 348 e.firstVN = hash_operand(C->getOperand(0)); 349 e.secondVN = 0; 350 e.thirdVN = 0; 351 e.function = 0; 352 e.type = C->getType(); 353 e.opcode 
= getOpcode(C); 354 355 return e; 356} 357 358Expression ValueTable::create_expression(ShuffleVectorInst* S) { 359 Expression e; 360 361 e.firstVN = hash_operand(S->getOperand(0)); 362 e.secondVN = hash_operand(S->getOperand(1)); 363 e.thirdVN = hash_operand(S->getOperand(2)); 364 e.function = 0; 365 e.type = S->getType(); 366 e.opcode = Expression::SHUFFLE; 367 368 return e; 369} 370 371Expression ValueTable::create_expression(ExtractElementInst* E) { 372 Expression e; 373 374 e.firstVN = hash_operand(E->getOperand(0)); 375 e.secondVN = hash_operand(E->getOperand(1)); 376 e.thirdVN = 0; 377 e.function = 0; 378 e.type = E->getType(); 379 e.opcode = Expression::EXTRACT; 380 381 return e; 382} 383 384Expression ValueTable::create_expression(InsertElementInst* I) { 385 Expression e; 386 387 e.firstVN = hash_operand(I->getOperand(0)); 388 e.secondVN = hash_operand(I->getOperand(1)); 389 e.thirdVN = hash_operand(I->getOperand(2)); 390 e.function = 0; 391 e.type = I->getType(); 392 e.opcode = Expression::INSERT; 393 394 return e; 395} 396 397Expression ValueTable::create_expression(SelectInst* I) { 398 Expression e; 399 400 e.firstVN = hash_operand(I->getCondition()); 401 e.secondVN = hash_operand(I->getTrueValue()); 402 e.thirdVN = hash_operand(I->getFalseValue()); 403 e.function = 0; 404 e.type = I->getType(); 405 e.opcode = Expression::SELECT; 406 407 return e; 408} 409 410Expression ValueTable::create_expression(GetElementPtrInst* G) { 411 Expression e; 412 413 e.firstVN = hash_operand(G->getPointerOperand()); 414 e.secondVN = 0; 415 e.thirdVN = 0; 416 e.function = 0; 417 e.type = G->getType(); 418 e.opcode = Expression::GEP; 419 420 for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); 421 I != E; ++I) 422 e.varargs.push_back(hash_operand(*I)); 423 424 return e; 425} 426 427//===----------------------------------------------------------------------===// 428// ValueTable External Functions 
429//===----------------------------------------------------------------------===// 430 431/// lookup_or_add - Returns the value number for the specified value, assigning 432/// it a new number if it did not have one before. 433uint32_t ValueTable::lookup_or_add(Value* V) { 434 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 435 if (VI != valueNumbering.end()) 436 return VI->second; 437 438 if (CallInst* C = dyn_cast<CallInst>(V)) { 439 if (AA->onlyReadsMemory(C)) { // includes doesNotAccessMemory 440 Expression e = create_expression(C); 441 442 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 443 if (EI != expressionNumbering.end()) { 444 valueNumbering.insert(std::make_pair(V, EI->second)); 445 return EI->second; 446 } else { 447 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 448 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 449 450 return nextValueNumber++; 451 } 452 } else { 453 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 454 return nextValueNumber++; 455 } 456 } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) { 457 Expression e = create_expression(BO); 458 459 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 460 if (EI != expressionNumbering.end()) { 461 valueNumbering.insert(std::make_pair(V, EI->second)); 462 return EI->second; 463 } else { 464 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 465 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 466 467 return nextValueNumber++; 468 } 469 } else if (CmpInst* C = dyn_cast<CmpInst>(V)) { 470 Expression e = create_expression(C); 471 472 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 473 if (EI != expressionNumbering.end()) { 474 valueNumbering.insert(std::make_pair(V, EI->second)); 475 return EI->second; 476 } else { 477 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 478 valueNumbering.insert(std::make_pair(V, 
nextValueNumber)); 479 480 return nextValueNumber++; 481 } 482 } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) { 483 Expression e = create_expression(U); 484 485 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 486 if (EI != expressionNumbering.end()) { 487 valueNumbering.insert(std::make_pair(V, EI->second)); 488 return EI->second; 489 } else { 490 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 491 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 492 493 return nextValueNumber++; 494 } 495 } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) { 496 Expression e = create_expression(U); 497 498 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 499 if (EI != expressionNumbering.end()) { 500 valueNumbering.insert(std::make_pair(V, EI->second)); 501 return EI->second; 502 } else { 503 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 504 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 505 506 return nextValueNumber++; 507 } 508 } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) { 509 Expression e = create_expression(U); 510 511 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 512 if (EI != expressionNumbering.end()) { 513 valueNumbering.insert(std::make_pair(V, EI->second)); 514 return EI->second; 515 } else { 516 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 517 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 518 519 return nextValueNumber++; 520 } 521 } else if (SelectInst* U = dyn_cast<SelectInst>(V)) { 522 Expression e = create_expression(U); 523 524 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 525 if (EI != expressionNumbering.end()) { 526 valueNumbering.insert(std::make_pair(V, EI->second)); 527 return EI->second; 528 } else { 529 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 530 
valueNumbering.insert(std::make_pair(V, nextValueNumber)); 531 532 return nextValueNumber++; 533 } 534 } else if (CastInst* U = dyn_cast<CastInst>(V)) { 535 Expression e = create_expression(U); 536 537 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 538 if (EI != expressionNumbering.end()) { 539 valueNumbering.insert(std::make_pair(V, EI->second)); 540 return EI->second; 541 } else { 542 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 543 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 544 545 return nextValueNumber++; 546 } 547 } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) { 548 Expression e = create_expression(U); 549 550 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 551 if (EI != expressionNumbering.end()) { 552 valueNumbering.insert(std::make_pair(V, EI->second)); 553 return EI->second; 554 } else { 555 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 556 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 557 558 return nextValueNumber++; 559 } 560 } else { 561 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 562 return nextValueNumber++; 563 } 564} 565 566/// lookup - Returns the value number of the specified value. Fails if 567/// the value has not yet been numbered. 
uint32_t ValueTable::lookup(Value* V) const {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable and restart numbering at 1.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.  Note that the
/// expression table is left alone, so the number may still be reachable
/// through a structurally identical expression.
void ValueTable::erase(Value* V) {
  valueNumbering.erase(V);
}

//===----------------------------------------------------------------------===//
//                         ValueNumberedSet Class
//===----------------------------------------------------------------------===//
namespace {
/// ValueNumberedSet - A set of Values paired with a bitvector indexed by
/// value number.  The bitvector lets callers ask "is some value with number
/// N present?" in O(1) before paying for a scan (see GVN::find_leader).
/// Invariant maintained by callers: a set bit implies at least one element
/// with that value number is in `contents`.
class VISIBILITY_HIDDEN ValueNumberedSet {
  private:
    SmallPtrSet<Value*, 8> contents;
    BitVector numbers;      // Grown lazily by set().
  public:
    ValueNumberedSet() { numbers.resize(1); }
    ValueNumberedSet(const ValueNumberedSet& other) {
      numbers = other.numbers;
      contents = other.contents;
    }

    typedef SmallPtrSet<Value*, 8>::iterator iterator;

    iterator begin() { return contents.begin(); }
    iterator end() { return contents.end(); }

    bool insert(Value* v) { return contents.insert(v); }
    void insert(iterator I, iterator E) { contents.insert(I, E); }
    void erase(Value* v) { contents.erase(v); }
    unsigned count(Value* v) { return contents.count(v); }
    size_t size() { return contents.size(); }

    // Mark value number i as present, growing the bitvector as needed.
    void set(unsigned i) {
      if (i >= numbers.size())
        numbers.resize(i+1);

      numbers.set(i);
    }

    void operator=(const ValueNumberedSet& other) {
      contents = other.contents;
      numbers = other.numbers;
    }

    // Clear bit i; a number beyond the bitvector was never set, so there is
    // nothing to do in that case.
    void reset(unsigned i) {
      if (i < numbers.size())
        numbers.reset(i);
    }

    // Test bit i; numbers beyond the bitvector are treated as absent.
    bool test(unsigned i) {
      if (i >= numbers.size())
        return false;

      return numbers.test(i);
    }

    void clear() {
      contents.clear();
      numbers.clear();
    }
};
}

//===----------------------------------------------------------------------===//
//                         GVN Pass
//===----------------------------------------------------------------------===//

namespace {

  class VISIBILITY_HIDDEN GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    GVN() : FunctionPass((intptr_t)&ID) { }

  private:
    // Value numbering for the current function; reset between functions.
    ValueTable VN;

    // Per-block sets of the values available at the end of the block.
    DenseMap<BasicBlock*, ValueNumberedSet> availableOut;

    // Cache of PHIs built by load PHI-construction, keyed on the load's
    // pointer operand, so repeated loads of the same pointer reuse them.
    typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType;
    PhiMapType phiMap;


    // This transformation requires dominator info plus memory dependence and
    // alias analysis; it preserves the CFG and the analyses it uses.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      AU.addRequired<DominatorTree>();
      AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();
      AU.addRequired<TargetData>();
      AU.addPreserved<AliasAnalysis>();
      AU.addPreserved<MemoryDependenceAnalysis>();
      AU.addPreserved<TargetData>();
    }

    // Helper functions
    // FIXME: eliminate or document these better
    Value* find_leader(ValueNumberedSet& vals, uint32_t v) ;
    void val_insert(ValueNumberedSet& s, Value* v);
    bool processLoad(LoadInst* L,
                     DenseMap<Value*, LoadInst*> &lastLoad,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction* I,
                            ValueNumberedSet& currAvail,
                            DenseMap<Value*, LoadInst*>& lastSeenLoad,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
                       SmallVectorImpl<Instruction*> &toErase);
    bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C,
                              SmallVectorImpl<Instruction*> &toErase);
    Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
                            DenseMap<BasicBlock*, Value*> &Phis,
                            bool top_level = false);
    void dump(DenseMap<BasicBlock*, Value*>& d);
    bool iterateOnFunction(Function &F);
    Value* CollapsePhi(PHINode* p);
    bool isSafeReplacement(PHINode* p, Instruction* inst);
  };

  char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass() { return new GVN(); }

static RegisterPass<GVN> X("gvn",
                           "Global Value Numbering");

/// find_leader - Given a set and a value number, return the first
/// element of the set with that value number, or 0 if no such element
/// is present
Value* GVN::find_leader(ValueNumberedSet& vals, uint32_t v) {
  // Fast reject via the presence bitvector before the linear scan.
  if (!vals.test(v))
    return 0;

  for (ValueNumberedSet::iterator I = vals.begin(), E = vals.end();
       I != E; ++I)
    if (v == VN.lookup(*I))
      return *I;

  assert(0 && "No leader found, but present bit is set?");
  return 0;
}

/// val_insert - Insert a value into a set only if there is not a value
/// with the same value number already in the set
void GVN::val_insert(ValueNumberedSet& s, Value* v) {
  uint32_t num = VN.lookup(v);
  if (!s.test(num))
    s.insert(v);
}

/// dump - Debugging helper: print a block-to-value map to stdout.
void GVN::dump(DenseMap<BasicBlock*, Value*>& d) {
  printf("{\n");
  for (DenseMap<BasicBlock*, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    if (I->second == MemoryDependenceAnalysis::None)
      printf("None\n");
    else
      I->second->dump();
  }
  printf("}\n");
}

/// CollapsePhi - If the given PHI trivially computes a single value, return
/// that value, otherwise return 0.  A non-instruction value (constant,
/// argument) is always a safe replacement; an instruction must dominate the
/// PHI and pass isSafeReplacement.
Value* GVN::CollapsePhi(PHINode* p) {
  DominatorTree &DT = getAnalysis<DominatorTree>();
  Value* constVal = p->hasConstantValue();

  if (!constVal) return 0;

  Instruction* inst = dyn_cast<Instruction>(constVal);
  if (!inst)
    return constVal;

  if (DT.dominates(inst, p))
    if (isSafeReplacement(p, inst))
      return inst;
  return 0;
}

/// isSafeReplacement - Return true if it is safe to replace all uses of the
/// PHI `p` with `inst`.
bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) {
if (!isa<PHINode>(inst)) 766 return true; 767 768 for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); 769 UI != E; ++UI) 770 if (PHINode* use_phi = dyn_cast<PHINode>(UI)) 771 if (use_phi->getParent() == inst->getParent()) 772 return false; 773 774 return true; 775} 776 777/// GetValueForBlock - Get the value to use within the specified basic block. 778/// available values are in Phis. 779Value *GVN::GetValueForBlock(BasicBlock *BB, LoadInst* orig, 780 DenseMap<BasicBlock*, Value*> &Phis, 781 bool top_level) { 782 783 // If we have already computed this value, return the previously computed val. 784 DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB); 785 if (V != Phis.end() && !top_level) return V->second; 786 787 BasicBlock* singlePred = BB->getSinglePredecessor(); 788 if (singlePred) { 789 Value *ret = GetValueForBlock(singlePred, orig, Phis); 790 Phis[BB] = ret; 791 return ret; 792 } 793 794 // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so 795 // now, then get values to fill in the incoming values for the PHI. 796 PHINode *PN = new PHINode(orig->getType(), orig->getName()+".rle", 797 BB->begin()); 798 PN->reserveOperandSpace(std::distance(pred_begin(BB), pred_end(BB))); 799 800 if (Phis.count(BB) == 0) 801 Phis.insert(std::make_pair(BB, PN)); 802 803 // Fill in the incoming values for the block. 
804 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 805 Value* val = GetValueForBlock(*PI, orig, Phis); 806 PN->addIncoming(val, *PI); 807 } 808 809 AliasAnalysis& AA = getAnalysis<AliasAnalysis>(); 810 AA.copyValue(orig, PN); 811 812 // Attempt to collapse PHI nodes that are trivially redundant 813 Value* v = CollapsePhi(PN); 814 if (!v) { 815 // Cache our phi construction results 816 phiMap[orig->getPointerOperand()].insert(PN); 817 return PN; 818 } 819 820 MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>(); 821 822 MD.removeInstruction(PN); 823 PN->replaceAllUsesWith(v); 824 825 for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(), 826 E = Phis.end(); I != E; ++I) 827 if (I->second == PN) 828 I->second = v; 829 830 PN->eraseFromParent(); 831 832 Phis[BB] = v; 833 return v; 834} 835 836/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are 837/// non-local by performing PHI construction. 838bool GVN::processNonLocalLoad(LoadInst* L, 839 SmallVectorImpl<Instruction*> &toErase) { 840 MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>(); 841 842 // Find the non-local dependencies of the load 843 DenseMap<BasicBlock*, Value*> deps; 844 MD.getNonLocalDependency(L, deps); 845 846 DenseMap<BasicBlock*, Value*> repl; 847 848 // Filter out useless results (non-locals, etc) 849 for (DenseMap<BasicBlock*, Value*>::iterator I = deps.begin(), E = deps.end(); 850 I != E; ++I) { 851 if (I->second == MemoryDependenceAnalysis::None) 852 return false; 853 854 if (I->second == MemoryDependenceAnalysis::NonLocal) 855 continue; 856 857 if (StoreInst* S = dyn_cast<StoreInst>(I->second)) { 858 if (S->getPointerOperand() != L->getPointerOperand()) 859 return false; 860 repl[I->first] = S->getOperand(0); 861 } else if (LoadInst* LD = dyn_cast<LoadInst>(I->second)) { 862 if (LD->getPointerOperand() != L->getPointerOperand()) 863 return false; 864 repl[I->first] = LD; 865 } else { 866 return 
false; 867 } 868 } 869 870 // Use cached PHI construction information from previous runs 871 SmallPtrSet<Instruction*, 4>& p = phiMap[L->getPointerOperand()]; 872 for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end(); 873 I != E; ++I) { 874 if ((*I)->getParent() == L->getParent()) { 875 MD.removeInstruction(L); 876 L->replaceAllUsesWith(*I); 877 toErase.push_back(L); 878 NumGVNLoad++; 879 return true; 880 } 881 882 repl.insert(std::make_pair((*I)->getParent(), *I)); 883 } 884 885 // Perform PHI construction 886 SmallPtrSet<BasicBlock*, 4> visited; 887 Value* v = GetValueForBlock(L->getParent(), L, repl, true); 888 889 MD.removeInstruction(L); 890 L->replaceAllUsesWith(v); 891 toErase.push_back(L); 892 NumGVNLoad++; 893 894 return true; 895} 896 897/// processLoad - Attempt to eliminate a load, first by eliminating it 898/// locally, and then attempting non-local elimination if that fails. 899bool GVN::processLoad(LoadInst *L, DenseMap<Value*, LoadInst*> &lastLoad, 900 SmallVectorImpl<Instruction*> &toErase) { 901 if (L->isVolatile()) { 902 lastLoad[L->getPointerOperand()] = L; 903 return false; 904 } 905 906 Value* pointer = L->getPointerOperand(); 907 LoadInst*& last = lastLoad[pointer]; 908 909 // ... to a pointer that has been loaded from before... 
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
  bool removedNonLocal = false;
  Instruction* dep = MD.getDependency(L);
  // Non-local dependency: hand off to PHI construction (pointless in the
  // entry block, which has no predecessors).
  if (dep == MemoryDependenceAnalysis::NonLocal &&
      L->getParent() != &L->getParent()->getParent()->getEntryBlock()) {
    removedNonLocal = processNonLocalLoad(L, toErase);

    if (!removedNonLocal)
      last = L;

    return removedNonLocal;
  }


  bool deletedLoad = false;

  // Walk up the dependency chain until we either find
  // a dependency we can use, or we can't walk any further
  while (dep != MemoryDependenceAnalysis::None &&
         dep != MemoryDependenceAnalysis::NonLocal &&
         (isa<LoadInst>(dep) || isa<StoreInst>(dep))) {
    // ... that depends on a store ...
    if (StoreInst* S = dyn_cast<StoreInst>(dep)) {
      if (S->getPointerOperand() == pointer) {
        // Same pointer: the load must see the stored value.  Remove it!
        MD.removeInstruction(L);

        L->replaceAllUsesWith(S->getOperand(0));
        toErase.push_back(L);
        deletedLoad = true;
        NumGVNLoad++;
      }

      // Whether we removed it or not, we can't
      // go any further
      break;
    } else if (!last) {
      // If we don't depend on a store, and we haven't
      // been loaded before, bail.
      break;
    } else if (dep == last) {
      // The previous load of this pointer is still valid here.  Remove it!
      MD.removeInstruction(L);

      L->replaceAllUsesWith(last);
      toErase.push_back(L);
      deletedLoad = true;
      NumGVNLoad++;

      break;
    } else {
      // Keep walking: query the dependency that precedes `dep`.
      dep = MD.getDependency(L, dep);
    }
  }

  if (dep != MemoryDependenceAnalysis::None &&
      dep != MemoryDependenceAnalysis::NonLocal &&
      isa<AllocationInst>(dep)) {
    // Check that this load is actually from the
    // allocation we found, looking through bitcasts and GEPs to reach the
    // underlying object.
    Value* v = L->getOperand(0);
    while (true) {
      if (BitCastInst *BC = dyn_cast<BitCastInst>(v))
        v = BC->getOperand(0);
      else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(v))
        v = GEP->getOperand(0);
      else
        break;
    }
    if (v == dep) {
      // If this load depends directly on an allocation, there isn't
      // anything stored there; therefore, we can optimize this load
      // to undef.
      MD.removeInstruction(L);

      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      toErase.push_back(L);
      deletedLoad = true;
      NumGVNLoad++;
    }
  }

  // If the load survived, it becomes the most recent load of this pointer.
  if (!deletedLoad)
    last = L;

  return deletedLoad;
}

/// isBytewiseValue - If the specified value can be set by repeating the same
/// byte in memory, return the i8 value that it is represented with. This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
/// byte store (e.g. i16 0x1234), return null.
static Value *isBytewiseValue(Value *V) {
  // All byte-wide stores are splatable, even of arbitrary variables.
  if (V->getType() == Type::Int8Ty) return V;

  // Constant float and double values can be handled as integer values if the
  // corresponding integer value is "byteable". An important case is 0.0.
1009 if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { 1010 if (CFP->getType() == Type::FloatTy) 1011 V = ConstantExpr::getBitCast(CFP, Type::Int32Ty); 1012 if (CFP->getType() == Type::DoubleTy) 1013 V = ConstantExpr::getBitCast(CFP, Type::Int64Ty); 1014 // Don't handle long double formats, which have strange constraints. 1015 } 1016 1017 // We can handle constant integers that are power of two in size and a 1018 // multiple of 8 bits. 1019 if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { 1020 unsigned Width = CI->getBitWidth(); 1021 if (isPowerOf2_32(Width) && Width > 8) { 1022 // We can handle this value if the recursive binary decomposition is the 1023 // same at all levels. 1024 APInt Val = CI->getValue(); 1025 APInt Val2; 1026 while (Val.getBitWidth() != 8) { 1027 unsigned NextWidth = Val.getBitWidth()/2; 1028 Val2 = Val.lshr(NextWidth); 1029 Val2.trunc(Val.getBitWidth()/2); 1030 Val.trunc(Val.getBitWidth()/2); 1031 1032 // If the top/bottom halves aren't the same, reject it. 1033 if (Val != Val2) 1034 return 0; 1035 } 1036 return ConstantInt::get(Val); 1037 } 1038 } 1039 1040 // Conceptually, we could handle things like: 1041 // %a = zext i8 %X to i16 1042 // %b = shl i16 %a, 8 1043 // %c = or i16 %a, %b 1044 // but until there is an example that actually needs this, it doesn't seem 1045 // worth worrying about. 1046 return 0; 1047} 1048 1049static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, 1050 bool &VariableIdxFound, TargetData &TD) { 1051 // Skip over the first indices. 1052 gep_type_iterator GTI = gep_type_begin(GEP); 1053 for (unsigned i = 1; i != Idx; ++i, ++GTI) 1054 /*skip along*/; 1055 1056 // Compute the offset implied by the rest of the indices. 1057 int64_t Offset = 0; 1058 for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { 1059 ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i)); 1060 if (OpC == 0) 1061 return VariableIdxFound = true; 1062 if (OpC->isZero()) continue; // No offset. 
    
    // Handle struct indices, which add their field offset to the pointer.
    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
      continue;
    }
    
    // Otherwise, we have a sequential type like an array or vector.  Multiply
    // the index by the ElementSize.  Note the index is sign-extended: negative
    // GEP indices produce negative offsets.
    uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
    Offset += Size*OpC->getSExtValue();
  }
  
  return Offset;
}

/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
/// constant offset, and return that constant offset.  For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40].  In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
                            TargetData &TD) {
  // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
  // base.  After that base, they may have some number of common (and
  // potentially variable) indices.  After that they handle some constant
  // offset, which determines their offset from each other.  At this point, we
  // handle no other case.
  GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
  GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
    return false;
  
  // Skip any common indices and track the GEP types.  Common leading indices
  // (even variable ones) cancel out in the subtraction below, so only the
  // differing tail needs to be constant.
  unsigned Idx = 1;
  for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
    if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
      break;
  
  bool VariableIdxFound = false;
  int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
  int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
  if (VariableIdxFound) return false;
  
  Offset = Offset2-Offset1;
  return true;
}


/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
///   store 0 -> P+1
///   store 0 -> P+0
///   store 0 -> P+3
///   store 0 -> P+2
/// which sometimes happens with stores to arrays of structs etc.  When we see
/// the first store, we make a range [1, 2).  The second store extends the range
/// to [0, 2).  The third makes a new range [2, 3).  The fourth store joins the
/// two ranges into [0, 3) which is memset'able.
namespace {
struct MemsetRange {
  // Start/End - A semi range that describes the span that this range covers.
  // The range is closed at the start and open at the end: [Start, End).
  int64_t Start, End;

  /// StartPtr - The getelementptr instruction that points to the start of the
  /// range.
  Value *StartPtr;
  
  /// Alignment - The known alignment of the first store.
  unsigned Alignment;
  
  /// TheStores - The actual stores that make up this range.
  SmallVector<StoreInst*, 16> TheStores;
  
  /// isProfitableToUseMemset - Heuristic: would replacing TheStores with a
  /// single memset call reduce the number of stores actually emitted?
  bool isProfitableToUseMemset(const TargetData &TD) const;

};
} // end anon namespace

bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
  // If we found more than 8 stores to merge or 64 bytes, use memset.
  if (TheStores.size() >= 8 || End-Start >= 64) return true;
  
  // Assume that the code generator is capable of merging pairs of stores
  // together if it wants to.
  if (TheStores.size() <= 2) return false;
  
  // If we have fewer than 8 stores, it can still be worthwhile to do this.
  // For example, merging 4 i8 stores into an i32 store is useful almost always.
  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
  // memset will be split into 2 32-bit stores anyway) and doing so can
  // pessimize the llvm optimizer.
  //
  // Since we don't have perfect knowledge here, make some assumptions: assume
  // the maximum GPR width is the same size as the pointer size and assume that
  // this width can be stored.  If so, check to see whether we will end up
  // actually reducing the number of stores used.
  unsigned Bytes = unsigned(End-Start);
  unsigned NumPointerStores = Bytes/TD.getPointerSize();
  
  // Assume the remaining bytes if any are done a byte at a time.
  unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
  
  // If we will reduce the # stores (according to this heuristic), do the
  // transformation.  This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
  // etc.
  return TheStores.size() > NumPointerStores+NumByteStores;
}


namespace {
/// MemsetRanges - A collection of disjoint MemsetRange intervals, kept sorted
/// by Start offset; addStore() merges overlapping/adjacent stores into them.
class MemsetRanges {
  /// Ranges - A sorted list of the memset ranges.  We use std::list here
  /// because each element is relatively large and expensive to copy.
  std::list<MemsetRange> Ranges;
  typedef std::list<MemsetRange>::iterator range_iterator;
  TargetData &TD;
public:
  MemsetRanges(TargetData &td) : TD(td) {}
  
  typedef std::list<MemsetRange>::const_iterator const_iterator;
  const_iterator begin() const { return Ranges.begin(); }
  const_iterator end() const { return Ranges.end(); }
  bool empty() const { return Ranges.empty(); }
  
  void addStore(int64_t OffsetFromFirst, StoreInst *SI);
};

} // end anon namespace


/// addStore - Add a new store to the MemsetRanges data structure.  This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
  // The interval written by this store is [Start, Start + store size).
  int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
  
  // Do a linear search of the ranges to see if this can be joined and/or to
  // find the insertion point in the list.  We keep the ranges sorted for
  // simplicity here.  This is a linear search of a linked list, which is ugly,
  // however the number of ranges is limited, so this won't get crazy slow.
  range_iterator I = Ranges.begin(), E = Ranges.end();
  
  while (I != E && Start > I->End)
    ++I;
  
  // We now know that I == E, in which case we didn't find anything to merge
  // with, or that Start <= I->End.  If End < I->Start or I == E, then we need
  // to insert a new range.  Handle this now.
  if (I == E || End < I->Start) {
    MemsetRange &R = *Ranges.insert(I, MemsetRange());
    R.Start = Start;
    R.End = End;
    R.StartPtr = SI->getPointerOperand();
    R.Alignment = SI->getAlignment();
    R.TheStores.push_back(SI);
    return;
  }

  // This store overlaps with I, add it.
  I->TheStores.push_back(SI);
  
  // At this point, we may have an interval that completely contains our store.
1225 // If so, just add it to the interval and return. 1226 if (I->Start <= Start && I->End >= End) 1227 return; 1228 1229 // Now we know that Start <= I->End and End >= I->Start so the range overlaps 1230 // but is not entirely contained within the range. 1231 1232 // See if the range extends the start of the range. In this case, it couldn't 1233 // possibly cause it to join the prior range, because otherwise we would have 1234 // stopped on *it*. 1235 if (Start < I->Start) 1236 I->Start = Start; 1237 1238 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint 1239 // is in or right at the end of I), and that End >= I->Start. Extend I out to 1240 // End. 1241 if (End > I->End) { 1242 I->End = End; 1243 range_iterator NextI = I;; 1244 while (++NextI != E && End >= NextI->Start) { 1245 // Merge the range in. 1246 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end()); 1247 if (NextI->End > I->End) 1248 I->End = NextI->End; 1249 Ranges.erase(NextI); 1250 NextI = I; 1251 } 1252 } 1253} 1254 1255 1256 1257/// processStore - When GVN is scanning forward over instructions, we look for 1258/// some other patterns to fold away. In particular, this looks for stores to 1259/// neighboring locations of memory. If it sees enough consequtive ones 1260/// (currently 4) it attempts to merge them together into a memcpy/memset. 1261bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) { 1262 if (!FormMemSet) return false; 1263 if (SI->isVolatile()) return false; 1264 1265 // There are two cases that are interesting for this code to handle: memcpy 1266 // and memset. Right now we only handle memset. 1267 1268 // Ensure that the value being stored is something that can be memset'able a 1269 // byte at a time like "0" or "-1" or any width, as well as things like 1270 // 0xA0A0A0A0 and 0.0. 
1271 Value *ByteVal = isBytewiseValue(SI->getOperand(0)); 1272 if (!ByteVal) 1273 return false; 1274 1275 TargetData &TD = getAnalysis<TargetData>(); 1276 AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 1277 1278 // Okay, so we now have a single store that can be splatable. Scan to find 1279 // all subsequent stores of the same value to offset from the same pointer. 1280 // Join these together into ranges, so we can decide whether contiguous blocks 1281 // are stored. 1282 MemsetRanges Ranges(TD); 1283 1284 Value *StartPtr = SI->getPointerOperand(); 1285 1286 BasicBlock::iterator BI = SI; 1287 for (++BI; !isa<TerminatorInst>(BI); ++BI) { 1288 if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { 1289 // If the call is readnone, ignore it, otherwise bail out. We don't even 1290 // allow readonly here because we don't want something like: 1291 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). 1292 if (AA.getModRefBehavior(CallSite::get(BI)) == 1293 AliasAnalysis::DoesNotAccessMemory) 1294 continue; 1295 1296 // TODO: If this is a memset, try to join it in. 1297 1298 break; 1299 } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI)) 1300 break; 1301 1302 // If this is a non-store instruction it is fine, ignore it. 1303 StoreInst *NextStore = dyn_cast<StoreInst>(BI); 1304 if (NextStore == 0) continue; 1305 1306 // If this is a store, see if we can merge it in. 1307 if (NextStore->isVolatile()) break; 1308 1309 // Check to see if this stored value is of the same byte-splattable value. 1310 if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) 1311 break; 1312 1313 // Check to see if this store is to a constant offset from the start ptr. 1314 int64_t Offset; 1315 if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD)) 1316 break; 1317 1318 Ranges.addStore(Offset, NextStore); 1319 } 1320 1321 // If we have no ranges, then we just had a single store with nothing that 1322 // could be merged in. This is a very common case of course. 
1323 if (Ranges.empty()) 1324 return false; 1325 1326 // If we had at least one store that could be merged in, add the starting 1327 // store as well. We try to avoid this unless there is at least something 1328 // interesting as a small compile-time optimization. 1329 Ranges.addStore(0, SI); 1330 1331 1332 Function *MemSetF = 0; 1333 1334 // Now that we have full information about ranges, loop over the ranges and 1335 // emit memset's for anything big enough to be worthwhile. 1336 bool MadeChange = false; 1337 for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); 1338 I != E; ++I) { 1339 const MemsetRange &Range = *I; 1340 1341 if (Range.TheStores.size() == 1) continue; 1342 1343 // If it is profitable to lower this range to memset, do so now. 1344 if (!Range.isProfitableToUseMemset(TD)) 1345 continue; 1346 1347 // Otherwise, we do want to transform this! Create a new memset. We put 1348 // the memset right after the first store that we found in this block. This 1349 // ensures that the caller will increment the iterator to the memset before 1350 // it deletes all the stores. 1351 BasicBlock::iterator InsertPt = SI; ++InsertPt; 1352 1353 if (MemSetF == 0) 1354 MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent() 1355 ->getParent(), Intrinsic::memset_i64); 1356 1357 // StartPtr may not dominate the starting point. Instead of using it, base 1358 // the destination pointer off the input to the first store in the block. 1359 StartPtr = SI->getPointerOperand(); 1360 1361 // Cast the start ptr to be i8* as memset requires. 1362 const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty); 1363 if (StartPtr->getType() != i8Ptr) 1364 StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(), 1365 InsertPt); 1366 1367 // Offset the pointer if needed. 
    if (Range.Start)
      StartPtr = new GetElementPtrInst(StartPtr, ConstantInt::get(Type::Int64Ty,
                                                                  Range.Start),
                                       "ptroffset", InsertPt);
    
    Value *Ops[] = {
      StartPtr, ByteVal,                                        // Start, value
      ConstantInt::get(Type::Int64Ty, Range.End-Range.Start),   // size
      ConstantInt::get(Type::Int32Ty, Range.Alignment)          // align
    };
    Value *C = new CallInst(MemSetF, Ops, Ops+4, "", InsertPt);
    // 'C' is only referenced inside DEBUG(); the self-assignment below
    // presumably suppresses an unused-variable warning in NDEBUG builds.
    DEBUG(cerr << "Replace stores:\n";
          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
            cerr << *Range.TheStores[i];
          cerr << "With: " << *C); C=C;
    
    // Zap all the stores.
    toErase.append(Range.TheStores.begin(), Range.TheStores.end());
    ++NumMemSetInfer;
    MadeChange = true;
  }
  
  return MadeChange;
}


/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool GVN::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
                               SmallVectorImpl<Instruction*> &toErase) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning that
  // the memcpy can be discarded rather than moved.

  // Deliberately get the source and destination with bitcasts stripped away,
  // because we'll need to do type comparisons based on the underlying type.
  Value* cpyDest = cpy->getDest();
  Value* cpySrc = cpy->getSource();
  CallSite CS = CallSite::get(C);

  // We need to be able to reason about the size of the memcpy, so we require
  // that it be a constant.
  ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
  if (!cpyLength)
    return false;

  // Require that src be an alloca.  This simplifies the reasoning considerably.
  AllocaInst* srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  // Check that all of src is copied to dest.
  TargetData& TD = getAnalysis<TargetData>();

  ConstantInt* srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  uint64_t srcSize = TD.getABITypeSize(srcAlloca->getAllocatedType()) *
    srcArraySize->getZExtValue();

  if (cpyLength->getZExtValue() < srcSize)
    return false;

  // Check that accessing the first srcSize bytes of dest will not cause a
  // trap.  Otherwise the transform is invalid since it might cause a trap
  // to occur earlier than it otherwise would.
  if (AllocaInst* A = dyn_cast<AllocaInst>(cpyDest)) {
    // The destination is an alloca.  Check it is larger than srcSize.
    ConstantInt* destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
    if (!destArraySize)
      return false;

    uint64_t destSize = TD.getABITypeSize(A->getAllocatedType()) *
      destArraySize->getZExtValue();

    if (destSize < srcSize)
      return false;
  } else if (Argument* A = dyn_cast<Argument>(cpyDest)) {
    // If the destination is an sret parameter then only accesses that are
    // outside of the returned struct type can trap.
    if (!A->hasStructRetAttr())
      return false;

    const Type* StructTy = cast<PointerType>(A->getType())->getElementType();
    uint64_t destSize = TD.getABITypeSize(StructTy);

    if (destSize < srcSize)
      return false;
  } else {
    return false;
  }

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the final
  // memcpy can be dropped), that it is not read or written between the call and
  // the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
                                   srcAlloca->use_end());
  while (!srcUseList.empty()) {
    User* UI = srcUseList.back();
    srcUseList.pop_back();

    if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
      // Casts/GEPs are transparent: inspect their users instead.
      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
           I != E; ++I)
        srcUseList.push_back(*I);
    } else if (UI != C && UI != cpy) {
      return false;
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree& DT = getAnalysis<DominatorTree>();
  if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest.  We rely on AA to figure this out for us.
  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
      AliasAnalysis::NoModRef)
    return false;

  // All the checks have passed, so do the transformation.  Rewrite every
  // argument that passed src to pass dest instead (bitcasting if the pointer
  // types differ).
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i) == cpySrc) {
      if (cpySrc->getType() != cpyDest->getType())
        cpyDest = CastInst::createPointerCast(cpyDest, cpySrc->getType(),
                                              cpyDest->getName(), C);
      CS.setArgument(i, cpyDest);
    }

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
  MD.dropInstruction(C);

  // Remove the memcpy
  MD.removeInstruction(cpy);
  toErase.push_back(cpy);

  return true;
}

/// processMemCpy - perform simplication of memcpy's.  If we have memcpy A which
/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
///  This allows later passes to remove the first memcpy altogether.
bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
                        SmallVectorImpl<Instruction*> &toErase) {
  // We can only transforms memcpy's where the dest of one is the source of the
  // other
  if (M->getSource() != MDep->getDest())
    return false;
  
  // Second, the length of the memcpy's must be the same, or the preceeding one
  // must be larger than the following one.
  ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
  ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
  if (!C1 || !C2)
    return false;
  
  uint64_t DepSize = C1->getValue().getZExtValue();
  uint64_t CpySize = C2->getValue().getZExtValue();
  
  if (DepSize < CpySize)
    return false;
  
  // Finally, we have to make sure that the dest of the second does not
  // alias the source of the first
  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
  if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
      AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
           AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
          != AliasAnalysis::NoAlias)
    return false;
  
  // If all checks passed, then we can transform these memcpy's.  Build a new
  // call that copies directly from MDep's source into M's dest.
  Function* MemCpyFun = Intrinsic::getDeclaration(
                                 M->getParent()->getParent()->getParent(),
                                 M->getIntrinsicID());
    
  std::vector<Value*> args;
  args.push_back(M->getRawDest());
  args.push_back(MDep->getRawSource());
  args.push_back(M->getLength());
  args.push_back(M->getAlignment());
  
  CallInst* C = new CallInst(MemCpyFun, args.begin(), args.end(), "", M);
  
  // Verify the rewritten copy still depends on MDep; if some other write
  // intervenes, undo by deleting the new call instead of the old memcpy.
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
  if (MD.getDependency(C) == MDep) {
    MD.dropInstruction(M);
    toErase.push_back(M);
    return true;
  }
  
  MD.removeInstruction(C);
  toErase.push_back(C);
  return false;
}

/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I, ValueNumberedSet &currAvail,
                             DenseMap<Value*, LoadInst*> &lastSeenLoad,
                             SmallVectorImpl<Instruction*> &toErase) {
  if (LoadInst* L = dyn_cast<LoadInst>(I))
    return processLoad(L, lastSeenLoad, toErase);
  
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return processStore(SI, toErase);
  
  if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();

    // The are two possible optimizations we can do for memcpy:
    //   a) memcpy-memcpy xform which exposes redundance for DSE
    //   b) call-memcpy xform for return slot optimization
    Instruction* dep = MD.getDependency(M);
    if (dep == MemoryDependenceAnalysis::None ||
        dep == MemoryDependenceAnalysis::NonLocal)
      return false;
    if (MemCpyInst *MemCpy = dyn_cast<MemCpyInst>(dep))
      return processMemCpy(M, MemCpy, toErase);
    if (CallInst* C = dyn_cast<CallInst>(dep))
      return performCallSlotOptzn(M, C, toErase);
    return false;
  }
  
  unsigned num = VN.lookup_or_add(I);
  
  // Collapse PHI nodes
  if (PHINode* p = dyn_cast<PHINode>(I)) {
    Value* constVal = CollapsePhi(p);
    
    if (constVal) {
      // Purge the collapsed PHI from the phi map before replacing it.
      for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end();
           PI != PE; ++PI)
        if (PI->second.count(p))
          PI->second.erase(p);
        
      p->replaceAllUsesWith(constVal);
      toErase.push_back(p);
    }
  // Perform value-number based elimination
  } else if (currAvail.test(num)) {
    Value* repl = find_leader(currAvail, num);
    
    if (CallInst* CI = dyn_cast<CallInst>(I)) {
      AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
      if (!AA.doesNotAccessMemory(CI)) {
        // Calls that touch memory are only redundant with each other when
        // they live in the same block and have identical memory dependence.
        MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
        if (cast<Instruction>(repl)->getParent() != CI->getParent() ||
            MD.getDependency(CI) != MD.getDependency(cast<CallInst>(repl))) {
          // There must be an intervening may-alias store, so nothing from
          // this point on will be able to be replaced with the preceding call
          currAvail.erase(repl);
          currAvail.insert(I);
          
          return false;
        }
      }
    }
    
    // Remove it!
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
    MD.removeInstruction(I);
    
    VN.erase(I);
    I->replaceAllUsesWith(repl);
    toErase.push_back(I);
    return true;
  } else if (!I->isTerminator()) {
    // First occurrence of this value number: make it available downstream.
    currAvail.set(num);
    currAvail.insert(I);
  }
  
  return false;
}

// GVN::runOnFunction - This is the main transformation entry point for a
// function.
1671// 1672bool GVN::runOnFunction(Function& F) { 1673 VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); 1674 1675 bool changed = false; 1676 bool shouldContinue = true; 1677 1678 while (shouldContinue) { 1679 shouldContinue = iterateOnFunction(F); 1680 changed |= shouldContinue; 1681 } 1682 1683 return changed; 1684} 1685 1686 1687// GVN::iterateOnFunction - Executes one iteration of GVN 1688bool GVN::iterateOnFunction(Function &F) { 1689 // Clean out global sets from any previous functions 1690 VN.clear(); 1691 availableOut.clear(); 1692 phiMap.clear(); 1693 1694 bool changed_function = false; 1695 1696 DominatorTree &DT = getAnalysis<DominatorTree>(); 1697 1698 SmallVector<Instruction*, 4> toErase; 1699 DenseMap<Value*, LoadInst*> lastSeenLoad; 1700 1701 // Top-down walk of the dominator tree 1702 for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()), 1703 E = df_end(DT.getRootNode()); DI != E; ++DI) { 1704 1705 // Get the set to update for this block 1706 ValueNumberedSet& currAvail = availableOut[DI->getBlock()]; 1707 lastSeenLoad.clear(); 1708 1709 BasicBlock* BB = DI->getBlock(); 1710 1711 // A block inherits AVAIL_OUT from its dominator 1712 if (DI->getIDom() != 0) 1713 currAvail = availableOut[DI->getIDom()->getBlock()]; 1714 1715 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 1716 BI != BE; ) { 1717 changed_function |= processInstruction(BI, currAvail, 1718 lastSeenLoad, toErase); 1719 1720 NumGVNInstr += toErase.size(); 1721 1722 // Avoid iterator invalidation 1723 ++BI; 1724 1725 for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), 1726 E = toErase.end(); I != E; ++I) 1727 (*I)->eraseFromParent(); 1728 1729 toErase.clear(); 1730 } 1731 } 1732 1733 return changed_function; 1734} 1735