GVN.cpp revision 70ded19b3f69b8795423ec301da4ad9143ba9f15
12a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===- GVN.cpp - Eliminate redundant values and loads ---------------------===// 22a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// 32a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// The LLVM Compiler Infrastructure 42a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// 52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// This file is distributed under the University of Illinois Open Source 62a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// License. See LICENSE.TXT for details. 77dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch// 82a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===// 92a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// 102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// This pass performs global value numbering to eliminate fully redundant 112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// instructions. It also performs simple dead load elimination. 129ab5563a3196760eb381d102cbb2bc0f7abc6a50Ben Murdoch// 13f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// Note that this pass does the value numbering itself, it does not use the 14f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// ValueNumbering analysis passes. 152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// 16a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//===----------------------------------------------------------------------===// 17a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 18a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#define DEBUG_TYPE "gvn" 192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Transforms/Scalar.h" 202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/BasicBlock.h" 212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Constants.h" 222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/DerivedTypes.h" 232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Function.h" 242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Instructions.h" 252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Value.h" 262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/ADT/DenseMap.h" 272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/ADT/DepthFirstIterator.h" 282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/ADT/PostOrderIterator.h" 292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/ADT/SmallPtrSet.h" 302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/ADT/SmallVector.h" 312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/ADT/Statistic.h" 322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Analysis/Dominators.h" 332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Analysis/AliasAnalysis.h" 342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Analysis/MemoryDependenceAnalysis.h" 352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Support/CFG.h" 362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Support/CommandLine.h" 372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Support/Compiler.h" 382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Support/Debug.h" 392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Transforms/Utils/BasicBlockUtils.h" 402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <cstdio> 412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)using namespace llvm; 422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)STATISTIC(NumGVNInstr, "Number of instructions deleted"); 442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)STATISTIC(NumGVNLoad, "Number of loads deleted"); 452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); 462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)STATISTIC(NumGVNBlocks, "Number of blocks merged"); 472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)STATISTIC(NumPRELoad, "Number of loads PRE'd"); 482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)static cl::opt<bool> EnablePRE("enable-pre", 502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) cl::init(true), cl::Hidden); 512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)cl::opt<bool> EnableLoadPRE("enable-load-pre"/*, cl::init(true)*/); 522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===// 542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// ValueTable Class 552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===// 562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// This class holds the mapping between values and value numbers. It is used 582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// as an efficient mechanism to determine the expression-wise equivalence of 592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// two values. 602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace { 612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) struct VISIBILITY_HIDDEN Expression { 622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM, 632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, 642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, 652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, 662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, 672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, 682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, 692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, 702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, 712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, 722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) EMPTY, TOMBSTONE }; 732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ExpressionOpcode opcode; 752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const Type* type; 762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) uint32_t firstVN; 772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) uint32_t secondVN; 782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) uint32_t thirdVN; 792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SmallVector<uint32_t, 4> varargs; 802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value* function; 812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Expression() { } 832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Expression(ExpressionOpcode o) : opcode(o) { } 842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool operator==(const Expression &other) const { 862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (opcode != other.opcode) 872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else if (opcode == EMPTY || opcode == TOMBSTONE) 892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else if (type != other.type) 912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else if (function != other.function) 932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else if (firstVN != other.firstVN) 952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else if (secondVN != other.secondVN) 972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else if (thirdVN != other.thirdVN) 992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 1002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else { 1012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (varargs.size() != other.varargs.size()) 1022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 1032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) for (size_t i = 0; i < varargs.size(); ++i) 1052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (varargs[i] != other.varargs[i]) 1062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 1072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 1092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 1102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 1112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool operator!=(const Expression &other) const { 11390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) if (opcode != other.opcode) 1142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 1152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else if (opcode == EMPTY || opcode == TOMBSTONE) 1162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 1172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else if (type != other.type) 1182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 1192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else if (function != other.function) 1202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 1212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else if (firstVN != other.firstVN) 1222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 1232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else if (secondVN != other.secondVN) 1247dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch return true; 1252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else if (thirdVN != other.thirdVN) 1267dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch return true; 1273551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) else { 128f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) if (varargs.size() != other.varargs.size()) 1292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 1302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 13158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) for (size_t i = 0; i < varargs.size(); ++i) 1322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (varargs[i] != other.varargs[i]) 13358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return true; 13458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 1352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 1362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 1372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 1382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) }; 1392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) class VISIBILITY_HIDDEN ValueTable { 1412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) private: 1422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value*, uint32_t> valueNumbering; 1432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Expression, uint32_t> expressionNumbering; 1442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) AliasAnalysis* AA; 1452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) MemoryDependenceAnalysis* MD; 1462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DominatorTree* DT; 1472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) uint32_t nextValueNumber; 1492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); 1512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Expression::ExpressionOpcode getOpcode(CmpInst* C); 1522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Expression::ExpressionOpcode getOpcode(CastInst* C); 1532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Expression create_expression(BinaryOperator* BO); 1542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Expression create_expression(CmpInst* C); 1552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Expression create_expression(ShuffleVectorInst* V); 156f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) Expression create_expression(ExtractElementInst* C); 157f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) Expression create_expression(InsertElementInst* V); 158f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) Expression create_expression(SelectInst* V); 159f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) Expression create_expression(CastInst* C); 160f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) Expression create_expression(GetElementPtrInst* G); 161f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) Expression create_expression(CallInst* C); 162f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) Expression create_expression(Constant* C); 163f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) public: 164f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ValueTable() : nextValueNumber(1) { } 165f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) uint32_t lookup_or_add(Value* V); 1662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) uint32_t lookup(Value* V) const; 167f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void add(Value* V, uint32_t num); 1682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) void clear(); 169f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void erase(Value* v); 170f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) unsigned size(); 1712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) void setAliasAnalysis(AliasAnalysis* A) { AA = A; } 1722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) AliasAnalysis *getAliasAnalysis() const { return AA; } 1732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } 1742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) void setDomTree(DominatorTree* D) { DT = D; } 175f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) uint32_t getNextUnusedValueNumber() { return nextValueNumber; } 176f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void verifyRemoved(const Value *) const; 177f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) }; 178f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 179f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 180f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)namespace llvm { 181f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)template <> struct DenseMapInfo<Expression> { 182f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) static inline Expression getEmptyKey() { 183f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return Expression(Expression::EMPTY); 184f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 1857dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch 186f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) static inline Expression getTombstoneKey() { 1872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return Expression(Expression::TOMBSTONE); 1882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 1892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) static unsigned getHashValue(const Expression e) { 191f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) unsigned hash = e.opcode; 192f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 193f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) hash = e.firstVN + hash * 37; 194f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) hash = e.secondVN + hash * 37; 195f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) hash = e.thirdVN + hash * 37; 196f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 197f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) hash = ((unsigned)((uintptr_t)e.type >> 4) ^ 198f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) (unsigned)((uintptr_t)e.type >> 9)) + 199f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) hash * 37; 200f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 201f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), 202f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) E = e.varargs.end(); I != E; ++I) 2037dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch hash = *I + hash * 37; 204f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 2052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) hash = ((unsigned)((uintptr_t)e.function >> 4) ^ 2062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) (unsigned)((uintptr_t)e.function >> 9)) + 2072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) hash * 37; 2082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 209f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return hash; 210f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 211f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) static bool isEqual(const Expression &LHS, const Expression &RHS) { 212f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return LHS == RHS; 213f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 214f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) static bool isPod() { return true; } 215f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}; 216f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 217f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 218f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)//===----------------------------------------------------------------------===// 219f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// ValueTable Internal Functions 2202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===// 2212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { 2222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) switch(BO->getOpcode()) { 223f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) default: // THIS SHOULD NEVER HAPPEN 2242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) assert(0 && "Binary operator with unknown opcode?"); 225f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) case Instruction::Add: return Expression::ADD; 2262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) case Instruction::Sub: return Expression::SUB; 2272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) case Instruction::Mul: return Expression::MUL; 2282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) case Instruction::UDiv: return Expression::UDIV; 2292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) case Instruction::SDiv: return Expression::SDIV; 2302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) case Instruction::FDiv: return Expression::FDIV; 231 case Instruction::URem: return Expression::UREM; 232 case Instruction::SRem: return Expression::SREM; 233 case Instruction::FRem: return Expression::FREM; 234 case Instruction::Shl: return Expression::SHL; 235 case Instruction::LShr: return Expression::LSHR; 236 case Instruction::AShr: return Expression::ASHR; 237 case Instruction::And: return Expression::AND; 238 case Instruction::Or: return Expression::OR; 239 case Instruction::Xor: return Expression::XOR; 240 } 241} 242 243Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { 244 if (isa<ICmpInst>(C) || isa<VICmpInst>(C)) { 245 switch (C->getPredicate()) { 246 default: // THIS SHOULD NEVER HAPPEN 247 assert(0 && "Comparison with unknown predicate?"); 248 case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; 249 case ICmpInst::ICMP_NE: return Expression::ICMPNE; 250 case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; 251 case ICmpInst::ICMP_UGE: return Expression::ICMPUGE; 252 case ICmpInst::ICMP_ULT: return Expression::ICMPULT; 253 case ICmpInst::ICMP_ULE: return Expression::ICMPULE; 254 case ICmpInst::ICMP_SGT: return Expression::ICMPSGT; 255 case ICmpInst::ICMP_SGE: return Expression::ICMPSGE; 256 case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; 257 case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; 258 } 259 } 260 assert((isa<FCmpInst>(C) || isa<VFCmpInst>(C)) && "Unknown compare"); 261 switch (C->getPredicate()) { 262 default: // THIS SHOULD NEVER HAPPEN 263 assert(0 && "Comparison with unknown predicate?"); 264 case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; 265 case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; 266 case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; 267 case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; 268 case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; 269 case FCmpInst::FCMP_ONE: return Expression::FCMPONE; 270 case FCmpInst::FCMP_ORD: return Expression::FCMPORD; 271 case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; 272 case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; 273 case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; 274 case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; 275 case FCmpInst::FCMP_ULT: return Expression::FCMPULT; 276 case FCmpInst::FCMP_ULE: return Expression::FCMPULE; 277 case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; 278 } 279} 280 281Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { 282 switch(C->getOpcode()) { 283 default: // THIS SHOULD NEVER HAPPEN 284 assert(0 && "Cast operator with unknown opcode?"); 285 case Instruction::Trunc: return Expression::TRUNC; 286 case Instruction::ZExt: return Expression::ZEXT; 287 case Instruction::SExt: return Expression::SEXT; 288 case Instruction::FPToUI: return Expression::FPTOUI; 289 case Instruction::FPToSI: return Expression::FPTOSI; 290 case Instruction::UIToFP: return Expression::UITOFP; 291 case Instruction::SIToFP: return Expression::SITOFP; 292 case Instruction::FPTrunc: return Expression::FPTRUNC; 293 case Instruction::FPExt: return Expression::FPEXT; 294 case Instruction::PtrToInt: return Expression::PTRTOINT; 295 case Instruction::IntToPtr: return Expression::INTTOPTR; 296 case Instruction::BitCast: return Expression::BITCAST; 297 } 298} 299 300Expression ValueTable::create_expression(CallInst* C) { 301 Expression e; 302 303 e.type = C->getType(); 304 e.firstVN = 0; 305 e.secondVN = 0; 306 e.thirdVN = 0; 307 e.function = C->getCalledFunction(); 308 e.opcode = Expression::CALL; 309 310 for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); 311 I != E; ++I) 312 e.varargs.push_back(lookup_or_add(*I)); 313 314 return e; 315} 316 317Expression ValueTable::create_expression(BinaryOperator* BO) { 318 Expression e; 319 320 e.firstVN = lookup_or_add(BO->getOperand(0)); 321 e.secondVN = lookup_or_add(BO->getOperand(1)); 322 e.thirdVN = 0; 323 e.function = 0; 324 e.type = BO->getType(); 325 e.opcode = getOpcode(BO); 326 327 return e; 328} 329 330Expression ValueTable::create_expression(CmpInst* C) { 331 Expression e; 332 333 e.firstVN = lookup_or_add(C->getOperand(0)); 334 e.secondVN = lookup_or_add(C->getOperand(1)); 335 e.thirdVN = 0; 336 e.function = 0; 337 e.type = C->getType(); 338 e.opcode = getOpcode(C); 339 340 return e; 341} 342 343Expression ValueTable::create_expression(CastInst* C) { 344 Expression e; 345 346 e.firstVN = lookup_or_add(C->getOperand(0)); 347 e.secondVN = 0; 348 e.thirdVN = 0; 349 e.function = 0; 350 e.type = C->getType(); 351 e.opcode = getOpcode(C); 352 353 return e; 354} 355 356Expression ValueTable::create_expression(ShuffleVectorInst* S) { 357 Expression e; 358 359 e.firstVN = lookup_or_add(S->getOperand(0)); 360 e.secondVN = lookup_or_add(S->getOperand(1)); 361 e.thirdVN = lookup_or_add(S->getOperand(2)); 362 e.function = 0; 363 e.type = S->getType(); 364 e.opcode = Expression::SHUFFLE; 365 366 return e; 367} 368 369Expression ValueTable::create_expression(ExtractElementInst* E) { 370 Expression e; 371 372 e.firstVN = lookup_or_add(E->getOperand(0)); 373 e.secondVN = lookup_or_add(E->getOperand(1)); 374 e.thirdVN = 0; 375 e.function = 0; 376 e.type = E->getType(); 377 e.opcode = Expression::EXTRACT; 378 379 return e; 380} 381 382Expression ValueTable::create_expression(InsertElementInst* I) { 383 Expression e; 384 385 e.firstVN = lookup_or_add(I->getOperand(0)); 386 e.secondVN = lookup_or_add(I->getOperand(1)); 387 e.thirdVN = lookup_or_add(I->getOperand(2)); 388 e.function = 0; 389 e.type = I->getType(); 390 e.opcode = Expression::INSERT; 391 392 return e; 393} 394 395Expression ValueTable::create_expression(SelectInst* I) { 396 Expression e; 397 398 e.firstVN = lookup_or_add(I->getCondition()); 399 e.secondVN = lookup_or_add(I->getTrueValue()); 400 e.thirdVN = lookup_or_add(I->getFalseValue()); 401 e.function = 0; 402 e.type = I->getType(); 403 e.opcode = Expression::SELECT; 404 405 return e; 406} 407 408Expression ValueTable::create_expression(GetElementPtrInst* G) { 409 Expression e; 410 411 e.firstVN = lookup_or_add(G->getPointerOperand()); 412 e.secondVN = 0; 413 e.thirdVN = 0; 414 e.function = 0; 415 e.type = G->getType(); 416 e.opcode = Expression::GEP; 417 418 for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); 419 I != E; ++I) 420 e.varargs.push_back(lookup_or_add(*I)); 421 422 return e; 423} 424 425//===----------------------------------------------------------------------===// 426// ValueTable External Functions 427//===----------------------------------------------------------------------===// 428 429/// add - Insert a value into the table with a specified value number. 430void ValueTable::add(Value* V, uint32_t num) { 431 valueNumbering.insert(std::make_pair(V, num)); 432} 433 434/// lookup_or_add - Returns the value number for the specified value, assigning 435/// it a new number if it did not have one before. 436uint32_t ValueTable::lookup_or_add(Value* V) { 437 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 438 if (VI != valueNumbering.end()) 439 return VI->second; 440 441 if (CallInst* C = dyn_cast<CallInst>(V)) { 442 if (AA->doesNotAccessMemory(C)) { 443 Expression e = create_expression(C); 444 445 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 446 if (EI != expressionNumbering.end()) { 447 valueNumbering.insert(std::make_pair(V, EI->second)); 448 return EI->second; 449 } else { 450 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 451 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 452 453 return nextValueNumber++; 454 } 455 } else if (AA->onlyReadsMemory(C)) { 456 Expression e = create_expression(C); 457 458 if (expressionNumbering.find(e) == expressionNumbering.end()) { 459 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 460 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 461 return nextValueNumber++; 462 } 463 464 MemDepResult local_dep = MD->getDependency(C); 465 466 if (!local_dep.isDef() && !local_dep.isNonLocal()) { 467 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 468 return nextValueNumber++; 469 } 470 471 if (local_dep.isDef()) { 472 CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); 473 474 if (local_cdep->getNumOperands() != C->getNumOperands()) { 475 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 476 return nextValueNumber++; 477 } 478 479 for (unsigned i = 1; i < C->getNumOperands(); ++i) { 480 uint32_t c_vn = lookup_or_add(C->getOperand(i)); 481 uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); 482 if (c_vn != cd_vn) { 483 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 484 return nextValueNumber++; 485 } 486 } 487 488 uint32_t v = lookup_or_add(local_cdep); 489 valueNumbering.insert(std::make_pair(V, v)); 490 return v; 491 } 492 493 // Non-local case. 494 const MemoryDependenceAnalysis::NonLocalDepInfo &deps = 495 MD->getNonLocalCallDependency(CallSite(C)); 496 // FIXME: call/call dependencies for readonly calls should return def, not 497 // clobber! Move the checking logic to MemDep! 498 CallInst* cdep = 0; 499 500 // Check to see if we have a single dominating call instruction that is 501 // identical to C. 502 for (unsigned i = 0, e = deps.size(); i != e; ++i) { 503 const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; 504 // Ignore non-local dependencies. 505 if (I->second.isNonLocal()) 506 continue; 507 508 // We don't handle non-depedencies. If we already have a call, reject 509 // instruction dependencies. 510 if (I->second.isClobber() || cdep != 0) { 511 cdep = 0; 512 break; 513 } 514 515 CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); 516 // FIXME: All duplicated with non-local case. 517 if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ 518 cdep = NonLocalDepCall; 519 continue; 520 } 521 522 cdep = 0; 523 break; 524 } 525 526 if (!cdep) { 527 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 528 return nextValueNumber++; 529 } 530 531 if (cdep->getNumOperands() != C->getNumOperands()) { 532 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 533 return nextValueNumber++; 534 } 535 for (unsigned i = 1; i < C->getNumOperands(); ++i) { 536 uint32_t c_vn = lookup_or_add(C->getOperand(i)); 537 uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); 538 if (c_vn != cd_vn) { 539 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 540 return nextValueNumber++; 541 } 542 } 543 544 uint32_t v = lookup_or_add(cdep); 545 valueNumbering.insert(std::make_pair(V, v)); 546 return v; 547 548 } else { 549 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 550 return nextValueNumber++; 551 } 552 } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) { 553 Expression e = create_expression(BO); 554 555 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 556 if (EI != expressionNumbering.end()) { 557 valueNumbering.insert(std::make_pair(V, EI->second)); 558 return EI->second; 559 } else { 560 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 561 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 562 563 return nextValueNumber++; 564 } 565 } else if (CmpInst* C = dyn_cast<CmpInst>(V)) { 566 Expression e = create_expression(C); 567 568 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 569 if (EI != expressionNumbering.end()) { 570 valueNumbering.insert(std::make_pair(V, EI->second)); 571 return EI->second; 572 } else { 573 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 574 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 575 576 return nextValueNumber++; 577 } 578 } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) { 579 Expression e = create_expression(U); 580 581 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 582 if (EI != expressionNumbering.end()) { 583 valueNumbering.insert(std::make_pair(V, EI->second)); 584 return EI->second; 585 } else { 586 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 587 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 588 589 return nextValueNumber++; 590 } 591 } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) { 592 Expression e = create_expression(U); 593 594 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 595 if (EI != expressionNumbering.end()) { 596 valueNumbering.insert(std::make_pair(V, EI->second)); 597 return EI->second; 598 } else { 599 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 600 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 601 602 return nextValueNumber++; 603 } 604 } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) { 605 Expression e = create_expression(U); 606 607 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 608 if (EI != expressionNumbering.end()) { 609 valueNumbering.insert(std::make_pair(V, EI->second)); 610 return EI->second; 611 } else { 612 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 613 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 614 615 return nextValueNumber++; 616 } 617 } else if (SelectInst* U = dyn_cast<SelectInst>(V)) { 618 Expression e = create_expression(U); 619 620 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 621 if (EI != expressionNumbering.end()) { 622 valueNumbering.insert(std::make_pair(V, EI->second)); 623 return EI->second; 624 } else { 625 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 626 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 627 628 return nextValueNumber++; 629 } 630 } else if (CastInst* U = dyn_cast<CastInst>(V)) { 631 Expression e = create_expression(U); 632 633 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 634 if (EI != expressionNumbering.end()) { 635 valueNumbering.insert(std::make_pair(V, EI->second)); 636 return EI->second; 637 } else { 638 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 639 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 640 641 return nextValueNumber++; 642 } 643 } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) { 644 Expression e = create_expression(U); 645 646 DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); 647 if (EI != expressionNumbering.end()) { 648 valueNumbering.insert(std::make_pair(V, EI->second)); 649 return EI->second; 650 } else { 651 expressionNumbering.insert(std::make_pair(e, nextValueNumber)); 652 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 653 654 return nextValueNumber++; 655 } 656 } else { 657 valueNumbering.insert(std::make_pair(V, nextValueNumber)); 658 return nextValueNumber++; 659 } 660} 661 662/// lookup - Returns the value number of the specified value. Fails if 663/// the value has not yet been numbered. 664uint32_t ValueTable::lookup(Value* V) const { 665 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 666 assert(VI != valueNumbering.end() && "Value not numbered?"); 667 return VI->second; 668} 669 670/// clear - Remove all entries from the ValueTable 671void ValueTable::clear() { 672 valueNumbering.clear(); 673 expressionNumbering.clear(); 674 nextValueNumber = 1; 675} 676 677/// erase - Remove a value from the value numbering 678void ValueTable::erase(Value* V) { 679 valueNumbering.erase(V); 680} 681 682/// verifyRemoved - Verify that the value is removed from all internal data 683/// structures. 684void ValueTable::verifyRemoved(const Value *V) const { 685 for (DenseMap<Value*, uint32_t>::iterator 686 I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { 687 assert(I->first != V && "Inst still occurs in value numbering map!"); 688 } 689} 690 691//===----------------------------------------------------------------------===// 692// GVN Pass 693//===----------------------------------------------------------------------===// 694 695namespace { 696 struct VISIBILITY_HIDDEN ValueNumberScope { 697 ValueNumberScope* parent; 698 DenseMap<uint32_t, Value*> table; 699 700 ValueNumberScope(ValueNumberScope* p) : parent(p) { } 701 }; 702} 703 704namespace { 705 706 class VISIBILITY_HIDDEN GVN : public FunctionPass { 707 bool runOnFunction(Function &F); 708 public: 709 static char ID; // Pass identification, replacement for typeid 710 GVN() : FunctionPass(&ID) { } 711 712 private: 713 MemoryDependenceAnalysis *MD; 714 DominatorTree *DT; 715 716 ValueTable VN; 717 DenseMap<BasicBlock*, ValueNumberScope*> localAvail; 718 719 typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType; 720 PhiMapType phiMap; 721 722 723 // This transformation requires dominator postdominator info 724 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 725 AU.addRequired<DominatorTree>(); 726 AU.addRequired<MemoryDependenceAnalysis>(); 727 AU.addRequired<AliasAnalysis>(); 728 729 AU.addPreserved<DominatorTree>(); 730 AU.addPreserved<AliasAnalysis>(); 731 } 732 733 // Helper fuctions 734 // FIXME: eliminate or document these better 735 bool processLoad(LoadInst* L, 736 SmallVectorImpl<Instruction*> &toErase); 737 bool processInstruction(Instruction* I, 738 SmallVectorImpl<Instruction*> &toErase); 739 bool processNonLocalLoad(LoadInst* L, 740 SmallVectorImpl<Instruction*> &toErase); 741 bool processBlock(BasicBlock* BB); 742 Value *GetValueForBlock(BasicBlock *BB, Instruction* orig, 743 DenseMap<BasicBlock*, Value*> &Phis, 744 bool top_level = false); 745 void dump(DenseMap<uint32_t, Value*>& d); 746 bool iterateOnFunction(Function &F); 747 Value* CollapsePhi(PHINode* p); 748 bool isSafeReplacement(PHINode* p, Instruction* inst); 749 bool performPRE(Function& F); 750 Value* lookupNumber(BasicBlock* BB, uint32_t num); 751 bool mergeBlockIntoPredecessor(BasicBlock* BB); 752 Value* AttemptRedundancyElimination(Instruction* orig, unsigned valno); 753 void cleanupGlobalSets(); 754 void verifyRemoved(const Instruction *I) const; 755 }; 756 757 char GVN::ID = 0; 758} 759 760// createGVNPass - The public interface to this file... 761FunctionPass *llvm::createGVNPass() { return new GVN(); } 762 763static RegisterPass<GVN> X("gvn", 764 "Global Value Numbering"); 765 766void GVN::dump(DenseMap<uint32_t, Value*>& d) { 767 printf("{\n"); 768 for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), 769 E = d.end(); I != E; ++I) { 770 printf("%d\n", I->first); 771 I->second->dump(); 772 } 773 printf("}\n"); 774} 775 776Value* GVN::CollapsePhi(PHINode* p) { 777 Value* constVal = p->hasConstantValue(); 778 if (!constVal) return 0; 779 780 Instruction* inst = dyn_cast<Instruction>(constVal); 781 if (!inst) 782 return constVal; 783 784 if (DT->dominates(inst, p)) 785 if (isSafeReplacement(p, inst)) 786 return inst; 787 return 0; 788} 789 790bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) { 791 if (!isa<PHINode>(inst)) 792 return true; 793 794 for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); 795 UI != E; ++UI) 796 if (PHINode* use_phi = dyn_cast<PHINode>(UI)) 797 if (use_phi->getParent() == inst->getParent()) 798 return false; 799 800 return true; 801} 802 803/// GetValueForBlock - Get the value to use within the specified basic block. 804/// available values are in Phis. 805Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, 806 DenseMap<BasicBlock*, Value*> &Phis, 807 bool top_level) { 808 809 // If we have already computed this value, return the previously computed val. 810 DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB); 811 if (V != Phis.end() && !top_level) return V->second; 812 813 // If the block is unreachable, just return undef, since this path 814 // can't actually occur at runtime. 815 if (!DT->isReachableFromEntry(BB)) 816 return Phis[BB] = UndefValue::get(orig->getType()); 817 818 if (BasicBlock *Pred = BB->getSinglePredecessor()) { 819 Value *ret = GetValueForBlock(Pred, orig, Phis); 820 Phis[BB] = ret; 821 return ret; 822 } 823 824 // Get the number of predecessors of this block so we can reserve space later. 825 // If there is already a PHI in it, use the #preds from it, otherwise count. 826 // Getting it from the PHI is constant time. 827 unsigned NumPreds; 828 if (PHINode *ExistingPN = dyn_cast<PHINode>(BB->begin())) 829 NumPreds = ExistingPN->getNumIncomingValues(); 830 else 831 NumPreds = std::distance(pred_begin(BB), pred_end(BB)); 832 833 // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so 834 // now, then get values to fill in the incoming values for the PHI. 835 PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle", 836 BB->begin()); 837 PN->reserveOperandSpace(NumPreds); 838 839 Phis.insert(std::make_pair(BB, PN)); 840 841 // Fill in the incoming values for the block. 842 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 843 Value* val = GetValueForBlock(*PI, orig, Phis); 844 PN->addIncoming(val, *PI); 845 } 846 847 VN.getAliasAnalysis()->copyValue(orig, PN); 848 849 // Attempt to collapse PHI nodes that are trivially redundant 850 Value* v = CollapsePhi(PN); 851 if (!v) { 852 // Cache our phi construction results 853 if (LoadInst* L = dyn_cast<LoadInst>(orig)) 854 phiMap[L->getPointerOperand()].insert(PN); 855 else 856 phiMap[orig].insert(PN); 857 858 return PN; 859 } 860 861 PN->replaceAllUsesWith(v); 862 if (isa<PointerType>(v->getType())) 863 MD->invalidateCachedPointerInfo(v); 864 865 for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(), 866 E = Phis.end(); I != E; ++I) 867 if (I->second == PN) 868 I->second = v; 869 870 DEBUG(cerr << "GVN removed: " << *PN); 871 MD->removeInstruction(PN); 872 PN->eraseFromParent(); 873 DEBUG(verifyRemoved(PN)); 874 875 Phis[BB] = v; 876 return v; 877} 878 879/// IsValueFullyAvailableInBlock - Return true if we can prove that the value 880/// we're analyzing is fully available in the specified block. As we go, keep 881/// track of which blocks we know are fully alive in FullyAvailableBlocks. This 882/// map is actually a tri-state map with the following values: 883/// 0) we know the block *is not* fully available. 884/// 1) we know the block *is* fully available. 885/// 2) we do not know whether the block is fully available or not, but we are 886/// currently speculating that it will be. 887/// 3) we are speculating for this block and have used that to speculate for 888/// other blocks. 889static bool IsValueFullyAvailableInBlock(BasicBlock *BB, 890 DenseMap<BasicBlock*, char> &FullyAvailableBlocks) { 891 // Optimistically assume that the block is fully available and check to see 892 // if we already know about this block in one lookup. 893 std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV = 894 FullyAvailableBlocks.insert(std::make_pair(BB, 2)); 895 896 // If the entry already existed for this block, return the precomputed value. 897 if (!IV.second) { 898 // If this is a speculative "available" value, mark it as being used for 899 // speculation of other blocks. 900 if (IV.first->second == 2) 901 IV.first->second = 3; 902 return IV.first->second != 0; 903 } 904 905 // Otherwise, see if it is fully available in all predecessors. 906 pred_iterator PI = pred_begin(BB), PE = pred_end(BB); 907 908 // If this block has no predecessors, it isn't live-in here. 909 if (PI == PE) 910 goto SpeculationFailure; 911 912 for (; PI != PE; ++PI) 913 // If the value isn't fully available in one of our predecessors, then it 914 // isn't fully available in this block either. Undo our previous 915 // optimistic assumption and bail out. 916 if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) 917 goto SpeculationFailure; 918 919 return true; 920 921// SpeculationFailure - If we get here, we found out that this is not, after 922// all, a fully-available block. We have a problem if we speculated on this and 923// used the speculation to mark other blocks as available. 924SpeculationFailure: 925 char &BBVal = FullyAvailableBlocks[BB]; 926 927 // If we didn't speculate on this, just return with it set to false. 928 if (BBVal == 2) { 929 BBVal = 0; 930 return false; 931 } 932 933 // If we did speculate on this value, we could have blocks set to 1 that are 934 // incorrect. Walk the (transitive) successors of this block and mark them as 935 // 0 if set to one. 936 SmallVector<BasicBlock*, 32> BBWorklist; 937 BBWorklist.push_back(BB); 938 939 while (!BBWorklist.empty()) { 940 BasicBlock *Entry = BBWorklist.pop_back_val(); 941 // Note that this sets blocks to 0 (unavailable) if they happen to not 942 // already be in FullyAvailableBlocks. This is safe. 943 char &EntryVal = FullyAvailableBlocks[Entry]; 944 if (EntryVal == 0) continue; // Already unavailable. 945 946 // Mark as unavailable. 947 EntryVal = 0; 948 949 for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I) 950 BBWorklist.push_back(*I); 951 } 952 953 return false; 954} 955 956/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are 957/// non-local by performing PHI construction. 958bool GVN::processNonLocalLoad(LoadInst *LI, 959 SmallVectorImpl<Instruction*> &toErase) { 960 // Find the non-local dependencies of the load. 961 SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps; 962 MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), 963 Deps); 964 //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI); 965 966 // If we had to process more than one hundred blocks to find the 967 // dependencies, this load isn't worth worrying about. Optimizing 968 // it will be too expensive. 969 if (Deps.size() > 100) 970 return false; 971 972 // If we had a phi translation failure, we'll have a single entry which is a 973 // clobber in the current block. Reject this early. 974 if (Deps.size() == 1 && Deps[0].second.isClobber()) 975 return false; 976 977 // Filter out useless results (non-locals, etc). Keep track of the blocks 978 // where we have a value available in repl, also keep track of whether we see 979 // dependencies that produce an unknown value for the load (such as a call 980 // that could potentially clobber the load). 981 SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock; 982 SmallVector<BasicBlock*, 16> UnavailableBlocks; 983 984 for (unsigned i = 0, e = Deps.size(); i != e; ++i) { 985 BasicBlock *DepBB = Deps[i].first; 986 MemDepResult DepInfo = Deps[i].second; 987 988 if (DepInfo.isClobber()) { 989 UnavailableBlocks.push_back(DepBB); 990 continue; 991 } 992 993 Instruction *DepInst = DepInfo.getInst(); 994 995 // Loading the allocation -> undef. 996 if (isa<AllocationInst>(DepInst)) { 997 ValuesPerBlock.push_back(std::make_pair(DepBB, 998 UndefValue::get(LI->getType()))); 999 continue; 1000 } 1001 1002 if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) { 1003 // Reject loads and stores that are to the same address but are of 1004 // different types. 1005 // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because 1006 // of bitfield access, it would be interesting to optimize for it at some 1007 // point. 1008 if (S->getOperand(0)->getType() != LI->getType()) { 1009 UnavailableBlocks.push_back(DepBB); 1010 continue; 1011 } 1012 1013 ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0))); 1014 1015 } else if (LoadInst* LD = dyn_cast<LoadInst>(DepInst)) { 1016 if (LD->getType() != LI->getType()) { 1017 UnavailableBlocks.push_back(DepBB); 1018 continue; 1019 } 1020 ValuesPerBlock.push_back(std::make_pair(DepBB, LD)); 1021 } else { 1022 UnavailableBlocks.push_back(DepBB); 1023 continue; 1024 } 1025 } 1026 1027 // If we have no predecessors that produce a known value for this load, exit 1028 // early. 1029 if (ValuesPerBlock.empty()) return false; 1030 1031 // If all of the instructions we depend on produce a known value for this 1032 // load, then it is fully redundant and we can use PHI insertion to compute 1033 // its value. Insert PHIs and remove the fully redundant value now. 1034 if (UnavailableBlocks.empty()) { 1035 // Use cached PHI construction information from previous runs 1036 SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()]; 1037 // FIXME: What does phiMap do? Are we positive it isn't getting invalidated? 1038 for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end(); 1039 I != E; ++I) { 1040 if ((*I)->getParent() == LI->getParent()) { 1041 DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI); 1042 LI->replaceAllUsesWith(*I); 1043 if (isa<PointerType>((*I)->getType())) 1044 MD->invalidateCachedPointerInfo(*I); 1045 toErase.push_back(LI); 1046 NumGVNLoad++; 1047 return true; 1048 } 1049 1050 ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); 1051 } 1052 1053 DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI); 1054 1055 DenseMap<BasicBlock*, Value*> BlockReplValues; 1056 BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); 1057 // Perform PHI construction. 1058 Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); 1059 LI->replaceAllUsesWith(v); 1060 1061 if (!isa<GlobalValue>(v)) 1062 v->takeName(LI); 1063 if (isa<PointerType>(v->getType())) 1064 MD->invalidateCachedPointerInfo(v); 1065 toErase.push_back(LI); 1066 NumGVNLoad++; 1067 return true; 1068 } 1069 1070 if (!EnablePRE || !EnableLoadPRE) 1071 return false; 1072 1073 // Okay, we have *some* definitions of the value. This means that the value 1074 // is available in some of our (transitive) predecessors. Lets think about 1075 // doing PRE of this load. This will involve inserting a new load into the 1076 // predecessor when it's not available. We could do this in general, but 1077 // prefer to not increase code size. As such, we only do this when we know 1078 // that we only have to insert *one* load (which means we're basically moving 1079 // the load, not inserting a new one). 1080 1081 // Everything we do here is based on local predecessors of LI's block. If it 1082 // only has one predecessor, bail now. 1083 BasicBlock *LoadBB = LI->getParent(); 1084 if (LoadBB->getSinglePredecessor()) 1085 return false; 1086 1087 // If we have a repl set with LI itself in it, this means we have a loop where 1088 // at least one of the values is LI. Since this means that we won't be able 1089 // to eliminate LI even if we insert uses in the other predecessors, we will 1090 // end up increasing code size. Reject this by scanning for LI. 1091 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) 1092 if (ValuesPerBlock[i].second == LI) 1093 return false; 1094 1095 // Okay, we have some hope :). Check to see if the loaded value is fully 1096 // available in all but one predecessor. 1097 // FIXME: If we could restructure the CFG, we could make a common pred with 1098 // all the preds that don't have an available LI and insert a new load into 1099 // that one block. 1100 BasicBlock *UnavailablePred = 0; 1101 1102 DenseMap<BasicBlock*, char> FullyAvailableBlocks; 1103 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) 1104 FullyAvailableBlocks[ValuesPerBlock[i].first] = true; 1105 for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) 1106 FullyAvailableBlocks[UnavailableBlocks[i]] = false; 1107 1108 for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); 1109 PI != E; ++PI) { 1110 if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) 1111 continue; 1112 1113 // If this load is not available in multiple predecessors, reject it. 1114 if (UnavailablePred && UnavailablePred != *PI) 1115 return false; 1116 UnavailablePred = *PI; 1117 } 1118 1119 assert(UnavailablePred != 0 && 1120 "Fully available value should be eliminated above!"); 1121 1122 // If the loaded pointer is PHI node defined in this block, do PHI translation 1123 // to get its value in the predecessor. 1124 Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred); 1125 1126 // Make sure the value is live in the predecessor. If it was defined by a 1127 // non-PHI instruction in this block, we don't know how to recompute it above. 1128 if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr)) 1129 if (!DT->dominates(LPInst->getParent(), UnavailablePred)) { 1130 DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " 1131 << *LPInst << *LI << "\n"); 1132 return false; 1133 } 1134 1135 // We don't currently handle critical edges :( 1136 if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { 1137 DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" 1138 << UnavailablePred->getName() << "': " << *LI); 1139 return false; 1140 } 1141 1142 // Okay, we can eliminate this load by inserting a reload in the predecessor 1143 // and using PHI construction to get the value in the other predecessors, do 1144 // it. 1145 DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI); 1146 1147 Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, 1148 LI->getAlignment(), 1149 UnavailablePred->getTerminator()); 1150 1151 DenseMap<BasicBlock*, Value*> BlockReplValues; 1152 BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); 1153 BlockReplValues[UnavailablePred] = NewLoad; 1154 1155 // Perform PHI construction. 1156 Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); 1157 LI->replaceAllUsesWith(v); 1158 if (!isa<GlobalValue>(v)) 1159 v->takeName(LI); 1160 if (isa<PointerType>(v->getType())) 1161 MD->invalidateCachedPointerInfo(v); 1162 toErase.push_back(LI); 1163 NumPRELoad++; 1164 return true; 1165} 1166 1167/// processLoad - Attempt to eliminate a load, first by eliminating it 1168/// locally, and then attempting non-local elimination if that fails. 1169bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { 1170 if (L->isVolatile()) 1171 return false; 1172 1173 Value* pointer = L->getPointerOperand(); 1174 1175 // ... to a pointer that has been loaded from before... 1176 MemDepResult dep = MD->getDependency(L); 1177 1178 // If the value isn't available, don't do anything! 1179 if (dep.isClobber()) 1180 return false; 1181 1182 // If it is defined in another block, try harder. 1183 if (dep.isNonLocal()) 1184 return processNonLocalLoad(L, toErase); 1185 1186 Instruction *DepInst = dep.getInst(); 1187 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { 1188 // Only forward substitute stores to loads of the same type. 1189 // FIXME: Could do better! 1190 if (DepSI->getPointerOperand()->getType() != pointer->getType()) 1191 return false; 1192 1193 // Remove it! 1194 L->replaceAllUsesWith(DepSI->getOperand(0)); 1195 if (isa<PointerType>(DepSI->getOperand(0)->getType())) 1196 MD->invalidateCachedPointerInfo(DepSI->getOperand(0)); 1197 toErase.push_back(L); 1198 NumGVNLoad++; 1199 return true; 1200 } 1201 1202 if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) { 1203 // Only forward substitute stores to loads of the same type. 1204 // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian. 1205 if (DepLI->getType() != L->getType()) 1206 return false; 1207 1208 // Remove it! 1209 L->replaceAllUsesWith(DepLI); 1210 if (isa<PointerType>(DepLI->getType())) 1211 MD->invalidateCachedPointerInfo(DepLI); 1212 toErase.push_back(L); 1213 NumGVNLoad++; 1214 return true; 1215 } 1216 1217 // If this load really doesn't depend on anything, then we must be loading an 1218 // undef value. This can happen when loading for a fresh allocation with no 1219 // intervening stores, for example. 1220 if (isa<AllocationInst>(DepInst)) { 1221 L->replaceAllUsesWith(UndefValue::get(L->getType())); 1222 toErase.push_back(L); 1223 NumGVNLoad++; 1224 return true; 1225 } 1226 1227 return false; 1228} 1229 1230Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) { 1231 DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB); 1232 if (I == localAvail.end()) 1233 return 0; 1234 1235 ValueNumberScope* locals = I->second; 1236 1237 while (locals) { 1238 DenseMap<uint32_t, Value*>::iterator I = locals->table.find(num); 1239 if (I != locals->table.end()) 1240 return I->second; 1241 else 1242 locals = locals->parent; 1243 } 1244 1245 return 0; 1246} 1247 1248/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination 1249/// by inheritance from the dominator fails, see if we can perform phi 1250/// construction to eliminate the redundancy. 1251Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) { 1252 BasicBlock* BaseBlock = orig->getParent(); 1253 1254 SmallPtrSet<BasicBlock*, 4> Visited; 1255 SmallVector<BasicBlock*, 8> Stack; 1256 Stack.push_back(BaseBlock); 1257 1258 DenseMap<BasicBlock*, Value*> Results; 1259 1260 // Walk backwards through our predecessors, looking for instances of the 1261 // value number we're looking for. Instances are recorded in the Results 1262 // map, which is then used to perform phi construction. 1263 while (!Stack.empty()) { 1264 BasicBlock* Current = Stack.back(); 1265 Stack.pop_back(); 1266 1267 // If we've walked all the way to a proper dominator, then give up. Cases 1268 // where the instance is in the dominator will have been caught by the fast 1269 // path, and any cases that require phi construction further than this are 1270 // probably not worth it anyways. Note that this is a SIGNIFICANT compile 1271 // time improvement. 1272 if (DT->properlyDominates(Current, orig->getParent())) return 0; 1273 1274 DenseMap<BasicBlock*, ValueNumberScope*>::iterator LA = 1275 localAvail.find(Current); 1276 if (LA == localAvail.end()) return 0; 1277 DenseMap<unsigned, Value*>::iterator V = LA->second->table.find(valno); 1278 1279 if (V != LA->second->table.end()) { 1280 // Found an instance, record it. 1281 Results.insert(std::make_pair(Current, V->second)); 1282 continue; 1283 } 1284 1285 // If we reach the beginning of the function, then give up. 1286 if (pred_begin(Current) == pred_end(Current)) 1287 return 0; 1288 1289 for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current); 1290 PI != PE; ++PI) 1291 if (Visited.insert(*PI)) 1292 Stack.push_back(*PI); 1293 } 1294 1295 // If we didn't find instances, give up. Otherwise, perform phi construction. 1296 if (Results.size() == 0) 1297 return 0; 1298 else 1299 return GetValueForBlock(BaseBlock, orig, Results, true); 1300} 1301 1302/// processInstruction - When calculating availability, handle an instruction 1303/// by inserting it into the appropriate sets 1304bool GVN::processInstruction(Instruction *I, 1305 SmallVectorImpl<Instruction*> &toErase) { 1306 if (LoadInst* L = dyn_cast<LoadInst>(I)) { 1307 bool changed = processLoad(L, toErase); 1308 1309 if (!changed) { 1310 unsigned num = VN.lookup_or_add(L); 1311 localAvail[I->getParent()]->table.insert(std::make_pair(num, L)); 1312 } 1313 1314 return changed; 1315 } 1316 1317 uint32_t nextNum = VN.getNextUnusedValueNumber(); 1318 unsigned num = VN.lookup_or_add(I); 1319 1320 // Allocations are always uniquely numbered, so we can save time and memory 1321 // by fast failing them. 1322 if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) { 1323 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1324 return false; 1325 } 1326 1327 // Collapse PHI nodes 1328 if (PHINode* p = dyn_cast<PHINode>(I)) { 1329 Value* constVal = CollapsePhi(p); 1330 1331 if (constVal) { 1332 for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end(); 1333 PI != PE; ++PI) 1334 PI->second.erase(p); 1335 1336 p->replaceAllUsesWith(constVal); 1337 if (isa<PointerType>(constVal->getType())) 1338 MD->invalidateCachedPointerInfo(constVal); 1339 toErase.push_back(p); 1340 } else { 1341 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1342 } 1343 1344 // If the number we were assigned was a brand new VN, then we don't 1345 // need to do a lookup to see if the number already exists 1346 // somewhere in the domtree: it can't! 1347 } else if (num == nextNum) { 1348 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1349 1350 // Perform fast-path value-number based elimination of values inherited from 1351 // dominators. 1352 } else if (Value* repl = lookupNumber(I->getParent(), num)) { 1353 // Remove it! 1354 VN.erase(I); 1355 I->replaceAllUsesWith(repl); 1356 if (isa<PointerType>(repl->getType())) 1357 MD->invalidateCachedPointerInfo(repl); 1358 toErase.push_back(I); 1359 return true; 1360 1361#if 0 1362 // Perform slow-pathvalue-number based elimination with phi construction. 1363 } else if (Value* repl = AttemptRedundancyElimination(I, num)) { 1364 // Remove it! 1365 VN.erase(I); 1366 I->replaceAllUsesWith(repl); 1367 if (isa<PointerType>(repl->getType())) 1368 MD->invalidateCachedPointerInfo(repl); 1369 toErase.push_back(I); 1370 return true; 1371#endif 1372 } else { 1373 localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); 1374 } 1375 1376 return false; 1377} 1378 1379// GVN::runOnFunction - This is the main transformation entry point for a 1380// function. 1381// 1382bool GVN::runOnFunction(Function& F) { 1383 MD = &getAnalysis<MemoryDependenceAnalysis>(); 1384 DT = &getAnalysis<DominatorTree>(); 1385 VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); 1386 VN.setMemDep(MD); 1387 VN.setDomTree(DT); 1388 1389 bool changed = false; 1390 bool shouldContinue = true; 1391 1392 // Merge unconditional branches, allowing PRE to catch more 1393 // optimization opportunities. 1394 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { 1395 BasicBlock* BB = FI; 1396 ++FI; 1397 bool removedBlock = MergeBlockIntoPredecessor(BB, this); 1398 if (removedBlock) NumGVNBlocks++; 1399 1400 changed |= removedBlock; 1401 } 1402 1403 unsigned Iteration = 0; 1404 1405 while (shouldContinue) { 1406 DEBUG(cerr << "GVN iteration: " << Iteration << "\n"); 1407 shouldContinue = iterateOnFunction(F); 1408 changed |= shouldContinue; 1409 ++Iteration; 1410 } 1411 1412 if (EnablePRE) { 1413 bool PREChanged = true; 1414 while (PREChanged) { 1415 PREChanged = performPRE(F); 1416 changed |= PREChanged; 1417 } 1418 } 1419 // FIXME: Should perform GVN again after PRE does something. PRE can move 1420 // computations into blocks where they become fully redundant. Note that 1421 // we can't do this until PRE's critical edge splitting updates memdep. 1422 // Actually, when this happens, we should just fully integrate PRE into GVN. 1423 1424 cleanupGlobalSets(); 1425 1426 return changed; 1427} 1428 1429 1430bool GVN::processBlock(BasicBlock* BB) { 1431 DomTreeNode* DTN = DT->getNode(BB); 1432 // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and 1433 // incrementing BI before processing an instruction). 1434 SmallVector<Instruction*, 8> toErase; 1435 bool changed_function = false; 1436 1437 if (DTN->getIDom()) 1438 localAvail[BB] = 1439 new ValueNumberScope(localAvail[DTN->getIDom()->getBlock()]); 1440 else 1441 localAvail[BB] = new ValueNumberScope(0); 1442 1443 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 1444 BI != BE;) { 1445 changed_function |= processInstruction(BI, toErase); 1446 if (toErase.empty()) { 1447 ++BI; 1448 continue; 1449 } 1450 1451 // If we need some instructions deleted, do it now. 1452 NumGVNInstr += toErase.size(); 1453 1454 // Avoid iterator invalidation. 1455 bool AtStart = BI == BB->begin(); 1456 if (!AtStart) 1457 --BI; 1458 1459 for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), 1460 E = toErase.end(); I != E; ++I) { 1461 DEBUG(cerr << "GVN removed: " << **I); 1462 MD->removeInstruction(*I); 1463 (*I)->eraseFromParent(); 1464 DEBUG(verifyRemoved(*I)); 1465 } 1466 toErase.clear(); 1467 1468 if (AtStart) 1469 BI = BB->begin(); 1470 else 1471 ++BI; 1472 } 1473 1474 return changed_function; 1475} 1476 1477/// performPRE - Perform a purely local form of PRE that looks for diamond 1478/// control flow patterns and attempts to perform simple PRE at the join point. 1479bool GVN::performPRE(Function& F) { 1480 bool Changed = false; 1481 SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit; 1482 DenseMap<BasicBlock*, Value*> predMap; 1483 for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), 1484 DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { 1485 BasicBlock* CurrentBlock = *DI; 1486 1487 // Nothing to PRE in the entry block. 1488 if (CurrentBlock == &F.getEntryBlock()) continue; 1489 1490 for (BasicBlock::iterator BI = CurrentBlock->begin(), 1491 BE = CurrentBlock->end(); BI != BE; ) { 1492 Instruction *CurInst = BI++; 1493 1494 if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) || 1495 isa<PHINode>(CurInst) || CurInst->mayReadFromMemory() || 1496 CurInst->mayWriteToMemory()) 1497 continue; 1498 1499 uint32_t valno = VN.lookup(CurInst); 1500 1501 // Look for the predecessors for PRE opportunities. We're 1502 // only trying to solve the basic diamond case, where 1503 // a value is computed in the successor and one predecessor, 1504 // but not the other. We also explicitly disallow cases 1505 // where the successor is its own predecessor, because they're 1506 // more complicated to get right. 1507 unsigned numWith = 0; 1508 unsigned numWithout = 0; 1509 BasicBlock* PREPred = 0; 1510 predMap.clear(); 1511 1512 for (pred_iterator PI = pred_begin(CurrentBlock), 1513 PE = pred_end(CurrentBlock); PI != PE; ++PI) { 1514 // We're not interested in PRE where the block is its 1515 // own predecessor, on in blocks with predecessors 1516 // that are not reachable. 1517 if (*PI == CurrentBlock) { 1518 numWithout = 2; 1519 break; 1520 } else if (!localAvail.count(*PI)) { 1521 numWithout = 2; 1522 break; 1523 } 1524 1525 DenseMap<uint32_t, Value*>::iterator predV = 1526 localAvail[*PI]->table.find(valno); 1527 if (predV == localAvail[*PI]->table.end()) { 1528 PREPred = *PI; 1529 numWithout++; 1530 } else if (predV->second == CurInst) { 1531 numWithout = 2; 1532 } else { 1533 predMap[*PI] = predV->second; 1534 numWith++; 1535 } 1536 } 1537 1538 // Don't do PRE when it might increase code size, i.e. when 1539 // we would need to insert instructions in more than one pred. 1540 if (numWithout != 1 || numWith == 0) 1541 continue; 1542 1543 // We can't do PRE safely on a critical edge, so instead we schedule 1544 // the edge to be split and perform the PRE the next time we iterate 1545 // on the function. 1546 unsigned succNum = 0; 1547 for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors(); 1548 i != e; ++i) 1549 if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) { 1550 succNum = i; 1551 break; 1552 } 1553 1554 if (isCriticalEdge(PREPred->getTerminator(), succNum)) { 1555 toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum)); 1556 continue; 1557 } 1558 1559 // Instantiate the expression the in predecessor that lacked it. 1560 // Because we are going top-down through the block, all value numbers 1561 // will be available in the predecessor by the time we need them. Any 1562 // that weren't original present will have been instantiated earlier 1563 // in this loop. 1564 Instruction* PREInstr = CurInst->clone(); 1565 bool success = true; 1566 for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { 1567 Value *Op = PREInstr->getOperand(i); 1568 if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) 1569 continue; 1570 1571 if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) { 1572 PREInstr->setOperand(i, V); 1573 } else { 1574 success = false; 1575 break; 1576 } 1577 } 1578 1579 // Fail out if we encounter an operand that is not available in 1580 // the PRE predecessor. This is typically because of loads which 1581 // are not value numbered precisely. 1582 if (!success) { 1583 delete PREInstr; 1584 DEBUG(verifyRemoved(PREInstr)); 1585 continue; 1586 } 1587 1588 PREInstr->insertBefore(PREPred->getTerminator()); 1589 PREInstr->setName(CurInst->getName() + ".pre"); 1590 predMap[PREPred] = PREInstr; 1591 VN.add(PREInstr, valno); 1592 NumGVNPRE++; 1593 1594 // Update the availability map to include the new instruction. 1595 localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr)); 1596 1597 // Create a PHI to make the value available in this block. 1598 PHINode* Phi = PHINode::Create(CurInst->getType(), 1599 CurInst->getName() + ".pre-phi", 1600 CurrentBlock->begin()); 1601 for (pred_iterator PI = pred_begin(CurrentBlock), 1602 PE = pred_end(CurrentBlock); PI != PE; ++PI) 1603 Phi->addIncoming(predMap[*PI], *PI); 1604 1605 VN.add(Phi, valno); 1606 localAvail[CurrentBlock]->table[valno] = Phi; 1607 1608 CurInst->replaceAllUsesWith(Phi); 1609 if (isa<PointerType>(Phi->getType())) 1610 MD->invalidateCachedPointerInfo(Phi); 1611 VN.erase(CurInst); 1612 1613 DEBUG(cerr << "GVN PRE removed: " << *CurInst); 1614 MD->removeInstruction(CurInst); 1615 CurInst->eraseFromParent(); 1616 DEBUG(verifyRemoved(CurInst)); 1617 Changed = true; 1618 } 1619 } 1620 1621 for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator 1622 I = toSplit.begin(), E = toSplit.end(); I != E; ++I) 1623 SplitCriticalEdge(I->first, I->second, this); 1624 1625 return Changed || toSplit.size(); 1626} 1627 1628// iterateOnFunction - Executes one iteration of GVN 1629bool GVN::iterateOnFunction(Function &F) { 1630 cleanupGlobalSets(); 1631 1632 // Top-down walk of the dominator tree 1633 bool changed = false; 1634#if 0 1635 // Needed for value numbering with phi construction to work. 1636 ReversePostOrderTraversal<Function*> RPOT(&F); 1637 for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(), 1638 RE = RPOT.end(); RI != RE; ++RI) 1639 changed |= processBlock(*RI); 1640#else 1641 for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), 1642 DE = df_end(DT->getRootNode()); DI != DE; ++DI) 1643 changed |= processBlock(DI->getBlock()); 1644#endif 1645 1646 return changed; 1647} 1648 1649void GVN::cleanupGlobalSets() { 1650 VN.clear(); 1651 phiMap.clear(); 1652 1653 for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator 1654 I = localAvail.begin(), E = localAvail.end(); I != E; ++I) 1655 delete I->second; 1656 localAvail.clear(); 1657} 1658 1659/// verifyRemoved - Verify that the specified instruction does not occur in our 1660/// internal data structures. 1661void GVN::verifyRemoved(const Instruction *I) const { 1662 VN.verifyRemoved(I); 1663 1664 // Walk through the PHI map to make sure the instruction isn't hiding in there 1665 // somewhere. 1666 for (PhiMapType::iterator 1667 II = phiMap.begin(), IE = phiMap.end(); II != IE; ++II) { 1668 assert(II->first != I && "Inst is still a key in PHI map!"); 1669 1670 for (SmallPtrSet<Instruction*, 4>::iterator 1671 SI = II->second.begin(), SE = II->second.end(); SI != SE; ++SI) { 1672 assert(*SI != I && "Inst is still a value in PHI map!"); 1673 } 1674 } 1675} 1676