BypassSlowDivision.cpp revision 7b2d20d0600ffbc9ae6df67a18b6f6485ebceb54
1//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains an optimization for div and rem on architectures that 11// execute short instructions significantly faster than longer instructions. 12// For example, on Intel Atom 32-bit divides are slow enough that during 13// runtime it is profitable to check the value of the operands, and if they are 14// positive and less than 256 use an unsigned 8-bit divide. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "bypass-slow-division" 19#include "llvm/Instructions.h" 20#include "llvm/Function.h" 21#include "llvm/IRBuilder.h" 22#include "llvm/ADT/DenseMap.h" 23#include "llvm/Transforms/Utils/BypassSlowDivision.h" 24 25using namespace llvm; 26 27namespace llvm { 28 struct DivOpInfo { 29 bool SignedOp; 30 Value *Dividend; 31 Value *Divisor; 32 33 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) 34 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} 35 }; 36 37 struct DivPhiNodes { 38 PHINode *Quotient; 39 PHINode *Remainder; 40 41 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) 42 : Quotient(InQuotient), Remainder(InRemainder) {} 43 }; 44 45 template<> 46 struct DenseMapInfo<DivOpInfo> { 47 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { 48 return Val1.SignedOp == Val2.SignedOp && 49 Val1.Dividend == Val2.Dividend && 50 Val1.Divisor == Val2.Divisor; 51 } 52 53 static DivOpInfo getEmptyKey() { 54 return DivOpInfo(false, 0, 0); 55 } 56 57 static DivOpInfo getTombstoneKey() { 58 return DivOpInfo(true, 0, 0); 59 } 60 61 static unsigned getHashValue(const DivOpInfo &Val) { 62 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ 63 reinterpret_cast<uintptr_t>(Val.Divisor)) ^ 64 (unsigned)Val.SignedOp; 65 } 66 }; 67 68 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; 69} 70 71// insertFastDiv - Substitutes the div/rem instruction with code that checks the 72// value of the operands and uses a shorter-faster div/rem instruction when 73// possible and the longer-slower div/rem instruction otherwise. 74static bool insertFastDiv(Function &F, 75 Function::iterator &I, 76 BasicBlock::iterator &J, 77 IntegerType *BypassType, 78 bool UseDivOp, 79 bool UseSignedOp, 80 DivCacheTy &PerBBDivCache) { 81 // Get instruction operands 82 Instruction *Instr = J; 83 Value *Dividend = Instr->getOperand(0); 84 Value *Divisor = Instr->getOperand(1); 85 86 if (isa<ConstantInt>(Divisor) || 87 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { 88 // Operations with immediate values should have 89 // been solved and replaced during compile time. 90 return false; 91 } 92 93 // Basic Block is split before divide 94 BasicBlock *MainBB = I; 95 BasicBlock *SuccessorBB = I->splitBasicBlock(J); 96 ++I; //advance iterator I to successorBB 97 98 // Add new basic block for slow divide operation 99 BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", 100 MainBB->getParent(), SuccessorBB); 101 SlowBB->moveBefore(SuccessorBB); 102 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); 103 Value *SlowQuotientV; 104 Value *SlowRemainderV; 105 if (UseSignedOp) { 106 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); 107 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); 108 } else { 109 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); 110 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); 111 } 112 SlowBuilder.CreateBr(SuccessorBB); 113 114 // Add new basic block for fast divide operation 115 BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", 116 MainBB->getParent(), SuccessorBB); 117 FastBB->moveBefore(SlowBB); 118 IRBuilder<> FastBuilder(FastBB, FastBB->begin()); 119 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, 120 BypassType); 121 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, 122 BypassType); 123 124 // udiv/urem because optimization only handles positive numbers 125 Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, 126 ShortDivisorV); 127 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, 128 ShortDivisorV); 129 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, 130 ShortQuotientV, 131 Dividend->getType()); 132 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, 133 ShortRemainderV, 134 Dividend->getType()); 135 FastBuilder.CreateBr(SuccessorBB); 136 137 // Phi nodes for result of div and rem 138 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); 139 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); 140 QuoPhi->addIncoming(SlowQuotientV, SlowBB); 141 QuoPhi->addIncoming(FastQuotientV, FastBB); 142 PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); 143 RemPhi->addIncoming(SlowRemainderV, SlowBB); 144 RemPhi->addIncoming(FastRemainderV, FastBB); 145 146 // Replace Instr with appropriate phi node 147 if (UseDivOp) 148 Instr->replaceAllUsesWith(QuoPhi); 149 else 150 Instr->replaceAllUsesWith(RemPhi); 151 Instr->eraseFromParent(); 152 153 // Combine operands into a single value with OR for value testing below 154 MainBB->getInstList().back().eraseFromParent(); 155 IRBuilder<> MainBuilder(MainBB, MainBB->end()); 156 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); 157 158 // BitMask is inverted to check if the operands are 159 // larger than the bypass type 160 uint64_t BitMask = ~BypassType->getBitMask(); 161 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); 162 163 // Compare operand values and branch 164 Value *ZeroV = MainBuilder.getInt32(0); 165 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); 166 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); 167 168 // point iterator J at first instruction of successorBB 169 J = I->begin(); 170 171 // Cache phi nodes to be used later in place of other instances 172 // of div or rem with the same sign, dividend, and divisor 173 DivOpInfo Key(UseSignedOp, Dividend, Divisor); 174 DivPhiNodes Value(QuoPhi, RemPhi); 175 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); 176 return true; 177} 178 179// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if 180// operands and operation are identical. Otherwise call insertFastDiv to perform 181// the optimization and cache the resulting dividend and remainder. 182static bool reuseOrInsertFastDiv(Function &F, 183 Function::iterator &I, 184 BasicBlock::iterator &J, 185 IntegerType *BypassType, 186 bool UseDivOp, 187 bool UseSignedOp, 188 DivCacheTy &PerBBDivCache) { 189 // Get instruction operands 190 Instruction *Instr = J; 191 DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); 192 DivCacheTy::const_iterator CacheI = PerBBDivCache.find(Key); 193 194 if (CacheI == PerBBDivCache.end()) { 195 // If previous instance does not exist, insert fast div 196 return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, 197 PerBBDivCache); 198 } 199 200 // Replace operation value with previously generated phi node 201 DivPhiNodes Value = CacheI->second; 202 if (UseDivOp) { 203 // Replace all uses of div instruction with quotient phi node 204 J->replaceAllUsesWith(Value.Quotient); 205 } else { 206 // Replace all uses of rem instruction with remainder phi node 207 J->replaceAllUsesWith(Value.Remainder); 208 } 209 210 // Advance to next operation 211 ++J; 212 213 // Remove redundant operation 214 Instr->eraseFromParent(); 215 return true; 216} 217 218// bypassSlowDivision - This optimization identifies DIV instructions that can 219// be profitably bypassed and carried out with a shorter, faster divide. 220bool bypassSlowDivision(Function &F, 221 Function::iterator &I, 222 const llvm::DenseMap<Type *, Type *> &BypassTypeMap) { 223 DivCacheTy DivCache; 224 225 bool MadeChange = false; 226 for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { 227 228 // Get instruction details 229 unsigned Opcode = J->getOpcode(); 230 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; 231 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; 232 bool UseSignedOp = Opcode == Instruction::SDiv || 233 Opcode == Instruction::SRem; 234 235 // Only optimize div or rem ops 236 if (!UseDivOp && !UseRemOp) 237 continue; 238 239 // Continue if div/rem type is not bypassed 240 DenseMap<Type *, Type *>::const_iterator BT = 241 BypassTypeMap.find(J->getType()); 242 if (BT == BypassTypeMap.end()) 243 continue; 244 245 IntegerType *BypassType = cast<IntegerType>(BT->second); 246 MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp, 247 UseSignedOp, DivCache); 248 MadeChange = true; 249 } 250 251 return MadeChange; 252} 253