BypassSlowDivision.cpp revision 9a2cfffdb6340c54ff553c1b81364d0f17fa8f45
1//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains an optimization for div and rem on architectures that
11// execute short instructions significantly faster than longer instructions.
12// For example, on Intel Atom 32-bit divides are slow enough that during
13// runtime it is profitable to check the value of the operands, and if they are
14// positive and less than 256 use an unsigned 8-bit divide.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "bypass-slow-division"
19#include "llvm/Transforms/Utils/BypassSlowDivision.h"
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/IR/Function.h"
22#include "llvm/IR/IRBuilder.h"
23#include "llvm/IR/Instructions.h"
24
25using namespace llvm;
26
27namespace {
28  struct DivOpInfo {
29    bool SignedOp;
30    Value *Dividend;
31    Value *Divisor;
32
33    DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
34      : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
35  };
36
37  struct DivPhiNodes {
38    PHINode *Quotient;
39    PHINode *Remainder;
40
41    DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
42      : Quotient(InQuotient), Remainder(InRemainder) {}
43  };
44}
45
46namespace llvm {
47  template<>
48  struct DenseMapInfo<DivOpInfo> {
49    static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
50      return Val1.SignedOp == Val2.SignedOp &&
51             Val1.Dividend == Val2.Dividend &&
52             Val1.Divisor == Val2.Divisor;
53    }
54
55    static DivOpInfo getEmptyKey() {
56      return DivOpInfo(false, 0, 0);
57    }
58
59    static DivOpInfo getTombstoneKey() {
60      return DivOpInfo(true, 0, 0);
61    }
62
63    static unsigned getHashValue(const DivOpInfo &Val) {
64      return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
65                        reinterpret_cast<uintptr_t>(Val.Divisor)) ^
66                        (unsigned)Val.SignedOp;
67    }
68  };
69
70  typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
71}
72
73// insertFastDiv - Substitutes the div/rem instruction with code that checks the
74// value of the operands and uses a shorter-faster div/rem instruction when
75// possible and the longer-slower div/rem instruction otherwise.
76static bool insertFastDiv(Function &F,
77                          Function::iterator &I,
78                          BasicBlock::iterator &J,
79                          IntegerType *BypassType,
80                          bool UseDivOp,
81                          bool UseSignedOp,
82                          DivCacheTy &PerBBDivCache) {
83  // Get instruction operands
84  Instruction *Instr = J;
85  Value *Dividend = Instr->getOperand(0);
86  Value *Divisor = Instr->getOperand(1);
87
88  if (isa<ConstantInt>(Divisor) ||
89      (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
90    // Operations with immediate values should have
91    // been solved and replaced during compile time.
92    return false;
93  }
94
95  // Basic Block is split before divide
96  BasicBlock *MainBB = I;
97  BasicBlock *SuccessorBB = I->splitBasicBlock(J);
98  ++I; //advance iterator I to successorBB
99
100  // Add new basic block for slow divide operation
101  BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
102                                          MainBB->getParent(), SuccessorBB);
103  SlowBB->moveBefore(SuccessorBB);
104  IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
105  Value *SlowQuotientV;
106  Value *SlowRemainderV;
107  if (UseSignedOp) {
108    SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
109    SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
110  } else {
111    SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
112    SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
113  }
114  SlowBuilder.CreateBr(SuccessorBB);
115
116  // Add new basic block for fast divide operation
117  BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
118                                          MainBB->getParent(), SuccessorBB);
119  FastBB->moveBefore(SlowBB);
120  IRBuilder<> FastBuilder(FastBB, FastBB->begin());
121  Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
122                                                BypassType);
123  Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
124                                                 BypassType);
125
126  // udiv/urem because optimization only handles positive numbers
127  Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
128                                                      ShortDivisorV);
129  Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
130                                                  ShortDivisorV);
131  Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
132                                                ShortQuotientV,
133                                                Dividend->getType());
134  Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
135                                                 ShortRemainderV,
136                                                 Dividend->getType());
137  FastBuilder.CreateBr(SuccessorBB);
138
139  // Phi nodes for result of div and rem
140  IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
141  PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
142  QuoPhi->addIncoming(SlowQuotientV, SlowBB);
143  QuoPhi->addIncoming(FastQuotientV, FastBB);
144  PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
145  RemPhi->addIncoming(SlowRemainderV, SlowBB);
146  RemPhi->addIncoming(FastRemainderV, FastBB);
147
148  // Replace Instr with appropriate phi node
149  if (UseDivOp)
150    Instr->replaceAllUsesWith(QuoPhi);
151  else
152    Instr->replaceAllUsesWith(RemPhi);
153  Instr->eraseFromParent();
154
155  // Combine operands into a single value with OR for value testing below
156  MainBB->getInstList().back().eraseFromParent();
157  IRBuilder<> MainBuilder(MainBB, MainBB->end());
158  Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
159
160  // BitMask is inverted to check if the operands are
161  // larger than the bypass type
162  uint64_t BitMask = ~BypassType->getBitMask();
163  Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
164
165  // Compare operand values and branch
166  Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
167  Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
168  MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
169
170  // point iterator J at first instruction of successorBB
171  J = I->begin();
172
173  // Cache phi nodes to be used later in place of other instances
174  // of div or rem with the same sign, dividend, and divisor
175  DivOpInfo Key(UseSignedOp, Dividend, Divisor);
176  DivPhiNodes Value(QuoPhi, RemPhi);
177  PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
178  return true;
179}
180
181// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
182// operands and operation are identical. Otherwise call insertFastDiv to perform
183// the optimization and cache the resulting dividend and remainder.
184static bool reuseOrInsertFastDiv(Function &F,
185                                 Function::iterator &I,
186                                 BasicBlock::iterator &J,
187                                 IntegerType *BypassType,
188                                 bool UseDivOp,
189                                 bool UseSignedOp,
190                                 DivCacheTy &PerBBDivCache) {
191  // Get instruction operands
192  Instruction *Instr = J;
193  DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
194  DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
195
196  if (CacheI == PerBBDivCache.end()) {
197    // If previous instance does not exist, insert fast div
198    return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
199                         PerBBDivCache);
200  }
201
202  // Replace operation value with previously generated phi node
203  DivPhiNodes &Value = CacheI->second;
204  if (UseDivOp) {
205    // Replace all uses of div instruction with quotient phi node
206    J->replaceAllUsesWith(Value.Quotient);
207  } else {
208    // Replace all uses of rem instruction with remainder phi node
209    J->replaceAllUsesWith(Value.Remainder);
210  }
211
212  // Advance to next operation
213  ++J;
214
215  // Remove redundant operation
216  Instr->eraseFromParent();
217  return true;
218}
219
220// bypassSlowDivision - This optimization identifies DIV instructions that can
221// be profitably bypassed and carried out with a shorter, faster divide.
222bool llvm::bypassSlowDivision(Function &F,
223                              Function::iterator &I,
224                              const DenseMap<unsigned int, unsigned int> &BypassWidths) {
225  DivCacheTy DivCache;
226
227  bool MadeChange = false;
228  for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
229
230    // Get instruction details
231    unsigned Opcode = J->getOpcode();
232    bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
233    bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
234    bool UseSignedOp = Opcode == Instruction::SDiv ||
235                       Opcode == Instruction::SRem;
236
237    // Only optimize div or rem ops
238    if (!UseDivOp && !UseRemOp)
239      continue;
240
241    // Skip division on vector types, only optimize integer instructions
242    if (!J->getType()->isIntegerTy())
243      continue;
244
245    // Get bitwidth of div/rem instruction
246    IntegerType *T = cast<IntegerType>(J->getType());
247    unsigned int bitwidth = T->getBitWidth();
248
249    // Continue if bitwidth is not bypassed
250    DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
251    if (BI == BypassWidths.end())
252      continue;
253
254    // Get type for div/rem instruction with bypass bitwidth
255    IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
256
257    MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
258                                       UseSignedOp, DivCache);
259  }
260
261  return MadeChange;
262}
263