BypassSlowDivision.cpp revision ed0e3a31e1dd201d87288c2e73fc74484d2e8c4d
1//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains an optimization for div and rem on architectures that
11// execute short instructions significantly faster than longer instructions.
12// For example, on Intel Atom 32-bit divides are slow enough that during
13// runtime it is profitable to check the value of the operands, and if they are
14// positive and less than 256 use an unsigned 8-bit divide.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "bypass-slow-division"
19#include "llvm/Instructions.h"
20#include "llvm/Function.h"
21#include "llvm/IRBuilder.h"
22#include "llvm/ADT/DenseMap.h"
23#include "llvm/Transforms/Utils/BypassSlowDivision.h"
24
25using namespace llvm;
26
27namespace llvm {
28  struct DivOpInfo {
29    bool SignedOp;
30    Value *Dividend;
31    Value *Divisor;
32
33    DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
34      : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
35  };
36
37  struct DivPhiNodes {
38    PHINode *Quotient;
39    PHINode *Remainder;
40
41    DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
42      : Quotient(InQuotient), Remainder(InRemainder) {}
43  };
44
45  template<>
46  struct DenseMapInfo<DivOpInfo> {
47    static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
48      return Val1.SignedOp == Val2.SignedOp &&
49             Val1.Dividend == Val2.Dividend &&
50             Val1.Divisor == Val2.Divisor;
51    }
52
53    static DivOpInfo getEmptyKey() {
54      return DivOpInfo(false, 0, 0);
55    }
56
57    static DivOpInfo getTombstoneKey() {
58      return DivOpInfo(true, 0, 0);
59    }
60
61    static unsigned getHashValue(const DivOpInfo &Val) {
62      return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
63                        reinterpret_cast<uintptr_t>(Val.Divisor)) ^
64                        (unsigned)Val.SignedOp;
65    }
66  };
67
68  typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
69}
70
71// insertFastDiv - Substitutes the div/rem instruction with code that checks the
72// value of the operands and uses a shorter-faster div/rem instruction when
73// possible and the longer-slower div/rem instruction otherwise.
74static bool insertFastDiv(Function &F,
75                          Function::iterator &I,
76                          BasicBlock::iterator &J,
77                          IntegerType *BypassType,
78                          bool UseDivOp,
79                          bool UseSignedOp,
80                          DivCacheTy &PerBBDivCache) {
81  // Get instruction operands
82  Instruction *Instr = J;
83  Value *Dividend = Instr->getOperand(0);
84  Value *Divisor = Instr->getOperand(1);
85
86  if (isa<ConstantInt>(Divisor) ||
87      (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
88    // Operations with immediate values should have
89    // been solved and replaced during compile time.
90    return false;
91  }
92
93  // Basic Block is split before divide
94  BasicBlock *MainBB = I;
95  BasicBlock *SuccessorBB = I->splitBasicBlock(J);
96  ++I; //advance iterator I to successorBB
97
98  // Add new basic block for slow divide operation
99  BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
100                                          MainBB->getParent(), SuccessorBB);
101  SlowBB->moveBefore(SuccessorBB);
102  IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
103  Value *SlowQuotientV;
104  Value *SlowRemainderV;
105  if (UseSignedOp) {
106    SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
107    SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
108  } else {
109    SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
110    SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
111  }
112  SlowBuilder.CreateBr(SuccessorBB);
113
114  // Add new basic block for fast divide operation
115  BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
116                                          MainBB->getParent(), SuccessorBB);
117  FastBB->moveBefore(SlowBB);
118  IRBuilder<> FastBuilder(FastBB, FastBB->begin());
119  Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
120                                                BypassType);
121  Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
122                                                 BypassType);
123
124  // udiv/urem because optimization only handles positive numbers
125  Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
126                                                      ShortDivisorV);
127  Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
128                                                  ShortDivisorV);
129  Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
130                                                ShortQuotientV,
131                                                Dividend->getType());
132  Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
133                                                 ShortRemainderV,
134                                                 Dividend->getType());
135  FastBuilder.CreateBr(SuccessorBB);
136
137  // Phi nodes for result of div and rem
138  IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
139  PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
140  QuoPhi->addIncoming(SlowQuotientV, SlowBB);
141  QuoPhi->addIncoming(FastQuotientV, FastBB);
142  PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
143  RemPhi->addIncoming(SlowRemainderV, SlowBB);
144  RemPhi->addIncoming(FastRemainderV, FastBB);
145
146  // Replace Instr with appropriate phi node
147  if (UseDivOp)
148    Instr->replaceAllUsesWith(QuoPhi);
149  else
150    Instr->replaceAllUsesWith(RemPhi);
151  Instr->eraseFromParent();
152
153  // Combine operands into a single value with OR for value testing below
154  MainBB->getInstList().back().eraseFromParent();
155  IRBuilder<> MainBuilder(MainBB, MainBB->end());
156  Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
157
158  // BitMask is inverted to check if the operands are
159  // larger than the bypass type
160  uint64_t BitMask = ~BypassType->getBitMask();
161  Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
162
163  // Compare operand values and branch
164  Value *ZeroV = MainBuilder.getInt32(0);
165  Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
166  MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
167
168  // point iterator J at first instruction of successorBB
169  J = I->begin();
170
171  // Cache phi nodes to be used later in place of other instances
172  // of div or rem with the same sign, dividend, and divisor
173  DivOpInfo Key(UseSignedOp, Dividend, Divisor);
174  DivPhiNodes Value(QuoPhi, RemPhi);
175  PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
176  return true;
177}
178
179// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
180// operands and operation are identical. Otherwise call insertFastDiv to perform
181// the optimization and cache the resulting dividend and remainder.
182static bool reuseOrInsertFastDiv(Function &F,
183                                 Function::iterator &I,
184                                 BasicBlock::iterator &J,
185                                 IntegerType *BypassType,
186                                 bool UseDivOp,
187                                 bool UseSignedOp,
188                                 DivCacheTy &PerBBDivCache) {
189  // Get instruction operands
190  Instruction *Instr = J;
191  DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
192  DivCacheTy::const_iterator CacheI = PerBBDivCache.find(Key);
193
194  if (CacheI == PerBBDivCache.end()) {
195    // If previous instance does not exist, insert fast div
196    return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
197                         PerBBDivCache);
198  }
199
200  // Replace operation value with previously generated phi node
201  DivPhiNodes Value = CacheI->second;
202  if (UseDivOp) {
203    // Replace all uses of div instruction with quotient phi node
204    J->replaceAllUsesWith(Value.Quotient);
205  } else {
206    // Replace all uses of rem instruction with remainder phi node
207    J->replaceAllUsesWith(Value.Remainder);
208  }
209
210  // Advance to next operation
211  ++J;
212
213  // Remove redundant operation
214  Instr->eraseFromParent();
215  return true;
216}
217
218// bypassSlowDivision - This optimization identifies DIV instructions that can
219// be profitably bypassed and carried out with a shorter, faster divide.
220bool bypassSlowDivision(Function &F,
221                        Function::iterator &I,
222                        const llvm::DenseMap<Type *, Type *> &BypassTypeMap) {
223  DivCacheTy DivCache;
224
225  bool MadeChange = false;
226  for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
227
228    // Get instruction details
229    unsigned Opcode = J->getOpcode();
230    bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
231    bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
232    bool UseSignedOp = Opcode == Instruction::SDiv ||
233                       Opcode == Instruction::SRem;
234
235    // Only optimize div or rem ops
236    if (!UseDivOp && !UseRemOp)
237      continue;
238
239    // Continue if div/rem type is not bypassed
240    DenseMap<Type *, Type *>::const_iterator BT =
241      BypassTypeMap.find(J->getType());
242    if (BT == BypassTypeMap.end())
243      continue;
244
245    IntegerType *BypassType = cast<IntegerType>(BT->second);
246    MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp,
247                                       UseSignedOp, DivCache);
248  }
249
250  return MadeChange;
251}
252