BypassSlowDivision.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
1//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains an optimization for div and rem on architectures that
11// execute short instructions significantly faster than longer instructions.
12// For example, on Intel Atom 32-bit divides are slow enough that during
13// runtime it is profitable to check the value of the operands, and if they are
14// positive and less than 256 use an unsigned 8-bit divide.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/Transforms/Utils/BypassSlowDivision.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instructions.h"
23
24using namespace llvm;
25
26#define DEBUG_TYPE "bypass-slow-division"
27
28namespace {
29  struct DivOpInfo {
30    bool SignedOp;
31    Value *Dividend;
32    Value *Divisor;
33
34    DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
35      : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
36  };
37
38  struct DivPhiNodes {
39    PHINode *Quotient;
40    PHINode *Remainder;
41
42    DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
43      : Quotient(InQuotient), Remainder(InRemainder) {}
44  };
45}
46
47namespace llvm {
48  template<>
49  struct DenseMapInfo<DivOpInfo> {
50    static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
51      return Val1.SignedOp == Val2.SignedOp &&
52             Val1.Dividend == Val2.Dividend &&
53             Val1.Divisor == Val2.Divisor;
54    }
55
56    static DivOpInfo getEmptyKey() {
57      return DivOpInfo(false, nullptr, nullptr);
58    }
59
60    static DivOpInfo getTombstoneKey() {
61      return DivOpInfo(true, nullptr, nullptr);
62    }
63
64    static unsigned getHashValue(const DivOpInfo &Val) {
65      return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
66                        reinterpret_cast<uintptr_t>(Val.Divisor)) ^
67                        (unsigned)Val.SignedOp;
68    }
69  };
70
71  typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
72}
73
74// insertFastDiv - Substitutes the div/rem instruction with code that checks the
75// value of the operands and uses a shorter-faster div/rem instruction when
76// possible and the longer-slower div/rem instruction otherwise.
77static bool insertFastDiv(Function &F,
78                          Function::iterator &I,
79                          BasicBlock::iterator &J,
80                          IntegerType *BypassType,
81                          bool UseDivOp,
82                          bool UseSignedOp,
83                          DivCacheTy &PerBBDivCache) {
84  // Get instruction operands
85  Instruction *Instr = J;
86  Value *Dividend = Instr->getOperand(0);
87  Value *Divisor = Instr->getOperand(1);
88
89  if (isa<ConstantInt>(Divisor) ||
90      (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
91    // Operations with immediate values should have
92    // been solved and replaced during compile time.
93    return false;
94  }
95
96  // Basic Block is split before divide
97  BasicBlock *MainBB = I;
98  BasicBlock *SuccessorBB = I->splitBasicBlock(J);
99  ++I; //advance iterator I to successorBB
100
101  // Add new basic block for slow divide operation
102  BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
103                                          MainBB->getParent(), SuccessorBB);
104  SlowBB->moveBefore(SuccessorBB);
105  IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
106  Value *SlowQuotientV;
107  Value *SlowRemainderV;
108  if (UseSignedOp) {
109    SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
110    SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
111  } else {
112    SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
113    SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
114  }
115  SlowBuilder.CreateBr(SuccessorBB);
116
117  // Add new basic block for fast divide operation
118  BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
119                                          MainBB->getParent(), SuccessorBB);
120  FastBB->moveBefore(SlowBB);
121  IRBuilder<> FastBuilder(FastBB, FastBB->begin());
122  Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
123                                                BypassType);
124  Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
125                                                 BypassType);
126
127  // udiv/urem because optimization only handles positive numbers
128  Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
129                                                      ShortDivisorV);
130  Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
131                                                  ShortDivisorV);
132  Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
133                                                ShortQuotientV,
134                                                Dividend->getType());
135  Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
136                                                 ShortRemainderV,
137                                                 Dividend->getType());
138  FastBuilder.CreateBr(SuccessorBB);
139
140  // Phi nodes for result of div and rem
141  IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
142  PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
143  QuoPhi->addIncoming(SlowQuotientV, SlowBB);
144  QuoPhi->addIncoming(FastQuotientV, FastBB);
145  PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
146  RemPhi->addIncoming(SlowRemainderV, SlowBB);
147  RemPhi->addIncoming(FastRemainderV, FastBB);
148
149  // Replace Instr with appropriate phi node
150  if (UseDivOp)
151    Instr->replaceAllUsesWith(QuoPhi);
152  else
153    Instr->replaceAllUsesWith(RemPhi);
154  Instr->eraseFromParent();
155
156  // Combine operands into a single value with OR for value testing below
157  MainBB->getInstList().back().eraseFromParent();
158  IRBuilder<> MainBuilder(MainBB, MainBB->end());
159  Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
160
161  // BitMask is inverted to check if the operands are
162  // larger than the bypass type
163  uint64_t BitMask = ~BypassType->getBitMask();
164  Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
165
166  // Compare operand values and branch
167  Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
168  Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
169  MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
170
171  // point iterator J at first instruction of successorBB
172  J = I->begin();
173
174  // Cache phi nodes to be used later in place of other instances
175  // of div or rem with the same sign, dividend, and divisor
176  DivOpInfo Key(UseSignedOp, Dividend, Divisor);
177  DivPhiNodes Value(QuoPhi, RemPhi);
178  PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
179  return true;
180}
181
182// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
183// operands and operation are identical. Otherwise call insertFastDiv to perform
184// the optimization and cache the resulting dividend and remainder.
185static bool reuseOrInsertFastDiv(Function &F,
186                                 Function::iterator &I,
187                                 BasicBlock::iterator &J,
188                                 IntegerType *BypassType,
189                                 bool UseDivOp,
190                                 bool UseSignedOp,
191                                 DivCacheTy &PerBBDivCache) {
192  // Get instruction operands
193  Instruction *Instr = J;
194  DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
195  DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
196
197  if (CacheI == PerBBDivCache.end()) {
198    // If previous instance does not exist, insert fast div
199    return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
200                         PerBBDivCache);
201  }
202
203  // Replace operation value with previously generated phi node
204  DivPhiNodes &Value = CacheI->second;
205  if (UseDivOp) {
206    // Replace all uses of div instruction with quotient phi node
207    J->replaceAllUsesWith(Value.Quotient);
208  } else {
209    // Replace all uses of rem instruction with remainder phi node
210    J->replaceAllUsesWith(Value.Remainder);
211  }
212
213  // Advance to next operation
214  ++J;
215
216  // Remove redundant operation
217  Instr->eraseFromParent();
218  return true;
219}
220
221// bypassSlowDivision - This optimization identifies DIV instructions that can
222// be profitably bypassed and carried out with a shorter, faster divide.
223bool llvm::bypassSlowDivision(Function &F,
224                              Function::iterator &I,
225                              const DenseMap<unsigned int, unsigned int> &BypassWidths) {
226  DivCacheTy DivCache;
227
228  bool MadeChange = false;
229  for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
230
231    // Get instruction details
232    unsigned Opcode = J->getOpcode();
233    bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
234    bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
235    bool UseSignedOp = Opcode == Instruction::SDiv ||
236                       Opcode == Instruction::SRem;
237
238    // Only optimize div or rem ops
239    if (!UseDivOp && !UseRemOp)
240      continue;
241
242    // Skip division on vector types, only optimize integer instructions
243    if (!J->getType()->isIntegerTy())
244      continue;
245
246    // Get bitwidth of div/rem instruction
247    IntegerType *T = cast<IntegerType>(J->getType());
248    unsigned int bitwidth = T->getBitWidth();
249
250    // Continue if bitwidth is not bypassed
251    DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
252    if (BI == BypassWidths.end())
253      continue;
254
255    // Get type for div/rem instruction with bypass bitwidth
256    IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
257
258    MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
259                                       UseSignedOp, DivCache);
260  }
261
262  return MadeChange;
263}
264