1//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains an optimization for div and rem on architectures that
11// execute short instructions significantly faster than longer instructions.
12// For example, on Intel Atom 32-bit divides are slow enough that during
13// runtime it is profitable to check the value of the operands, and if they are
14// positive and less than 256 use an unsigned 8-bit divide.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/Transforms/Utils/BypassSlowDivision.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instructions.h"
23
24using namespace llvm;
25
26#define DEBUG_TYPE "bypass-slow-division"
27
28namespace {
29  struct DivOpInfo {
30    bool SignedOp;
31    Value *Dividend;
32    Value *Divisor;
33
34    DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
35      : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
36  };
37
38  struct DivPhiNodes {
39    PHINode *Quotient;
40    PHINode *Remainder;
41
42    DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
43      : Quotient(InQuotient), Remainder(InRemainder) {}
44  };
45}
46
47namespace llvm {
48  template<>
49  struct DenseMapInfo<DivOpInfo> {
50    static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
51      return Val1.SignedOp == Val2.SignedOp &&
52             Val1.Dividend == Val2.Dividend &&
53             Val1.Divisor == Val2.Divisor;
54    }
55
56    static DivOpInfo getEmptyKey() {
57      return DivOpInfo(false, nullptr, nullptr);
58    }
59
60    static DivOpInfo getTombstoneKey() {
61      return DivOpInfo(true, nullptr, nullptr);
62    }
63
64    static unsigned getHashValue(const DivOpInfo &Val) {
65      return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
66                        reinterpret_cast<uintptr_t>(Val.Divisor)) ^
67                        (unsigned)Val.SignedOp;
68    }
69  };
70
71  typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
72}
73
74// insertFastDiv - Substitutes the div/rem instruction with code that checks the
75// value of the operands and uses a shorter-faster div/rem instruction when
76// possible and the longer-slower div/rem instruction otherwise.
77static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
78                          bool UseDivOp, bool UseSignedOp,
79                          DivCacheTy &PerBBDivCache) {
80  Function *F = I->getParent()->getParent();
81  // Get instruction operands
82  Value *Dividend = I->getOperand(0);
83  Value *Divisor = I->getOperand(1);
84
85  if (isa<ConstantInt>(Divisor) ||
86      (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
87    // Operations with immediate values should have
88    // been solved and replaced during compile time.
89    return false;
90  }
91
92  // Basic Block is split before divide
93  BasicBlock *MainBB = &*I->getParent();
94  BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
95
96  // Add new basic block for slow divide operation
97  BasicBlock *SlowBB =
98      BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
99  SlowBB->moveBefore(SuccessorBB);
100  IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
101  Value *SlowQuotientV;
102  Value *SlowRemainderV;
103  if (UseSignedOp) {
104    SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
105    SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
106  } else {
107    SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
108    SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
109  }
110  SlowBuilder.CreateBr(SuccessorBB);
111
112  // Add new basic block for fast divide operation
113  BasicBlock *FastBB =
114      BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
115  FastBB->moveBefore(SlowBB);
116  IRBuilder<> FastBuilder(FastBB, FastBB->begin());
117  Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
118                                                BypassType);
119  Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
120                                                 BypassType);
121
122  // udiv/urem because optimization only handles positive numbers
123  Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
124                                                      ShortDivisorV);
125  Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
126                                                  ShortDivisorV);
127  Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
128                                                ShortQuotientV,
129                                                Dividend->getType());
130  Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
131                                                 ShortRemainderV,
132                                                 Dividend->getType());
133  FastBuilder.CreateBr(SuccessorBB);
134
135  // Phi nodes for result of div and rem
136  IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
137  PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
138  QuoPhi->addIncoming(SlowQuotientV, SlowBB);
139  QuoPhi->addIncoming(FastQuotientV, FastBB);
140  PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
141  RemPhi->addIncoming(SlowRemainderV, SlowBB);
142  RemPhi->addIncoming(FastRemainderV, FastBB);
143
144  // Replace I with appropriate phi node
145  if (UseDivOp)
146    I->replaceAllUsesWith(QuoPhi);
147  else
148    I->replaceAllUsesWith(RemPhi);
149  I->eraseFromParent();
150
151  // Combine operands into a single value with OR for value testing below
152  MainBB->getInstList().back().eraseFromParent();
153  IRBuilder<> MainBuilder(MainBB, MainBB->end());
154  Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
155
156  // BitMask is inverted to check if the operands are
157  // larger than the bypass type
158  uint64_t BitMask = ~BypassType->getBitMask();
159  Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
160
161  // Compare operand values and branch
162  Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
163  Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
164  MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
165
166  // Cache phi nodes to be used later in place of other instances
167  // of div or rem with the same sign, dividend, and divisor
168  DivOpInfo Key(UseSignedOp, Dividend, Divisor);
169  DivPhiNodes Value(QuoPhi, RemPhi);
170  PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
171  return true;
172}
173
174// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
175// the current BB if operands and operation are identical. Otherwise calls
176// insertFastDiv to perform the optimization and caches the resulting dividend
177// and remainder.
178static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
179                                 bool UseDivOp, bool UseSignedOp,
180                                 DivCacheTy &PerBBDivCache) {
181  // Get instruction operands
182  DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
183  DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
184
185  if (CacheI == PerBBDivCache.end()) {
186    // If previous instance does not exist, insert fast div
187    return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
188  }
189
190  // Replace operation value with previously generated phi node
191  DivPhiNodes &Value = CacheI->second;
192  if (UseDivOp) {
193    // Replace all uses of div instruction with quotient phi node
194    I->replaceAllUsesWith(Value.Quotient);
195  } else {
196    // Replace all uses of rem instruction with remainder phi node
197    I->replaceAllUsesWith(Value.Remainder);
198  }
199
200  // Remove redundant operation
201  I->eraseFromParent();
202  return true;
203}
204
205// bypassSlowDivision - This optimization identifies DIV instructions in a BB
206// that can be profitably bypassed and carried out with a shorter, faster
207// divide.
208bool llvm::bypassSlowDivision(
209    BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
210  DivCacheTy DivCache;
211
212  bool MadeChange = false;
213  Instruction* Next = &*BB->begin();
214  while (Next != nullptr) {
215    // We may add instructions immediately after I, but we want to skip over
216    // them.
217    Instruction* I = Next;
218    Next = Next->getNextNode();
219
220    // Get instruction details
221    unsigned Opcode = I->getOpcode();
222    bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
223    bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
224    bool UseSignedOp = Opcode == Instruction::SDiv ||
225                       Opcode == Instruction::SRem;
226
227    // Only optimize div or rem ops
228    if (!UseDivOp && !UseRemOp)
229      continue;
230
231    // Skip division on vector types, only optimize integer instructions
232    if (!I->getType()->isIntegerTy())
233      continue;
234
235    // Get bitwidth of div/rem instruction
236    IntegerType *T = cast<IntegerType>(I->getType());
237    unsigned int bitwidth = T->getBitWidth();
238
239    // Continue if bitwidth is not bypassed
240    DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
241    if (BI == BypassWidths.end())
242      continue;
243
244    // Get type for div/rem instruction with bypass bitwidth
245    IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
246
247    MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
248  }
249
250  return MadeChange;
251}
252