1//===--- HexagonGenExtract.cpp --------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/CodeGen/MachineFunctionAnalysis.h"
12#include "llvm/IR/Constants.h"
13#include "llvm/IR/Dominators.h"
14#include "llvm/IR/Function.h"
15#include "llvm/IR/Instructions.h"
16#include "llvm/IR/IntrinsicInst.h"
17#include "llvm/IR/IRBuilder.h"
18#include "llvm/IR/PatternMatch.h"
19#include "llvm/Pass.h"
20#include "llvm/Support/CommandLine.h"
21#include "llvm/Support/Debug.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Support/raw_ostream.h"
24
25using namespace llvm;
26
27static cl::opt<unsigned> ExtractCutoff("extract-cutoff", cl::init(~0U),
28  cl::Hidden, cl::desc("Cutoff for generating \"extract\""
29  " instructions"));
30
31// This prevents generating extract instructions that have the offset of 0.
32// One of the reasons for "extract" is to put a sequence of bits in a regis-
33// ter, starting at offset 0 (so that these bits can then be used by an
34// "insert"). If the bits are already at offset 0, it is better not to gene-
35// rate "extract", since logical bit operations can be merged into compound
36// instructions (as opposed to "extract").
37static cl::opt<bool> NoSR0("extract-nosr0", cl::init(true), cl::Hidden,
38  cl::desc("No extract instruction with offset 0"));
39
40static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden,
41  cl::desc("Require & in extract patterns"));
42
43namespace llvm {
44  void initializeHexagonGenExtractPass(PassRegistry&);
45  FunctionPass *createHexagonGenExtract();
46}
47
48
49namespace {
50  class HexagonGenExtract : public FunctionPass {
51  public:
52    static char ID;
53    HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) {
54      initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry());
55    }
56    virtual const char *getPassName() const override {
57      return "Hexagon generate \"extract\" instructions";
58    }
59    virtual bool runOnFunction(Function &F) override;
60    virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
61      AU.addRequired<DominatorTreeWrapperPass>();
62      AU.addPreserved<DominatorTreeWrapperPass>();
63      AU.addPreserved<MachineFunctionAnalysis>();
64      FunctionPass::getAnalysisUsage(AU);
65    }
66  private:
67    bool visitBlock(BasicBlock *B);
68    bool convert(Instruction *In);
69
70    unsigned ExtractCount;
71    DominatorTree *DT;
72  };
73
74  char HexagonGenExtract::ID = 0;
75}
76
77INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate "
78  "\"extract\" instructions", false, false)
79INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
80INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate "
81  "\"extract\" instructions", false, false)
82
83
84bool HexagonGenExtract::convert(Instruction *In) {
85  using namespace PatternMatch;
86  Value *BF = 0;
87  ConstantInt *CSL = 0, *CSR = 0, *CM = 0;
88  BasicBlock *BB = In->getParent();
89  LLVMContext &Ctx = BB->getContext();
90  bool LogicalSR;
91
92  // (and (shl (lshr x, #sr), #sl), #m)
93  LogicalSR = true;
94  bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
95                               m_ConstantInt(CSL)),
96                         m_ConstantInt(CM)));
97
98  if (!Match) {
99    // (and (shl (ashr x, #sr), #sl), #m)
100    LogicalSR = false;
101    Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
102                            m_ConstantInt(CSL)),
103                      m_ConstantInt(CM)));
104  }
105  if (!Match) {
106    // (and (shl x, #sl), #m)
107    LogicalSR = true;
108    CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
109    Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)),
110                      m_ConstantInt(CM)));
111    if (Match && NoSR0)
112      return false;
113  }
114  if (!Match) {
115    // (and (lshr x, #sr), #m)
116    LogicalSR = true;
117    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
118    Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
119                            m_ConstantInt(CM)));
120  }
121  if (!Match) {
122    // (and (ashr x, #sr), #m)
123    LogicalSR = false;
124    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
125    Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
126                            m_ConstantInt(CM)));
127  }
128  if (!Match) {
129    CM = 0;
130    // (shl (lshr x, #sr), #sl)
131    LogicalSR = true;
132    Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
133                            m_ConstantInt(CSL)));
134  }
135  if (!Match) {
136    CM = 0;
137    // (shl (ashr x, #sr), #sl)
138    LogicalSR = false;
139    Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
140                            m_ConstantInt(CSL)));
141  }
142  if (!Match)
143    return false;
144
145  Type *Ty = BF->getType();
146  if (!Ty->isIntegerTy())
147    return false;
148  unsigned BW = Ty->getPrimitiveSizeInBits();
149  if (BW != 32 && BW != 64)
150    return false;
151
152  uint32_t SR = CSR->getZExtValue();
153  uint32_t SL = CSL->getZExtValue();
154
155  if (!CM) {
156    // If there was no and, and the shift left did not remove all potential
157    // sign bits created by the shift right, then extractu cannot reproduce
158    // this value.
159    if (!LogicalSR && (SR > SL))
160      return false;
161    APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL);
162    CM = ConstantInt::get(Ctx, A);
163  }
164
165  // CM is the shifted-left mask. Shift it back right to remove the zero
166  // bits on least-significant positions.
167  APInt M = CM->getValue().lshr(SL);
168  uint32_t T = M.countTrailingOnes();
169
170  // During the shifts some of the bits will be lost. Calculate how many
171  // of the original value will remain after shift right and then left.
172  uint32_t U = BW - std::max(SL, SR);
173  // The width of the extracted field is the minimum of the original bits
174  // that remain after the shifts and the number of contiguous 1s in the mask.
175  uint32_t W = std::min(U, T);
176  if (W == 0)
177    return false;
178
179  // Check if the extracted bits are contained within the mask that it is
180  // and-ed with. The extract operation will copy these bits, and so the
181  // mask cannot any holes in it that would clear any of the bits of the
182  // extracted field.
183  if (!LogicalSR) {
184    // If the shift right was arithmetic, it could have included some 1 bits.
185    // It is still ok to generate extract, but only if the mask eliminates
186    // those bits (i.e. M does not have any bits set beyond U).
187    APInt C = APInt::getHighBitsSet(BW, BW-U);
188    if (M.intersects(C) || !APIntOps::isMask(W, M))
189      return false;
190  } else {
191    // Check if M starts with a contiguous sequence of W times 1 bits. Get
192    // the low U bits of M (which eliminates the 0 bits shifted in on the
193    // left), and check if the result is APInt's "mask":
194    if (!APIntOps::isMask(W, M.getLoBits(U)))
195      return false;
196  }
197
198  IRBuilder<> IRB(In);
199  Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
200                                   : Intrinsic::hexagon_S2_extractup;
201  Module *Mod = BB->getParent()->getParent();
202  Value *ExtF = Intrinsic::getDeclaration(Mod, IntId);
203  Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)});
204  if (SL != 0)
205    NewIn = IRB.CreateShl(NewIn, SL, CSL->getName());
206  In->replaceAllUsesWith(NewIn);
207  return true;
208}
209
210
211bool HexagonGenExtract::visitBlock(BasicBlock *B) {
212  // Depth-first, bottom-up traversal.
213  DomTreeNode *DTN = DT->getNode(B);
214  typedef GraphTraits<DomTreeNode*> GTN;
215  typedef GTN::ChildIteratorType Iter;
216  for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I)
217    visitBlock((*I)->getBlock());
218
219  // Allow limiting the number of generated extracts for debugging purposes.
220  bool HasCutoff = ExtractCutoff.getPosition();
221  unsigned Cutoff = ExtractCutoff;
222
223  bool Changed = false;
224  BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin();
225  while (true) {
226    if (HasCutoff && (ExtractCount >= Cutoff))
227      return Changed;
228    bool Last = (I == Begin);
229    if (!Last)
230      NextI = std::prev(I);
231    Instruction *In = &*I;
232    bool Done = convert(In);
233    if (HasCutoff && Done)
234      ExtractCount++;
235    Changed |= Done;
236    if (Last)
237      break;
238    I = NextI;
239  }
240  return Changed;
241}
242
243
244bool HexagonGenExtract::runOnFunction(Function &F) {
245  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
246  bool Changed;
247
248  // Traverse the function bottom-up, to see super-expressions before their
249  // sub-expressions.
250  BasicBlock *Entry = GraphTraits<Function*>::getEntryNode(&F);
251  Changed = visitBlock(Entry);
252
253  return Changed;
254}
255
256
257FunctionPass *llvm::createHexagonGenExtract() {
258  return new HexagonGenExtract();
259}
260