RSForEachExpand.cpp revision 4102bec56151fb5d9c962fb298412f34a6eacaa8
1/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "bcc/Assert.h"
18#include "bcc/Renderscript/RSTransforms.h"
19
20#include <cstdlib>
21
22#include <llvm/IR/DerivedTypes.h>
23#include <llvm/IR/Function.h>
24#include <llvm/IR/Instructions.h>
25#include <llvm/IR/IRBuilder.h>
26#include <llvm/IR/Module.h>
27#include <llvm/Pass.h>
28#include <llvm/Support/raw_ostream.h>
29#include <llvm/IR/DataLayout.h>
30#include <llvm/IR/Type.h>
31#include <llvm/Transforms/Utils/BasicBlockUtils.h>
32
33#include "bcc/Config/Config.h"
34#include "bcc/Renderscript/RSInfo.h"
35#include "bcc/Support/Log.h"
36
37using namespace bcc;
38
39namespace {
40
41/* RSForEachExpandPass - This pass operates on functions that are able to be
42 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
43 * ForEach-able function to be invoked over the appropriate data cells of the
44 * input/output allocations (adjusting other relevant parameters as we go). We
45 * support doing this for any ForEach-able compute kernels. The new function
46 * name is the original function name followed by ".expand". Note that we
47 * still generate code for the original function.
48 */
49class RSForEachExpandPass : public llvm::ModulePass {
50private:
51  static char ID;
52
53  llvm::Module *M;
54  llvm::LLVMContext *C;
55
56  const RSInfo::ExportForeachFuncListTy &mFuncs;
57
58  // Turns on optimization of allocation stride values.
59  bool mEnableStepOpt;
60
61  uint32_t getRootSignature(llvm::Function *F) {
62    const llvm::NamedMDNode *ExportForEachMetadata =
63        M->getNamedMetadata("#rs_export_foreach");
64
65    if (!ExportForEachMetadata) {
66      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
67      for (llvm::Function::arg_iterator B = F->arg_begin(),
68                                        E = F->arg_end();
69           B != E;
70           ++B) {
71        RootArgTys.push_back(B->getType());
72      }
73
74      // For pre-ICS bitcode, we may not have signature information. In that
75      // case, we use the size of the RootArgTys to select the number of
76      // arguments.
77      return (1 << RootArgTys.size()) - 1;
78    }
79
80    if (ExportForEachMetadata->getNumOperands() == 0) {
81      return 0;
82    }
83
84    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
85
86    // We only handle the case for legacy root() functions here, so this is
87    // hard-coded to look at only the first such function.
88    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
89    if (SigNode != NULL && SigNode->getNumOperands() == 1) {
90      llvm::Value *SigVal = SigNode->getOperand(0);
91      if (SigVal->getValueID() == llvm::Value::MDStringVal) {
92        llvm::StringRef SigString =
93            static_cast<llvm::MDString*>(SigVal)->getString();
94        uint32_t Signature = 0;
95        if (SigString.getAsInteger(10, Signature)) {
96          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
97          return 0;
98        }
99        return Signature;
100      }
101    }
102
103    return 0;
104  }
105
106  // Get the actual value we should use to step through an allocation.
107  //
108  // Normally the value we use to step through an allocation is given to us by
109  // the driver. However, for certain primitive data types, we can derive an
110  // integer constant for the step value. We use this integer constant whenever
111  // possible to allow further compiler optimizations to take place.
112  //
113  // DL - Target Data size/layout information.
114  // T - Type of allocation (should be a pointer).
115  // OrigStep - Original step increment (root.expand() input from driver).
116  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *T,
117                            llvm::Value *OrigStep) {
118    bccAssert(DL);
119    bccAssert(T);
120    bccAssert(OrigStep);
121    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T);
122    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
123    if (mEnableStepOpt && T != VoidPtrTy && PT) {
124      llvm::Type *ET = PT->getElementType();
125      uint64_t ETSize = DL->getTypeAllocSize(ET);
126      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
127      return llvm::ConstantInt::get(Int32Ty, ETSize);
128    } else {
129      return OrigStep;
130    }
131  }
132
133  static bool hasIn(uint32_t Signature) {
134    return Signature & 0x01;
135  }
136
137  static bool hasOut(uint32_t Signature) {
138    return Signature & 0x02;
139  }
140
141  static bool hasUsrData(uint32_t Signature) {
142    return Signature & 0x04;
143  }
144
145  static bool hasX(uint32_t Signature) {
146    return Signature & 0x08;
147  }
148
149  static bool hasY(uint32_t Signature) {
150    return Signature & 0x10;
151  }
152
153  static bool isKernel(uint32_t Signature) {
154    return Signature & 0x20;
155  }
156
157  /// @brief Returns the type of the ForEach stub parameter structure.
158  ///
159  /// Renderscript uses a single structure in which all parameters are passed
160  /// to keep the signature of the expanded function independent of the
161  /// parameters passed to it.
162  llvm::Type *getForeachStubTy() {
163    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
164    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
165    llvm::Type *SizeTy = Int32Ty;
166    /* Defined in frameworks/base/libs/rs/rs_hal.h:
167     *
168     * struct RsForEachStubParamStruct {
169     *   const void *in;
170     *   void *out;
171     *   const void *usr;
172     *   size_t usr_len;
173     *   uint32_t x;
174     *   uint32_t y;
175     *   uint32_t z;
176     *   uint32_t lod;
177     *   enum RsAllocationCubemapFace face;
178     *   uint32_t ar[16];
179     * };
180     */
181    llvm::SmallVector<llvm::Type*, 9> StructTys;
182    StructTys.push_back(VoidPtrTy);  // const void *in
183    StructTys.push_back(VoidPtrTy);  // void *out
184    StructTys.push_back(VoidPtrTy);  // const void *usr
185    StructTys.push_back(SizeTy);     // size_t usr_len
186    StructTys.push_back(Int32Ty);    // uint32_t x
187    StructTys.push_back(Int32Ty);    // uint32_t y
188    StructTys.push_back(Int32Ty);    // uint32_t z
189    StructTys.push_back(Int32Ty);    // uint32_t lod
190    StructTys.push_back(Int32Ty);    // enum RsAllocationCubemapFace
191    StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16));  // uint32_t ar[16]
192
193    return llvm::StructType::create(StructTys, "RsForEachStubParamStruct");
194  }
195
196  /// @brief Create skeleton of the expanded function.
197  ///
198  /// This creates a function with the following signature:
199  ///
200  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
201  ///         uint32_t instep, uint32_t outstep)
202  ///
203  llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
204    llvm::Type *ForEachStubPtrTy = getForeachStubTy()->getPointerTo();
205    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
206
207    llvm::SmallVector<llvm::Type*, 8> ParamTys;
208    ParamTys.push_back(ForEachStubPtrTy);  // const RsForEachStubParamStruct *p
209    ParamTys.push_back(Int32Ty);           // uint32_t x1
210    ParamTys.push_back(Int32Ty);           // uint32_t x2
211    ParamTys.push_back(Int32Ty);           // uint32_t instep
212    ParamTys.push_back(Int32Ty);           // uint32_t outstep
213
214    llvm::FunctionType *FT =
215        llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
216    llvm::Function *F =
217        llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage,
218                               OldName + ".expand", M);
219
220    llvm::Function::arg_iterator AI = F->arg_begin();
221
222    AI->setName("p");
223    AI++;
224    AI->setName("x1");
225    AI++;
226    AI->setName("x2");
227    AI++;
228    AI->setName("arg_instep");
229    AI++;
230    AI->setName("arg_outstep");
231    AI++;
232
233    assert(AI == F->arg_end());
234
235    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*C, "Begin", F);
236    llvm::IRBuilder<> Builder(Begin);
237    Builder.CreateRetVoid();
238
239    return F;
240  }
241
242  /// @brief Create an empty loop
243  ///
244  /// Create a loop of the form:
245  ///
246  /// for (i = LowerBound; i < UpperBound; i++)
247  ///   ;
248  ///
249  /// After the loop has been created, the builder is set such that
250  /// instructions can be added to the loop body.
251  ///
252  /// @param Builder The builder to use to build this loop. The current
253  ///                position of the builder is the position the loop
254  ///                will be inserted.
255  /// @param LowerBound The first value of the loop iterator
256  /// @param UpperBound The maximal value of the loop iterator
257  /// @param LoopIV A reference that will be set to the loop iterator.
258  /// @return The BasicBlock that will be executed after the loop.
259  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
260                               llvm::Value *LowerBound,
261                               llvm::Value *UpperBound,
262                               llvm::PHINode **LoopIV) {
263    assert(LowerBound->getType() == UpperBound->getType());
264
265    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
266    llvm::Value *Cond, *IVNext;
267    llvm::PHINode *IV;
268
269    CondBB = Builder.GetInsertBlock();
270    AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
271    HeaderBB = llvm::BasicBlock::Create(*C, "Loop", CondBB->getParent());
272
273    // if (LowerBound < Upperbound)
274    //   goto LoopHeader
275    // else
276    //   goto AfterBB
277    CondBB->getTerminator()->eraseFromParent();
278    Builder.SetInsertPoint(CondBB);
279    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
280    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
281
282    // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
283    // iv.next = iv + 1
284    // if (iv.next < Upperbound)
285    //   goto LoopHeader
286    // else
287    //   goto AfterBB
288    Builder.SetInsertPoint(HeaderBB);
289    IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
290    IV->addIncoming(LowerBound, CondBB);
291    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
292    IV->addIncoming(IVNext, HeaderBB);
293    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
294    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
295    AfterBB->setName("Exit");
296    Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
297    *LoopIV = IV;
298    return AfterBB;
299  }
300
301public:
302  RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
303                      bool pEnableStepOpt)
304      : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs),
305        mEnableStepOpt(pEnableStepOpt) {
306  }
307
308  /* Performs the actual optimization on a selected function. On success, the
309   * Module will contain a new function of the name "<NAME>.expand" that
310   * invokes <NAME>() in a loop with the appropriate parameters.
311   */
312  bool ExpandFunction(llvm::Function *F, uint32_t Signature) {
313    ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str());
314
315    if (!Signature) {
316      Signature = getRootSignature(F);
317      if (!Signature) {
318        // We couldn't determine how to expand this function based on its
319        // function signature.
320        return false;
321      }
322    }
323
324    llvm::DataLayout DL(M);
325
326    llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
327
328    // Create and name the actual arguments to this expanded function.
329    llvm::SmallVector<llvm::Argument*, 8> ArgVec;
330    for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
331                                      E = ExpandedFunc->arg_end();
332         B != E;
333         ++B) {
334      ArgVec.push_back(B);
335    }
336
337    if (ArgVec.size() != 5) {
338      ALOGE("Incorrect number of arguments to function: %zu",
339            ArgVec.size());
340      return false;
341    }
342    llvm::Value *Arg_p = ArgVec[0];
343    llvm::Value *Arg_x1 = ArgVec[1];
344    llvm::Value *Arg_x2 = ArgVec[2];
345    llvm::Value *Arg_instep = ArgVec[3];
346    llvm::Value *Arg_outstep = ArgVec[4];
347
348    llvm::Value *InStep = NULL;
349    llvm::Value *OutStep = NULL;
350
351    // Construct the actual function body.
352    llvm::IRBuilder<> Builder(ExpandedFunc->getEntryBlock().begin());
353
354    // Collect and construct the arguments for the kernel().
355    // Note that we load any loop-invariant arguments before entering the Loop.
356    llvm::Function::arg_iterator Args = F->arg_begin();
357
358    llvm::Type *InTy = NULL;
359    llvm::Value *InBasePtr = NULL;
360    if (hasIn(Signature)) {
361      InTy = Args->getType();
362      InStep = getStepValue(&DL, InTy, Arg_instep);
363      InStep->setName("instep");
364      InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
365      Args++;
366    }
367
368    llvm::Type *OutTy = NULL;
369    llvm::Value *OutBasePtr = NULL;
370    if (hasOut(Signature)) {
371      OutTy = Args->getType();
372      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
373      OutStep->setName("outstep");
374      OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
375      Args++;
376    }
377
378    llvm::Value *UsrData = NULL;
379    if (hasUsrData(Signature)) {
380      llvm::Type *UsrDataTy = Args->getType();
381      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
382          Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
383      UsrData->setName("UsrData");
384      Args++;
385    }
386
387    if (hasX(Signature)) {
388      Args++;
389    }
390
391    llvm::Value *Y = NULL;
392    if (hasY(Signature)) {
393      Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
394      Args++;
395    }
396
397    bccAssert(Args == F->arg_end());
398
399    llvm::PHINode *IV;
400    createLoop(Builder, Arg_x1, Arg_x2, &IV);
401
402    // Populate the actual call to kernel().
403    llvm::SmallVector<llvm::Value*, 8> RootArgs;
404
405    llvm::Value *InPtr = NULL;
406    llvm::Value *OutPtr = NULL;
407
408    // Calculate the current input and output pointers
409    //
410    // We always calculate the input/output pointers with a GEP operating on i8
411    // values and only cast at the very end to OutTy. This is because the step
412    // between two values is given in bytes.
413    //
414    // TODO: We could further optimize the output by using a GEP operation of
415    // type 'OutTy' in cases where the element type of the allocation allows.
416    if (OutBasePtr) {
417      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
418      OutOffset = Builder.CreateMul(OutOffset, OutStep);
419      OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
420      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
421    }
422    if (InBasePtr) {
423      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
424      InOffset = Builder.CreateMul(InOffset, InStep);
425      InPtr = Builder.CreateGEP(InBasePtr, InOffset);
426      InPtr = Builder.CreatePointerCast(InPtr, InTy);
427    }
428
429    if (InPtr) {
430      RootArgs.push_back(InPtr);
431    }
432
433    if (OutPtr) {
434      RootArgs.push_back(OutPtr);
435    }
436
437    if (UsrData) {
438      RootArgs.push_back(UsrData);
439    }
440
441    llvm::Value *X = IV;
442    if (hasX(Signature)) {
443      RootArgs.push_back(X);
444    }
445
446    if (Y) {
447      RootArgs.push_back(Y);
448    }
449
450    Builder.CreateCall(F, RootArgs);
451
452    return true;
453  }
454
455  /* Expand a pass-by-value kernel.
456   */
457  bool ExpandKernel(llvm::Function *F, uint32_t Signature) {
458    bccAssert(isKernel(Signature));
459    ALOGV("Expanding kernel Function %s", F->getName().str().c_str());
460
461    // TODO: Refactor this to share functionality with ExpandFunction.
462    llvm::DataLayout DL(M);
463
464    llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
465
466    // Create and name the actual arguments to this expanded function.
467    llvm::SmallVector<llvm::Argument*, 8> ArgVec;
468    for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
469                                      E = ExpandedFunc->arg_end();
470         B != E;
471         ++B) {
472      ArgVec.push_back(B);
473    }
474
475    if (ArgVec.size() != 5) {
476      ALOGE("Incorrect number of arguments to function: %zu",
477            ArgVec.size());
478      return false;
479    }
480    llvm::Value *Arg_p = ArgVec[0];
481    llvm::Value *Arg_x1 = ArgVec[1];
482    llvm::Value *Arg_x2 = ArgVec[2];
483    llvm::Value *Arg_instep = ArgVec[3];
484    llvm::Value *Arg_outstep = ArgVec[4];
485
486    llvm::Value *InStep = NULL;
487    llvm::Value *OutStep = NULL;
488
489    // Construct the actual function body.
490    llvm::IRBuilder<> Builder(ExpandedFunc->getEntryBlock().begin());
491
492    // Collect and construct the arguments for the kernel().
493    // Note that we load any loop-invariant arguments before entering the Loop.
494    llvm::Function::arg_iterator Args = F->arg_begin();
495
496    llvm::Type *OutTy = NULL;
497    bool PassOutByReference = false;
498    llvm::Value *OutBasePtr = NULL;
499    if (hasOut(Signature)) {
500      llvm::Type *OutBaseTy = F->getReturnType();
501      if (OutBaseTy->isVoidTy()) {
502        PassOutByReference = true;
503        OutTy = Args->getType();
504        Args++;
505      } else {
506        OutTy = OutBaseTy->getPointerTo();
507        // We don't increment Args, since we are using the actual return type.
508      }
509      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
510      OutStep->setName("outstep");
511      OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
512    }
513
514    llvm::Type *InBaseTy = NULL;
515    llvm::Type *InTy = NULL;
516    llvm::Value *InBasePtr = NULL;
517    if (hasIn(Signature)) {
518      InBaseTy = Args->getType();
519      InTy =InBaseTy->getPointerTo();
520      InStep = getStepValue(&DL, InTy, Arg_instep);
521      InStep->setName("instep");
522      InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
523      Args++;
524    }
525
526    // No usrData parameter on kernels.
527    bccAssert(!hasUsrData(Signature));
528
529    if (hasX(Signature)) {
530      Args++;
531    }
532
533    llvm::Value *Y = NULL;
534    if (hasY(Signature)) {
535      Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
536      Args++;
537    }
538
539    bccAssert(Args == F->arg_end());
540
541    llvm::PHINode *IV;
542    createLoop(Builder, Arg_x1, Arg_x2, &IV);
543
544    // Populate the actual call to kernel().
545    llvm::SmallVector<llvm::Value*, 8> RootArgs;
546
547    llvm::Value *InPtr = NULL;
548    llvm::Value *OutPtr = NULL;
549
550    // Calculate the current input and output pointers
551    //
552    // We always calculate the input/output pointers with a GEP operating on i8
553    // values and only cast at the very end to OutTy. This is because the step
554    // between two values is given in bytes.
555    //
556    // TODO: We could further optimize the output by using a GEP operation of
557    // type 'OutTy' in cases where the element type of the allocation allows.
558    if (OutBasePtr) {
559      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
560      OutOffset = Builder.CreateMul(OutOffset, OutStep);
561      OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
562      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
563    }
564    if (InBasePtr) {
565      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
566      InOffset = Builder.CreateMul(InOffset, InStep);
567      InPtr = Builder.CreateGEP(InBasePtr, InOffset);
568      InPtr = Builder.CreatePointerCast(InPtr, InTy);
569    }
570
571    if (PassOutByReference) {
572      RootArgs.push_back(OutPtr);
573    }
574
575    if (InPtr) {
576      llvm::Value *In = Builder.CreateLoad(InPtr, "In");
577      RootArgs.push_back(In);
578    }
579
580    llvm::Value *X = IV;
581    if (hasX(Signature)) {
582      RootArgs.push_back(X);
583    }
584
585    if (Y) {
586      RootArgs.push_back(Y);
587    }
588
589    llvm::Value *RetVal = Builder.CreateCall(F, RootArgs);
590
591    if (OutPtr && !PassOutByReference) {
592      Builder.CreateStore(RetVal, OutPtr);
593    }
594
595    return true;
596  }
597
598  virtual bool runOnModule(llvm::Module &M) {
599    bool Changed = false;
600    this->M = &M;
601    C = &M.getContext();
602
603    for (RSInfo::ExportForeachFuncListTy::const_iterator
604             func_iter = mFuncs.begin(), func_end = mFuncs.end();
605         func_iter != func_end; func_iter++) {
606      const char *name = func_iter->first;
607      uint32_t signature = func_iter->second;
608      llvm::Function *kernel = M.getFunction(name);
609      if (kernel && isKernel(signature)) {
610        Changed |= ExpandKernel(kernel, signature);
611      }
612      else if (kernel && kernel->getReturnType()->isVoidTy()) {
613        Changed |= ExpandFunction(kernel, signature);
614      }
615    }
616
617    return Changed;
618  }
619
620  virtual const char *getPassName() const {
621    return "ForEach-able Function Expansion";
622  }
623
624}; // end RSForEachExpandPass
625
626} // end anonymous namespace
627
628char RSForEachExpandPass::ID = 0;
629
630namespace bcc {
631
632llvm::ModulePass *
633createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
634                          bool pEnableStepOpt){
635  return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt);
636}
637
638} // end namespace bcc
639