RSForEachExpand.cpp revision c754d49ee856be620e041348a9f2b3d5610a5a26
1/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "bcc/Assert.h"
18#include "bcc/Renderscript/RSTransforms.h"
19
20#include <cstdlib>
21
22#include <llvm/IR/DerivedTypes.h>
23#include <llvm/IR/Function.h>
24#include <llvm/IR/Instructions.h>
25#include <llvm/IR/IRBuilder.h>
26#include <llvm/IR/MDBuilder.h>
27#include <llvm/IR/Module.h>
28#include <llvm/Pass.h>
29#include <llvm/Support/raw_ostream.h>
30#include <llvm/IR/DataLayout.h>
31#include <llvm/IR/Function.h>
32#include <llvm/IR/Type.h>
33#include <llvm/Transforms/Utils/BasicBlockUtils.h>
34
35#include "bcc/Config/Config.h"
36#include "bcc/Support/Log.h"
37
38#include "bcinfo/MetadataExtractor.h"
39
40#define NUM_EXPANDED_FUNCTION_PARAMS 4
41
42using namespace bcc;
43
44namespace {
45
46static const bool gEnableRsTbaa = true;
47
48/* RSForEachExpandPass - This pass operates on functions that are able to be
49 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
50 * ForEach-able function to be invoked over the appropriate data cells of the
51 * input/output allocations (adjusting other relevant parameters as we go). We
52 * support doing this for any ForEach-able compute kernels. The new function
53 * name is the original function name followed by ".expand". Note that we
54 * still generate code for the original function.
55 */
56class RSForEachExpandPass : public llvm::ModulePass {
57private:
58  static char ID;
59
60  llvm::Module *Module;
61  llvm::LLVMContext *Context;
62
63  /*
64   * Pointer to LLVM type information for the ForEachStubType and the function
65   * signature for expanded kernels.  These must be re-calculated for each
66   * module the pass is run on.
67   */
68  llvm::StructType   *ForEachStubType;
69  llvm::FunctionType *ExpandedFunctionType;
70
71  uint32_t mExportForEachCount;
72  const char **mExportForEachNameList;
73  const uint32_t *mExportForEachSignatureList;
74
75  // Turns on optimization of allocation stride values.
76  bool mEnableStepOpt;
77
78  uint32_t getRootSignature(llvm::Function *Function) {
79    const llvm::NamedMDNode *ExportForEachMetadata =
80        Module->getNamedMetadata("#rs_export_foreach");
81
82    if (!ExportForEachMetadata) {
83      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
84      for (llvm::Function::arg_iterator B = Function->arg_begin(),
85                                        E = Function->arg_end();
86           B != E;
87           ++B) {
88        RootArgTys.push_back(B->getType());
89      }
90
91      // For pre-ICS bitcode, we may not have signature information. In that
92      // case, we use the size of the RootArgTys to select the number of
93      // arguments.
94      return (1 << RootArgTys.size()) - 1;
95    }
96
97    if (ExportForEachMetadata->getNumOperands() == 0) {
98      return 0;
99    }
100
101    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
102
103    // We only handle the case for legacy root() functions here, so this is
104    // hard-coded to look at only the first such function.
105    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
106    if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
107      llvm::Value *SigVal = SigNode->getOperand(0);
108      if (SigVal->getValueID() == llvm::Value::MDStringVal) {
109        llvm::StringRef SigString =
110            static_cast<llvm::MDString*>(SigVal)->getString();
111        uint32_t Signature = 0;
112        if (SigString.getAsInteger(10, Signature)) {
113          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
114          return 0;
115        }
116        return Signature;
117      }
118    }
119
120    return 0;
121  }
122
123  bool isStepOptSupported(llvm::Type *AllocType) {
124
125    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
126    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
127
128    if (mEnableStepOpt) {
129      return false;
130    }
131
132    if (AllocType == VoidPtrTy) {
133      return false;
134    }
135
136    if (!PT) {
137      return false;
138    }
139
140    // remaining conditions are 64-bit only
141    if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
142      return true;
143    }
144
145    // coerce suggests an upconverted struct type, which we can't support
146    if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
147      return false;
148    }
149
150    // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
151    llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
152    llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
153    if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
154      return false;
155    }
156
157    return true;
158  }
159
160  // Get the actual value we should use to step through an allocation.
161  //
162  // Normally the value we use to step through an allocation is given to us by
163  // the driver. However, for certain primitive data types, we can derive an
164  // integer constant for the step value. We use this integer constant whenever
165  // possible to allow further compiler optimizations to take place.
166  //
167  // DL - Target Data size/layout information.
168  // T - Type of allocation (should be a pointer).
169  // OrigStep - Original step increment (root.expand() input from driver).
170  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
171                            llvm::Value *OrigStep) {
172    bccAssert(DL);
173    bccAssert(AllocType);
174    bccAssert(OrigStep);
175    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
176    if (isStepOptSupported(AllocType)) {
177      llvm::Type *ET = PT->getElementType();
178      uint64_t ETSize = DL->getTypeAllocSize(ET);
179      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
180      return llvm::ConstantInt::get(Int32Ty, ETSize);
181    } else {
182      return OrigStep;
183    }
184  }
185
186#define PARAM_FIELD_INS         0
187#define PARAM_FIELD_INESTRIDES  1
188#define PARAM_FIELD_OUT         2
189#define PARAM_FIELD_Y           3
190#define PARAM_FIELD_Z           4
191#define PARAM_FIELD_LID         5
192#define PARAM_FIELD_USR         6
193#define PARAM_FIELD_DIMX        7
194#define PARAM_FIELD_DIMY        8
195#define PARAM_FIELD_DIMZ        9
196#define PARAM_FIELD_SLOT       10
197
198  /// Builds the types required by the pass for the given context.
199  void buildTypes(void) {
200    // Create the RsForEachStubParam struct.
201
202    llvm::Type *VoidPtrTy    = llvm::Type::getInt8PtrTy(*Context);
203    llvm::Type *VoidPtrPtrTy = VoidPtrTy->getPointerTo();
204    llvm::Type *Int32Ty      = llvm::Type::getInt32Ty(*Context);
205    llvm::Type *Int32PtrTy   = Int32Ty->getPointerTo();
206
207    /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
208     *
209     * struct RsForEachKernelStruct{
210     *   const void *in;
211     *   void *out;
212     *   uint32_t y;
213     *   uint32_t z;
214     *   uint32_t lid;
215     *   const void **ins;
216     *   uint32_t *inEStrides;
217     *   const void *usr;
218     *   uint32_t dimX;
219     *   uint32_t dimY;
220     *   uint32_t dimZ;
221     *   uint32_t slot;
222     * };
223     */
224    llvm::SmallVector<llvm::Type*, 12> StructTypes;
225    StructTypes.push_back(VoidPtrPtrTy); // const void **ins
226    StructTypes.push_back(Int32PtrTy);   // uint32_t *inEStrides
227    StructTypes.push_back(VoidPtrTy);    // void *out
228    StructTypes.push_back(Int32Ty);      // uint32_t y
229    StructTypes.push_back(Int32Ty);      // uint32_t z
230    StructTypes.push_back(Int32Ty);      // uint32_t lid
231    StructTypes.push_back(VoidPtrTy);    // const void *usr
232    StructTypes.push_back(Int32Ty);      // uint32_t dimX
233    StructTypes.push_back(Int32Ty);      // uint32_t dimY
234    StructTypes.push_back(Int32Ty);      // uint32_t dimZ
235    StructTypes.push_back(Int32Ty);      // uint32_t slot
236
237    ForEachStubType =
238      llvm::StructType::create(StructTypes, "RsForEachStubParamStruct");
239
240    // Create the function type for expanded kernels.
241
242    llvm::Type *ForEachStubPtrTy = ForEachStubType->getPointerTo();
243
244    llvm::SmallVector<llvm::Type*, 8> ParamTypes;
245    ParamTypes.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
246    ParamTypes.push_back(Int32Ty);          // uint32_t x1
247    ParamTypes.push_back(Int32Ty);          // uint32_t x2
248    ParamTypes.push_back(Int32Ty);          // uint32_t outstep
249
250    ExpandedFunctionType =
251        llvm::FunctionType::get(llvm::Type::getVoidTy(*Context), ParamTypes,
252                                false);
253  }
254
255  /// @brief Create skeleton of the expanded function.
256  ///
257  /// This creates a function with the following signature:
258  ///
259  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
260  ///         uint32_t outstep)
261  ///
262  llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
263    llvm::Function *ExpandedFunction =
264      llvm::Function::Create(ExpandedFunctionType,
265                             llvm::GlobalValue::ExternalLinkage,
266                             OldName + ".expand", Module);
267
268    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
269
270    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
271
272    (AI++)->setName("p");
273    (AI++)->setName("x1");
274    (AI++)->setName("x2");
275    (AI++)->setName("arg_outstep");
276
277    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
278                                                       ExpandedFunction);
279    llvm::IRBuilder<> Builder(Begin);
280    Builder.CreateRetVoid();
281
282    return ExpandedFunction;
283  }
284
285  /// @brief Create an empty loop
286  ///
287  /// Create a loop of the form:
288  ///
289  /// for (i = LowerBound; i < UpperBound; i++)
290  ///   ;
291  ///
292  /// After the loop has been created, the builder is set such that
293  /// instructions can be added to the loop body.
294  ///
295  /// @param Builder The builder to use to build this loop. The current
296  ///                position of the builder is the position the loop
297  ///                will be inserted.
298  /// @param LowerBound The first value of the loop iterator
299  /// @param UpperBound The maximal value of the loop iterator
300  /// @param LoopIV A reference that will be set to the loop iterator.
301  /// @return The BasicBlock that will be executed after the loop.
302  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
303                               llvm::Value *LowerBound,
304                               llvm::Value *UpperBound,
305                               llvm::PHINode **LoopIV) {
306    assert(LowerBound->getType() == UpperBound->getType());
307
308    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
309    llvm::Value *Cond, *IVNext;
310    llvm::PHINode *IV;
311
312    CondBB = Builder.GetInsertBlock();
313    AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
314    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
315
316    // if (LowerBound < Upperbound)
317    //   goto LoopHeader
318    // else
319    //   goto AfterBB
320    CondBB->getTerminator()->eraseFromParent();
321    Builder.SetInsertPoint(CondBB);
322    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
323    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
324
325    // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
326    // iv.next = iv + 1
327    // if (iv.next < Upperbound)
328    //   goto LoopHeader
329    // else
330    //   goto AfterBB
331    Builder.SetInsertPoint(HeaderBB);
332    IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
333    IV->addIncoming(LowerBound, CondBB);
334    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
335    IV->addIncoming(IVNext, HeaderBB);
336    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
337    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
338    AfterBB->setName("Exit");
339    Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
340    *LoopIV = IV;
341    return AfterBB;
342  }
343
344public:
345  RSForEachExpandPass(bool pEnableStepOpt)
346      : ModulePass(ID), Module(nullptr), Context(nullptr),
347        mEnableStepOpt(pEnableStepOpt) {
348
349  }
350
351  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
352    // This pass does not use any other analysis passes, but it does
353    // add/wrap the existing functions in the module (thus altering the CFG).
354  }
355
356  /* Performs the actual optimization on a selected function. On success, the
357   * Module will contain a new function of the name "<NAME>.expand" that
358   * invokes <NAME>() in a loop with the appropriate parameters.
359   */
360  bool ExpandFunction(llvm::Function *Function, uint32_t Signature) {
361    ALOGV("Expanding ForEach-able Function %s",
362          Function->getName().str().c_str());
363
364    if (!Signature) {
365      Signature = getRootSignature(Function);
366      if (!Signature) {
367        // We couldn't determine how to expand this function based on its
368        // function signature.
369        return false;
370      }
371    }
372
373    llvm::DataLayout DL(Module);
374
375    llvm::Function *ExpandedFunction =
376      createEmptyExpandedFunction(Function->getName());
377
378    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
379
380    /*
381     * Extract the expanded function's parameters.  It is guaranteed by
382     * createEmptyExpandedFunction that there will be five parameters.
383     */
384    llvm::Function::arg_iterator ExpandedFunctionArgIter =
385      ExpandedFunction->arg_begin();
386
387    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
388    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
389    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
390    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
391
392    llvm::Value *InStep  = nullptr;
393    llvm::Value *OutStep = nullptr;
394
395    // Construct the actual function body.
396    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
397
398    // Collect and construct the arguments for the kernel().
399    // Note that we load any loop-invariant arguments before entering the Loop.
400    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
401
402    llvm::Type  *InTy      = nullptr;
403    llvm::Value *InBasePtr = nullptr;
404    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
405      llvm::Value    *InsMember  = Builder.CreateStructGEP(Arg_p,
406                                                           PARAM_FIELD_INS);
407      llvm::LoadInst *InsBasePtr = Builder.CreateLoad(InsMember, "inputs_base");
408
409      llvm::Value *InStepsMember =
410        Builder.CreateStructGEP(Arg_p, PARAM_FIELD_INESTRIDES);
411      llvm::LoadInst *InStepsBase = Builder.CreateLoad(InStepsMember,
412                                                       "insteps_base");
413
414      llvm::Value *IndexVal = Builder.getInt32(0);
415
416      llvm::Value    *InStepAddr = Builder.CreateGEP(InStepsBase, IndexVal);
417      llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
418                                                      "instep_addr");
419
420      InTy = (FunctionArgIter++)->getType();
421      InStep = getStepValue(&DL, InTy, InStepArg);
422
423      InStep->setName("instep");
424
425      llvm::Value *InputAddr = Builder.CreateGEP(InsBasePtr, IndexVal);
426      InBasePtr = Builder.CreateLoad(InputAddr, "input_base");
427    }
428
429    llvm::Type *OutTy = nullptr;
430    llvm::Value *OutBasePtr = nullptr;
431    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
432      OutTy = (FunctionArgIter++)->getType();
433      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
434      OutStep->setName("outstep");
435      OutBasePtr = Builder.CreateLoad(
436                     Builder.CreateStructGEP(Arg_p, PARAM_FIELD_OUT));
437    }
438
439    llvm::Value *UsrData = nullptr;
440    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
441      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
442      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
443          Builder.CreateStructGEP(Arg_p, PARAM_FIELD_USR)), UsrDataTy);
444      UsrData->setName("UsrData");
445    }
446
447    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
448      FunctionArgIter++;
449    }
450
451    llvm::Value *Y = nullptr;
452    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
453      Y = Builder.CreateLoad(
454            Builder.CreateStructGEP(Arg_p, PARAM_FIELD_Y), "Y");
455
456      FunctionArgIter++;
457    }
458
459    bccAssert(FunctionArgIter == Function->arg_end());
460
461    llvm::PHINode *IV;
462    createLoop(Builder, Arg_x1, Arg_x2, &IV);
463
464    // Populate the actual call to kernel().
465    llvm::SmallVector<llvm::Value*, 8> RootArgs;
466
467    llvm::Value *InPtr  = nullptr;
468    llvm::Value *OutPtr = nullptr;
469
470    // Calculate the current input and output pointers
471    //
472    // We always calculate the input/output pointers with a GEP operating on i8
473    // values and only cast at the very end to OutTy. This is because the step
474    // between two values is given in bytes.
475    //
476    // TODO: We could further optimize the output by using a GEP operation of
477    // type 'OutTy' in cases where the element type of the allocation allows.
478    if (OutBasePtr) {
479      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
480      OutOffset = Builder.CreateMul(OutOffset, OutStep);
481      OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
482      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
483    }
484
485    if (InBasePtr) {
486      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
487      InOffset = Builder.CreateMul(InOffset, InStep);
488      InPtr = Builder.CreateGEP(InBasePtr, InOffset);
489      InPtr = Builder.CreatePointerCast(InPtr, InTy);
490    }
491
492    if (InPtr) {
493      RootArgs.push_back(InPtr);
494    }
495
496    if (OutPtr) {
497      RootArgs.push_back(OutPtr);
498    }
499
500    if (UsrData) {
501      RootArgs.push_back(UsrData);
502    }
503
504    llvm::Value *X = IV;
505    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
506      RootArgs.push_back(X);
507    }
508
509    if (Y) {
510      RootArgs.push_back(Y);
511    }
512
513    Builder.CreateCall(Function, RootArgs);
514
515    return true;
516  }
517
518  /* Expand a pass-by-value kernel.
519   */
520  bool ExpandKernel(llvm::Function *Function, uint32_t Signature) {
521    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
522    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
523
524    // TODO: Refactor this to share functionality with ExpandFunction.
525    llvm::DataLayout DL(Module);
526
527    llvm::Function *ExpandedFunction =
528      createEmptyExpandedFunction(Function->getName());
529
530    /*
531     * Extract the expanded function's parameters.  It is guaranteed by
532     * createEmptyExpandedFunction that there will be five parameters.
533     */
534
535    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
536
537    llvm::Function::arg_iterator ExpandedFunctionArgIter =
538      ExpandedFunction->arg_begin();
539
540    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
541    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
542    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
543    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
544
545    // Construct the actual function body.
546    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
547
548    // Create TBAA meta-data.
549    llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer;
550    llvm::MDBuilder MDHelper(*Context);
551
552    TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
553    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
554                                                       TBAARenderScript);
555    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
556                                                      TBAAAllocation, 0);
557    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
558                                                    TBAARenderScript);
559    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
560
561    llvm::MDNode *AliasingDomain, *AliasingScope;
562    AliasingDomain = MDHelper.createAnonymousAliasScopeDomain("RS argument scope domain");
563    AliasingScope = MDHelper.createAnonymousAliasScope(AliasingDomain, "RS argument scope");
564
565    /*
566     * Collect and construct the arguments for the kernel().
567     *
568     * Note that we load any loop-invariant arguments before entering the Loop.
569     */
570    size_t NumInputs = Function->arg_size();
571
572    llvm::Value *Y = nullptr;
573    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
574      Y = Builder.CreateLoad(
575            Builder.CreateStructGEP(Arg_p, PARAM_FIELD_Y), "Y");
576
577      --NumInputs;
578    }
579
580    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
581      --NumInputs;
582    }
583
584    // No usrData parameter on kernels.
585    bccAssert(
586        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
587
588    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
589
590    // Check the return type
591    llvm::Type     *OutTy            = nullptr;
592    llvm::Value    *OutStep          = nullptr;
593    llvm::LoadInst *OutBasePtr       = nullptr;
594    llvm::Value    *CastedOutBasePtr = nullptr;
595
596    bool PassOutByPointer = false;
597
598    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
599      llvm::Type *OutBaseTy = Function->getReturnType();
600
601      if (OutBaseTy->isVoidTy()) {
602        PassOutByPointer = true;
603        OutTy = ArgIter->getType();
604
605        ArgIter++;
606        --NumInputs;
607      } else {
608        // We don't increment Args, since we are using the actual return type.
609        OutTy = OutBaseTy->getPointerTo();
610      }
611
612      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
613      OutStep->setName("outstep");
614      OutBasePtr = Builder.CreateLoad(
615                     Builder.CreateStructGEP(Arg_p, PARAM_FIELD_OUT));
616
617      if (gEnableRsTbaa) {
618        OutBasePtr->setMetadata("tbaa", TBAAPointer);
619      }
620
621      OutBasePtr->setMetadata("alias.scope", AliasingScope);
622
623      CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
624    }
625
626    llvm::SmallVector<llvm::Type*,  8> InTypes;
627    llvm::SmallVector<llvm::Value*, 8> InSteps;
628    llvm::SmallVector<llvm::Value*, 8> InBasePtrs;
629    llvm::SmallVector<bool,         8> InIsStructPointer;
630
631    if (NumInputs > 0) {
632      llvm::Value *InsMember = Builder.CreateStructGEP(Arg_p, PARAM_FIELD_INS);
633      llvm::LoadInst *InsBasePtr = Builder.CreateLoad(InsMember, "inputs_base");
634
635      llvm::Value *InStepsMember =
636        Builder.CreateStructGEP(Arg_p, PARAM_FIELD_INESTRIDES);
637      llvm::LoadInst *InStepsBase = Builder.CreateLoad(InStepsMember,
638                                                         "insteps_base");
639
640      for (size_t InputIndex = 0; InputIndex < NumInputs;
641           ++InputIndex, ArgIter++) {
642
643          llvm::Value *IndexVal = Builder.getInt32(InputIndex);
644
645          llvm::Value    *InStepAddr = Builder.CreateGEP(InStepsBase, IndexVal);
646          llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
647                                                          "instep_addr");
648
649          llvm::Type *InType = ArgIter->getType();
650
651        /*
652         * AArch64 calling dictate that structs of sufficient size get passed by
653         * pointer instead of passed by value.  This, combined with the fact
654         * that we don't allow kernels to operate on pointer data means that if
655         * we see a kernel with a pointer parameter we know that it is struct
656         * input that has been promoted.  As such we don't need to convert its
657         * type to a pointer.  Later we will need to know to avoid a load, so we
658         * save this information in InIsStructPointer.
659         */
660          if (!InType->isPointerTy()) {
661            InType = InType->getPointerTo();
662            InIsStructPointer.push_back(false);
663          } else {
664            InIsStructPointer.push_back(true);
665          }
666
667          llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);
668
669          InStep->setName("instep");
670
671          llvm::Value    *InputAddr = Builder.CreateGEP(InsBasePtr, IndexVal);
672          llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr,
673                                                         "input_base");
674          llvm::Value    *CastInBasePtr = Builder.CreatePointerCast(InBasePtr,
675                                                                    InType, "casted_in");
676          if (gEnableRsTbaa) {
677            InBasePtr->setMetadata("tbaa", TBAAPointer);
678          }
679
680          InBasePtr->setMetadata("alias.scope", AliasingScope);
681
682          InTypes.push_back(InType);
683          InSteps.push_back(InStep);
684          InBasePtrs.push_back(CastInBasePtr);
685      }
686    }
687
688    llvm::PHINode *IV;
689    createLoop(Builder, Arg_x1, Arg_x2, &IV);
690
691    // Populate the actual call to kernel().
692    llvm::SmallVector<llvm::Value*, 8> RootArgs;
693
694    // Calculate the current input and output pointers
695    //
696    //
697    // We always calculate the input/output pointers with a GEP operating on i8
698    // values combined with a multiplication and only cast at the very end to
699    // OutTy.  This is to account for dynamic stepping sizes when the value
700    // isn't apparent at compile time.  In the (very common) case when we know
701    // the step size at compile time, due to haveing complete type information
702    // this multiplication will optmized out and produces code equivalent to a
703    // a GEP on a pointer of the correct type.
704
705    // Output
706
707    llvm::Value *OutPtr = nullptr;
708    if (CastedOutBasePtr) {
709      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
710
711      OutPtr    = Builder.CreateGEP(CastedOutBasePtr, OutOffset);
712
713      if (PassOutByPointer) {
714        RootArgs.push_back(OutPtr);
715      }
716    }
717
718    // Inputs
719
720    if (NumInputs > 0) {
721      llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
722
723      for (size_t Index = 0; Index < NumInputs; ++Index) {
724        llvm::Value *InPtr    = Builder.CreateGEP(InBasePtrs[Index], Offset);
725        llvm::Value *Input;
726
727        if (InIsStructPointer[Index]) {
728          Input = InPtr;
729
730        } else {
731          llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
732
733          if (gEnableRsTbaa) {
734            InputLoad->setMetadata("tbaa", TBAAAllocation);
735          }
736
737          InputLoad->setMetadata("alias.scope", AliasingScope);
738
739          Input = InputLoad;
740        }
741
742        RootArgs.push_back(Input);
743      }
744    }
745
746    llvm::Value *X = IV;
747    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
748      RootArgs.push_back(X);
749    }
750
751    if (Y) {
752      RootArgs.push_back(Y);
753    }
754
755    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
756
757    if (OutPtr && !PassOutByPointer) {
758      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
759      if (gEnableRsTbaa) {
760        Store->setMetadata("tbaa", TBAAAllocation);
761      }
762      Store->setMetadata("alias.scope", AliasingScope);
763    }
764
765    return true;
766  }
767
768  /// @brief Checks if pointers to allocation internals are exposed
769  ///
770  /// This function verifies if through the parameters passed to the kernel
771  /// or through calls to the runtime library the script gains access to
772  /// pointers pointing to data within a RenderScript Allocation.
773  /// If we know we control all loads from and stores to data within
774  /// RenderScript allocations and if we know the run-time internal accesses
775  /// are all annotated with RenderScript TBAA metadata, only then we
776  /// can safely use TBAA to distinguish between generic and from-allocation
777  /// pointers.
778  bool allocPointersExposed(llvm::Module &Module) {
779    // Old style kernel function can expose pointers to elements within
780    // allocations.
781    // TODO: Extend analysis to allow simple cases of old-style kernels.
782    for (size_t i = 0; i < mExportForEachCount; ++i) {
783      const char *Name = mExportForEachNameList[i];
784      uint32_t Signature = mExportForEachSignatureList[i];
785      if (Module.getFunction(Name) &&
786          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
787        return true;
788      }
789    }
790
791    // Check for library functions that expose a pointer to an Allocation or
792    // that are not yet annotated with RenderScript-specific tbaa information.
793    static std::vector<std::string> Funcs;
794
795    // rsGetElementAt(...)
796    Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
797    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
798    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
799    // rsSetElementAt()
800    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
801    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
802    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
803    // rsGetElementAtYuv_uchar_Y()
804    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
805    // rsGetElementAtYuv_uchar_U()
806    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
807    // rsGetElementAtYuv_uchar_V()
808    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
809
810    for (std::vector<std::string>::iterator FI = Funcs.begin(),
811                                            FE = Funcs.end();
812         FI != FE; ++FI) {
813      llvm::Function *Function = Module.getFunction(*FI);
814
815      if (!Function) {
816        ALOGE("Missing run-time function '%s'", FI->c_str());
817        return true;
818      }
819
820      if (Function->getNumUses() > 0) {
821        return true;
822      }
823    }
824
825    return false;
826  }
827
828  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
829  ///
830  /// The TBAA metadata used to annotate loads/stores from RenderScript
831  /// Allocations is generated in a separate TBAA tree with a
832  /// "RenderScript TBAA" root node. LLVM does assume may-alias for all nodes in
833  /// unrelated alias analysis trees. This function makes the RenderScript TBAA
834  /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
835  /// the connected trees every access to an Allocation is resolved to
836  /// must-alias if compared to a normal C/C++ access.
837  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
838    llvm::MDBuilder MDHelper(*Context);
839    llvm::MDNode *TBAARenderScript =
840      MDHelper.createTBAARoot("RenderScript TBAA");
841
842    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
843    llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript",
844                                                         TBAARoot);
845
846    TBAARenderScript->replaceAllUsesWith(TBAAMergedRS);
847  }
848
849  virtual bool runOnModule(llvm::Module &Module) {
850    bool Changed  = false;
851    this->Module  = &Module;
852    this->Context = &Module.getContext();
853
854    this->buildTypes();
855
856    bcinfo::MetadataExtractor me(&Module);
857    if (!me.extract()) {
858      ALOGE("Could not extract metadata from module!");
859      return false;
860    }
861    mExportForEachCount = me.getExportForEachSignatureCount();
862    mExportForEachNameList = me.getExportForEachNameList();
863    mExportForEachSignatureList = me.getExportForEachSignatureList();
864
865    bool AllocsExposed = allocPointersExposed(Module);
866
867    for (size_t i = 0; i < mExportForEachCount; ++i) {
868      const char *name = mExportForEachNameList[i];
869      uint32_t signature = mExportForEachSignatureList[i];
870      llvm::Function *kernel = Module.getFunction(name);
871      if (kernel) {
872        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
873          Changed |= ExpandKernel(kernel, signature);
874          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
875        } else if (kernel->getReturnType()->isVoidTy()) {
876          Changed |= ExpandFunction(kernel, signature);
877          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
878        } else {
879          // There are some graphics root functions that are not
880          // expanded, but that will be called directly. For those
881          // functions, we can not set the linkage to internal.
882        }
883      }
884    }
885
886    if (gEnableRsTbaa && !AllocsExposed) {
887      connectRenderScriptTBAAMetadata(Module);
888    }
889
890    return Changed;
891  }
892
893  virtual const char *getPassName() const {
894    return "ForEach-able Function Expansion";
895  }
896
897}; // end RSForEachExpandPass
898
899} // end anonymous namespace
900
901char RSForEachExpandPass::ID = 0;
902
903namespace bcc {
904
905llvm::ModulePass *
906createRSForEachExpandPass(bool pEnableStepOpt){
907  return new RSForEachExpandPass(pEnableStepOpt);
908}
909
910} // end namespace bcc
911