RSForEachExpand.cpp revision 5010f641d1df6bc3447646ca7ef837410fb9b3dc
1/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "bcc/Assert.h"
18#include "bcc/Renderscript/RSTransforms.h"
19
20#include <cstdlib>
21
22#include <llvm/IR/DerivedTypes.h>
23#include <llvm/IR/Function.h>
24#include <llvm/IR/Instructions.h>
25#include <llvm/IR/IRBuilder.h>
26#include <llvm/IR/MDBuilder.h>
27#include <llvm/IR/Module.h>
28#include <llvm/Pass.h>
29#include <llvm/Support/raw_ostream.h>
30#include <llvm/IR/DataLayout.h>
31#include <llvm/IR/Function.h>
32#include <llvm/IR/Type.h>
33#include <llvm/Transforms/Utils/BasicBlockUtils.h>
34
35#include "bcc/Config/Config.h"
36#include "bcc/Support/Log.h"
37
38#include "bcinfo/MetadataExtractor.h"
39
// Arity of every generated "<NAME>.expand" function. The parameters are, in
// order: (p, x1, x2, outstep).
#define NUM_EXPANDED_FUNCTION_PARAMS (4)
41
42using namespace bcc;
43
44namespace {
45
46static const bool gEnableRsTbaa = true;
47
48/* RSForEachExpandPass - This pass operates on functions that are able to be
49 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
50 * ForEach-able function to be invoked over the appropriate data cells of the
51 * input/output allocations (adjusting other relevant parameters as we go). We
52 * support doing this for any ForEach-able compute kernels. The new function
53 * name is the original function name followed by ".expand". Note that we
54 * still generate code for the original function.
55 */
56class RSForEachExpandPass : public llvm::ModulePass {
57private:
58  static char ID;
59
60  llvm::Module *Module;
61  llvm::LLVMContext *Context;
62
63  /*
64   * Pointer to LLVM type information for the ForEachStubType and the function
65   * signature for expanded kernels.  These must be re-calculated for each
66   * module the pass is run on.
67   */
68  llvm::StructType   *ForEachStubType;
69  llvm::FunctionType *ExpandedFunctionType;
70
71  uint32_t mExportForEachCount;
72  const char **mExportForEachNameList;
73  const uint32_t *mExportForEachSignatureList;
74
75  // Turns on optimization of allocation stride values.
76  bool mEnableStepOpt;
77
78  uint32_t getRootSignature(llvm::Function *Function) {
79    const llvm::NamedMDNode *ExportForEachMetadata =
80        Module->getNamedMetadata("#rs_export_foreach");
81
82    if (!ExportForEachMetadata) {
83      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
84      for (llvm::Function::arg_iterator B = Function->arg_begin(),
85                                        E = Function->arg_end();
86           B != E;
87           ++B) {
88        RootArgTys.push_back(B->getType());
89      }
90
91      // For pre-ICS bitcode, we may not have signature information. In that
92      // case, we use the size of the RootArgTys to select the number of
93      // arguments.
94      return (1 << RootArgTys.size()) - 1;
95    }
96
97    if (ExportForEachMetadata->getNumOperands() == 0) {
98      return 0;
99    }
100
101    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
102
103    // We only handle the case for legacy root() functions here, so this is
104    // hard-coded to look at only the first such function.
105    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
106    if (SigNode != NULL && SigNode->getNumOperands() == 1) {
107      llvm::Value *SigVal = SigNode->getOperand(0);
108      if (SigVal->getValueID() == llvm::Value::MDStringVal) {
109        llvm::StringRef SigString =
110            static_cast<llvm::MDString*>(SigVal)->getString();
111        uint32_t Signature = 0;
112        if (SigString.getAsInteger(10, Signature)) {
113          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
114          return 0;
115        }
116        return Signature;
117      }
118    }
119
120    return 0;
121  }
122
123  // Get the actual value we should use to step through an allocation.
124  //
125  // Normally the value we use to step through an allocation is given to us by
126  // the driver. However, for certain primitive data types, we can derive an
127  // integer constant for the step value. We use this integer constant whenever
128  // possible to allow further compiler optimizations to take place.
129  //
130  // DL - Target Data size/layout information.
131  // T - Type of allocation (should be a pointer).
132  // OrigStep - Original step increment (root.expand() input from driver).
133  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
134                            llvm::Value *OrigStep) {
135    bccAssert(DL);
136    bccAssert(AllocType);
137    bccAssert(OrigStep);
138    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
139    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
140    if (mEnableStepOpt && AllocType != VoidPtrTy && PT) {
141      llvm::Type *ET = PT->getElementType();
142      uint64_t ETSize = DL->getTypeAllocSize(ET);
143      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
144      return llvm::ConstantInt::get(Int32Ty, ETSize);
145    } else {
146      return OrigStep;
147    }
148  }
149
150#define PARAM_FIELD_INS         0
151#define PARAM_FIELD_INESTRIDES  1
152#define PARAM_FIELD_OUT         2
153#define PARAM_FIELD_Y           3
154#define PARAM_FIELD_Z           4
155#define PARAM_FIELD_LID         5
156#define PARAM_FIELD_USR         6
157#define PARAM_FIELD_DIMX        7
158#define PARAM_FIELD_DIMY        8
159#define PARAM_FIELD_DIMZ        9
160#define PARAM_FIELD_SLOT       10
161
162  /// Builds the types required by the pass for the given context.
163  void buildTypes(void) {
164    // Create the RsForEachStubParam struct.
165
166    llvm::Type *VoidPtrTy    = llvm::Type::getInt8PtrTy(*Context);
167    llvm::Type *VoidPtrPtrTy = VoidPtrTy->getPointerTo();
168    llvm::Type *Int32Ty      = llvm::Type::getInt32Ty(*Context);
169    llvm::Type *Int32PtrTy   = Int32Ty->getPointerTo();
170
171    /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
172     *
173     * struct RsForEachKernelStruct{
174     *   const void *in;
175     *   void *out;
176     *   uint32_t y;
177     *   uint32_t z;
178     *   uint32_t lid;
179     *   const void **ins;
180     *   uint32_t *inEStrides;
181     *   const void *usr;
182     *   uint32_t dimX;
183     *   uint32_t dimY;
184     *   uint32_t dimZ;
185     *   uint32_t slot;
186     * };
187     */
188    llvm::SmallVector<llvm::Type*, 12> StructTypes;
189    StructTypes.push_back(VoidPtrPtrTy); // const void **ins
190    StructTypes.push_back(Int32PtrTy);   // uint32_t *inEStrides
191    StructTypes.push_back(VoidPtrTy);    // void *out
192    StructTypes.push_back(Int32Ty);      // uint32_t y
193    StructTypes.push_back(Int32Ty);      // uint32_t z
194    StructTypes.push_back(Int32Ty);      // uint32_t lid
195    StructTypes.push_back(VoidPtrTy);    // const void *usr
196    StructTypes.push_back(Int32Ty);      // uint32_t dimX
197    StructTypes.push_back(Int32Ty);      // uint32_t dimY
198    StructTypes.push_back(Int32Ty);      // uint32_t dimZ
199    StructTypes.push_back(Int32Ty);      // uint32_t slot
200
201    ForEachStubType =
202      llvm::StructType::create(StructTypes, "RsForEachStubParamStruct");
203
204    // Create the function type for expanded kernels.
205
206    llvm::Type *ForEachStubPtrTy = ForEachStubType->getPointerTo();
207
208    llvm::SmallVector<llvm::Type*, 8> ParamTypes;
209    ParamTypes.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
210    ParamTypes.push_back(Int32Ty);          // uint32_t x1
211    ParamTypes.push_back(Int32Ty);          // uint32_t x2
212    ParamTypes.push_back(Int32Ty);          // uint32_t outstep
213
214    ExpandedFunctionType =
215        llvm::FunctionType::get(llvm::Type::getVoidTy(*Context), ParamTypes,
216                                false);
217  }
218
219  /// @brief Create skeleton of the expanded function.
220  ///
221  /// This creates a function with the following signature:
222  ///
223  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
224  ///         uint32_t outstep)
225  ///
226  llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
227    llvm::Function *ExpandedFunction =
228      llvm::Function::Create(ExpandedFunctionType,
229                             llvm::GlobalValue::ExternalLinkage,
230                             OldName + ".expand", Module);
231
232    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
233
234    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
235
236    (AI++)->setName("p");
237    (AI++)->setName("x1");
238    (AI++)->setName("x2");
239    (AI++)->setName("arg_outstep");
240
241    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
242                                                       ExpandedFunction);
243    llvm::IRBuilder<> Builder(Begin);
244    Builder.CreateRetVoid();
245
246    return ExpandedFunction;
247  }
248
249  /// @brief Create an empty loop
250  ///
251  /// Create a loop of the form:
252  ///
253  /// for (i = LowerBound; i < UpperBound; i++)
254  ///   ;
255  ///
256  /// After the loop has been created, the builder is set such that
257  /// instructions can be added to the loop body.
258  ///
259  /// @param Builder The builder to use to build this loop. The current
260  ///                position of the builder is the position the loop
261  ///                will be inserted.
262  /// @param LowerBound The first value of the loop iterator
263  /// @param UpperBound The maximal value of the loop iterator
264  /// @param LoopIV A reference that will be set to the loop iterator.
265  /// @return The BasicBlock that will be executed after the loop.
266  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
267                               llvm::Value *LowerBound,
268                               llvm::Value *UpperBound,
269                               llvm::PHINode **LoopIV) {
270    assert(LowerBound->getType() == UpperBound->getType());
271
272    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
273    llvm::Value *Cond, *IVNext;
274    llvm::PHINode *IV;
275
276    CondBB = Builder.GetInsertBlock();
277    AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
278    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
279
280    // if (LowerBound < Upperbound)
281    //   goto LoopHeader
282    // else
283    //   goto AfterBB
284    CondBB->getTerminator()->eraseFromParent();
285    Builder.SetInsertPoint(CondBB);
286    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
287    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
288
289    // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
290    // iv.next = iv + 1
291    // if (iv.next < Upperbound)
292    //   goto LoopHeader
293    // else
294    //   goto AfterBB
295    Builder.SetInsertPoint(HeaderBB);
296    IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
297    IV->addIncoming(LowerBound, CondBB);
298    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
299    IV->addIncoming(IVNext, HeaderBB);
300    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
301    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
302    AfterBB->setName("Exit");
303    Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
304    *LoopIV = IV;
305    return AfterBB;
306  }
307
308public:
309  RSForEachExpandPass(bool pEnableStepOpt)
310      : ModulePass(ID), Module(NULL), Context(NULL),
311        mEnableStepOpt(pEnableStepOpt) {
312
313  }
314
315  /* Performs the actual optimization on a selected function. On success, the
316   * Module will contain a new function of the name "<NAME>.expand" that
317   * invokes <NAME>() in a loop with the appropriate parameters.
318   */
319  bool ExpandFunction(llvm::Function *Function, uint32_t Signature) {
320    ALOGV("Expanding ForEach-able Function %s",
321          Function->getName().str().c_str());
322
323    if (!Signature) {
324      Signature = getRootSignature(Function);
325      if (!Signature) {
326        // We couldn't determine how to expand this function based on its
327        // function signature.
328        return false;
329      }
330    }
331
332    llvm::DataLayout DL(Module);
333
334    llvm::Function *ExpandedFunction =
335      createEmptyExpandedFunction(Function->getName());
336
337    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
338
339    /*
340     * Extract the expanded function's parameters.  It is guaranteed by
341     * createEmptyExpandedFunction that there will be five parameters.
342     */
343    llvm::Function::arg_iterator ExpandedFunctionArgIter =
344      ExpandedFunction->arg_begin();
345
346    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
347    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
348    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
349    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
350
351    llvm::Value *InStep  = NULL;
352    llvm::Value *OutStep = NULL;
353
354    // Construct the actual function body.
355    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
356
357    // Collect and construct the arguments for the kernel().
358    // Note that we load any loop-invariant arguments before entering the Loop.
359    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
360
361    llvm::Type  *InTy      = NULL;
362    llvm::Value *InBasePtr = NULL;
363    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
364      llvm::Value    *InsMember  = Builder.CreateStructGEP(Arg_p,
365                                                           PARAM_FIELD_INS);
366      llvm::LoadInst *InsBasePtr = Builder.CreateLoad(InsMember, "inputs_base");
367
368      llvm::Value *InStepsMember =
369        Builder.CreateStructGEP(Arg_p, PARAM_FIELD_INESTRIDES);
370      llvm::LoadInst *InStepsBase = Builder.CreateLoad(InStepsMember,
371                                                       "insteps_base");
372
373      llvm::Value *IndexVal = Builder.getInt32(0);
374
375      llvm::Value    *InStepAddr = Builder.CreateGEP(InStepsBase, IndexVal);
376      llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
377                                                      "instep_addr");
378
379      InTy = (FunctionArgIter++)->getType();
380      InStep = getStepValue(&DL, InTy, InStepArg);
381
382      InStep->setName("instep");
383
384      llvm::Value *InputAddr = Builder.CreateGEP(InsBasePtr, IndexVal);
385      InBasePtr = Builder.CreateLoad(InputAddr, "input_base");
386    }
387
388    llvm::Type *OutTy = NULL;
389    llvm::Value *OutBasePtr = NULL;
390    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
391      OutTy = (FunctionArgIter++)->getType();
392      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
393      OutStep->setName("outstep");
394      OutBasePtr = Builder.CreateLoad(
395                     Builder.CreateStructGEP(Arg_p, PARAM_FIELD_OUT));
396    }
397
398    llvm::Value *UsrData = NULL;
399    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
400      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
401      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
402          Builder.CreateStructGEP(Arg_p, PARAM_FIELD_USR)), UsrDataTy);
403      UsrData->setName("UsrData");
404    }
405
406    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
407      FunctionArgIter++;
408    }
409
410    llvm::Value *Y = NULL;
411    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
412      Y = Builder.CreateLoad(
413            Builder.CreateStructGEP(Arg_p, PARAM_FIELD_Y), "Y");
414
415      FunctionArgIter++;
416    }
417
418    bccAssert(FunctionArgIter == Function->arg_end());
419
420    llvm::PHINode *IV;
421    createLoop(Builder, Arg_x1, Arg_x2, &IV);
422
423    // Populate the actual call to kernel().
424    llvm::SmallVector<llvm::Value*, 8> RootArgs;
425
426    llvm::Value *InPtr  = NULL;
427    llvm::Value *OutPtr = NULL;
428
429    // Calculate the current input and output pointers
430    //
431    // We always calculate the input/output pointers with a GEP operating on i8
432    // values and only cast at the very end to OutTy. This is because the step
433    // between two values is given in bytes.
434    //
435    // TODO: We could further optimize the output by using a GEP operation of
436    // type 'OutTy' in cases where the element type of the allocation allows.
437    if (OutBasePtr) {
438      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
439      OutOffset = Builder.CreateMul(OutOffset, OutStep);
440      OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
441      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
442    }
443
444    if (InBasePtr) {
445      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
446      InOffset = Builder.CreateMul(InOffset, InStep);
447      InPtr = Builder.CreateGEP(InBasePtr, InOffset);
448      InPtr = Builder.CreatePointerCast(InPtr, InTy);
449    }
450
451    if (InPtr) {
452      RootArgs.push_back(InPtr);
453    }
454
455    if (OutPtr) {
456      RootArgs.push_back(OutPtr);
457    }
458
459    if (UsrData) {
460      RootArgs.push_back(UsrData);
461    }
462
463    llvm::Value *X = IV;
464    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
465      RootArgs.push_back(X);
466    }
467
468    if (Y) {
469      RootArgs.push_back(Y);
470    }
471
472    Builder.CreateCall(Function, RootArgs);
473
474    return true;
475  }
476
477  /* Expand a pass-by-value kernel.
478   */
479  bool ExpandKernel(llvm::Function *Function, uint32_t Signature) {
480    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
481    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
482
483    // TODO: Refactor this to share functionality with ExpandFunction.
484    llvm::DataLayout DL(Module);
485
486    llvm::Function *ExpandedFunction =
487      createEmptyExpandedFunction(Function->getName());
488
489    /*
490     * Extract the expanded function's parameters.  It is guaranteed by
491     * createEmptyExpandedFunction that there will be five parameters.
492     */
493
494    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
495
496    llvm::Function::arg_iterator ExpandedFunctionArgIter =
497      ExpandedFunction->arg_begin();
498
499    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
500    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
501    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
502    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
503
504    // Construct the actual function body.
505    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
506
507    // Create TBAA meta-data.
508    llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer;
509    llvm::MDBuilder MDHelper(*Context);
510
511    TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
512    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
513                                                       TBAARenderScript);
514    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
515                                                      TBAAAllocation, 0);
516    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
517                                                    TBAARenderScript);
518    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
519
520    /*
521     * Collect and construct the arguments for the kernel().
522     *
523     * Note that we load any loop-invariant arguments before entering the Loop.
524     */
525    size_t NumInputs = Function->arg_size();
526
527    llvm::Value *Y = NULL;
528    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
529      Y = Builder.CreateLoad(
530            Builder.CreateStructGEP(Arg_p, PARAM_FIELD_Y), "Y");
531
532      --NumInputs;
533    }
534
535    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
536      --NumInputs;
537    }
538
539    // No usrData parameter on kernels.
540    bccAssert(
541        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
542
543    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
544
545    // Check the return type
546    llvm::Type     *OutTy      = NULL;
547    llvm::Value    *OutStep    = NULL;
548    llvm::LoadInst *OutBasePtr = NULL;
549
550    bool PassOutByPointer = false;
551
552    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
553      llvm::Type *OutBaseTy = Function->getReturnType();
554
555      if (OutBaseTy->isVoidTy()) {
556        PassOutByPointer = true;
557        OutTy = ArgIter->getType();
558
559        ArgIter++;
560        --NumInputs;
561      } else {
562        // We don't increment Args, since we are using the actual return type.
563        OutTy = OutBaseTy->getPointerTo();
564      }
565
566      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
567      OutStep->setName("outstep");
568      OutBasePtr = Builder.CreateLoad(
569                     Builder.CreateStructGEP(Arg_p, PARAM_FIELD_OUT));
570
571      if (gEnableRsTbaa) {
572        OutBasePtr->setMetadata("tbaa", TBAAPointer);
573      }
574    }
575
576    llvm::SmallVector<llvm::Type*,     8> InTypes;
577    llvm::SmallVector<llvm::Value*,    8> InSteps;
578    llvm::SmallVector<llvm::LoadInst*, 8> InBasePtrs;
579    llvm::SmallVector<bool,            8> InIsStructPointer;
580
581    if (NumInputs > 0) {
582      llvm::Value *InsMember = Builder.CreateStructGEP(Arg_p, PARAM_FIELD_INS);
583      llvm::LoadInst *InsBasePtr = Builder.CreateLoad(InsMember, "inputs_base");
584
585      llvm::Value *InStepsMember =
586        Builder.CreateStructGEP(Arg_p, PARAM_FIELD_INESTRIDES);
587      llvm::LoadInst *InStepsBase = Builder.CreateLoad(InStepsMember,
588                                                         "insteps_base");
589
590      for (size_t InputIndex = 0; InputIndex < NumInputs;
591           ++InputIndex, ArgIter++) {
592
593          llvm::Value *IndexVal = Builder.getInt32(InputIndex);
594
595          llvm::Value    *InStepAddr = Builder.CreateGEP(InStepsBase, IndexVal);
596          llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
597                                                          "instep_addr");
598
599          llvm::Type *InType = ArgIter->getType();
600
601        /*
602         * AArch64 calling dictate that structs of sufficient size get passed by
603         * pointer instead of passed by value.  This, combined with the fact
604         * that we don't allow kernels to operate on pointer data means that if
605         * we see a kernel with a pointer parameter we know that it is struct
606         * input that has been promoted.  As such we don't need to convert its
607         * type to a pointer.  Later we will need to know to avoid a load, so we
608         * save this information in InIsStructPointer.
609         */
610          if (!InType->isPointerTy()) {
611            InType = InType->getPointerTo();
612            InIsStructPointer.push_back(false);
613          } else {
614            InIsStructPointer.push_back(true);
615          }
616
617          llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);
618
619          InStep->setName("instep");
620
621          llvm::Value    *InputAddr = Builder.CreateGEP(InsBasePtr, IndexVal);
622          llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr,
623                                                         "input_base");
624
625          if (gEnableRsTbaa) {
626            InBasePtr->setMetadata("tbaa", TBAAPointer);
627          }
628
629          InTypes.push_back(InType);
630          InSteps.push_back(InStep);
631          InBasePtrs.push_back(InBasePtr);
632      }
633    }
634
635    llvm::PHINode *IV;
636    createLoop(Builder, Arg_x1, Arg_x2, &IV);
637
638    // Populate the actual call to kernel().
639    llvm::SmallVector<llvm::Value*, 8> RootArgs;
640
641    // Calculate the current input and output pointers
642    //
643    //
644    // We always calculate the input/output pointers with a GEP operating on i8
645    // values combined with a multiplication and only cast at the very end to
646    // OutTy.  This is to account for dynamic stepping sizes when the value
647    // isn't apparent at compile time.  In the (very common) case when we know
648    // the step size at compile time, due to haveing complete type information
649    // this multiplication will optmized out and produces code equivalent to a
650    // a GEP on a pointer of the correct type.
651
652    // Output
653
654    llvm::Value *OutPtr = NULL;
655    if (OutBasePtr) {
656      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
657
658      OutOffset = Builder.CreateMul(OutOffset, OutStep);
659      OutPtr    = Builder.CreateGEP(OutBasePtr, OutOffset);
660      OutPtr    = Builder.CreatePointerCast(OutPtr, OutTy);
661
662      if (PassOutByPointer) {
663        RootArgs.push_back(OutPtr);
664      }
665    }
666
667    // Inputs
668
669    if (NumInputs > 0) {
670      llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
671
672      for (size_t Index = 0; Index < NumInputs; ++Index) {
673        llvm::Value *InOffset = Builder.CreateMul(Offset, InSteps[Index]);
674        llvm::Value *InPtr    = Builder.CreateGEP(InBasePtrs[Index], InOffset);
675
676        InPtr = Builder.CreatePointerCast(InPtr, InTypes[Index]);
677
678        llvm::Value *Input;
679
680        if (InIsStructPointer[Index]) {
681          Input = InPtr;
682
683        } else {
684          llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
685
686          if (gEnableRsTbaa) {
687            InputLoad->setMetadata("tbaa", TBAAAllocation);
688          }
689
690          Input = InputLoad;
691        }
692
693        RootArgs.push_back(Input);
694      }
695    }
696
697    llvm::Value *X = IV;
698    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
699      RootArgs.push_back(X);
700    }
701
702    if (Y) {
703      RootArgs.push_back(Y);
704    }
705
706    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
707
708    if (OutPtr && !PassOutByPointer) {
709      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
710      if (gEnableRsTbaa) {
711        Store->setMetadata("tbaa", TBAAAllocation);
712      }
713    }
714
715    return true;
716  }
717
718  /// @brief Checks if pointers to allocation internals are exposed
719  ///
720  /// This function verifies if through the parameters passed to the kernel
721  /// or through calls to the runtime library the script gains access to
722  /// pointers pointing to data within a RenderScript Allocation.
723  /// If we know we control all loads from and stores to data within
724  /// RenderScript allocations and if we know the run-time internal accesses
725  /// are all annotated with RenderScript TBAA metadata, only then we
726  /// can safely use TBAA to distinguish between generic and from-allocation
727  /// pointers.
728  bool allocPointersExposed(llvm::Module &Module) {
729    // Old style kernel function can expose pointers to elements within
730    // allocations.
731    // TODO: Extend analysis to allow simple cases of old-style kernels.
732    for (size_t i = 0; i < mExportForEachCount; ++i) {
733      const char *Name = mExportForEachNameList[i];
734      uint32_t Signature = mExportForEachSignatureList[i];
735      if (Module.getFunction(Name) &&
736          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
737        return true;
738      }
739    }
740
741    // Check for library functions that expose a pointer to an Allocation or
742    // that are not yet annotated with RenderScript-specific tbaa information.
743    static std::vector<std::string> Funcs;
744
745    // rsGetElementAt(...)
746    Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
747    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
748    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
749    // rsSetElementAt()
750    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
751    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
752    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
753    // rsGetElementAtYuv_uchar_Y()
754    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
755    // rsGetElementAtYuv_uchar_U()
756    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
757    // rsGetElementAtYuv_uchar_V()
758    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
759
760    for (std::vector<std::string>::iterator FI = Funcs.begin(),
761                                            FE = Funcs.end();
762         FI != FE; ++FI) {
763      llvm::Function *Function = Module.getFunction(*FI);
764
765      if (!Function) {
766        ALOGE("Missing run-time function '%s'", FI->c_str());
767        return true;
768      }
769
770      if (Function->getNumUses() > 0) {
771        return true;
772      }
773    }
774
775    return false;
776  }
777
778  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
779  ///
780  /// The TBAA metadata used to annotate loads/stores from RenderScript
781  /// Allocations is generated in a separate TBAA tree with a
782  /// "RenderScript TBAA" root node. LLVM does assume may-alias for all nodes in
783  /// unrelated alias analysis trees. This function makes the RenderScript TBAA
784  /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
785  /// the connected trees every access to an Allocation is resolved to
786  /// must-alias if compared to a normal C/C++ access.
787  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
788    llvm::MDBuilder MDHelper(*Context);
789    llvm::MDNode *TBAARenderScript =
790      MDHelper.createTBAARoot("RenderScript TBAA");
791
792    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
793    llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript",
794                                                         TBAARoot);
795
796    TBAARenderScript->replaceAllUsesWith(TBAAMergedRS);
797  }
798
799  virtual bool runOnModule(llvm::Module &Module) {
800    bool Changed  = false;
801    this->Module  = &Module;
802    this->Context = &Module.getContext();
803
804    this->buildTypes();
805
806    bcinfo::MetadataExtractor me(&Module);
807    if (!me.extract()) {
808      ALOGE("Could not extract metadata from module!");
809      return false;
810    }
811    mExportForEachCount = me.getExportForEachSignatureCount();
812    mExportForEachNameList = me.getExportForEachNameList();
813    mExportForEachSignatureList = me.getExportForEachSignatureList();
814
815    bool AllocsExposed = allocPointersExposed(Module);
816
817    for (size_t i = 0; i < mExportForEachCount; ++i) {
818      const char *name = mExportForEachNameList[i];
819      uint32_t signature = mExportForEachSignatureList[i];
820      llvm::Function *kernel = Module.getFunction(name);
821      if (kernel) {
822        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
823          Changed |= ExpandKernel(kernel, signature);
824          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
825        } else if (kernel->getReturnType()->isVoidTy()) {
826          Changed |= ExpandFunction(kernel, signature);
827          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
828        } else {
829          // There are some graphics root functions that are not
830          // expanded, but that will be called directly. For those
831          // functions, we can not set the linkage to internal.
832        }
833      }
834    }
835
836    if (gEnableRsTbaa && !AllocsExposed) {
837      connectRenderScriptTBAAMetadata(Module);
838    }
839
840    return Changed;
841  }
842
843  virtual const char *getPassName() const {
844    return "ForEach-able Function Expansion";
845  }
846
847}; // end RSForEachExpandPass
848
849} // end anonymous namespace
850
851char RSForEachExpandPass::ID = 0;
852
853namespace bcc {
854
855llvm::ModulePass *
856createRSForEachExpandPass(bool pEnableStepOpt){
857  return new RSForEachExpandPass(pEnableStepOpt);
858}
859
860} // end namespace bcc
861