RSForEachExpand.cpp revision c43e1ba9cf40277dd496fe503e777906d2755251
1/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "bcc/Assert.h"
18#include "bcc/Renderscript/RSTransforms.h"
19
20#include <cstdlib>
21
22#include <llvm/IR/DerivedTypes.h>
23#include <llvm/IR/Function.h>
24#include <llvm/IR/Instructions.h>
25#include <llvm/IR/IRBuilder.h>
26#include <llvm/IR/MDBuilder.h>
27#include <llvm/IR/Module.h>
28#include <llvm/Pass.h>
29#include <llvm/Support/raw_ostream.h>
30#include <llvm/IR/DataLayout.h>
31#include <llvm/IR/Function.h>
32#include <llvm/IR/Type.h>
33#include <llvm/Transforms/Utils/BasicBlockUtils.h>
34
35#include "bcc/Config/Config.h"
36#include "bcc/Support/Log.h"
37
38#include "bcinfo/MetadataExtractor.h"
39
40#define NUM_EXPANDED_FUNCTION_PARAMS 5
41
42using namespace bcc;
43
44namespace {
45
// Compile-time switch: when true, the loads and stores emitted by this pass
// are tagged with RenderScript-specific TBAA metadata.
const bool gEnableRsTbaa = true;
47
48/* RSForEachExpandPass - This pass operates on functions that are able to be
49 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
50 * ForEach-able function to be invoked over the appropriate data cells of the
51 * input/output allocations (adjusting other relevant parameters as we go). We
52 * support doing this for any ForEach-able compute kernels. The new function
53 * name is the original function name followed by ".expand". Note that we
54 * still generate code for the original function.
55 */
56class RSForEachExpandPass : public llvm::ModulePass {
57private:
58  static char ID;
59
60  llvm::Module *Module;
61  llvm::LLVMContext *Context;
62
63  /*
64   * Pointer to LLVM type information for the ForEachStubType and the function
65   * signature for expanded kernels.  These must be re-calculated for each
66   * module the pass is run on.
67   */
68  llvm::StructType   *ForEachStubType;
69  llvm::FunctionType *ExpandedFunctionType;
70
71  uint32_t mExportForEachCount;
72  const char **mExportForEachNameList;
73  const uint32_t *mExportForEachSignatureList;
74
75  // Turns on optimization of allocation stride values.
76  bool mEnableStepOpt;
77
78  uint32_t getRootSignature(llvm::Function *Function) {
79    const llvm::NamedMDNode *ExportForEachMetadata =
80        Module->getNamedMetadata("#rs_export_foreach");
81
82    if (!ExportForEachMetadata) {
83      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
84      for (llvm::Function::arg_iterator B = Function->arg_begin(),
85                                        E = Function->arg_end();
86           B != E;
87           ++B) {
88        RootArgTys.push_back(B->getType());
89      }
90
91      // For pre-ICS bitcode, we may not have signature information. In that
92      // case, we use the size of the RootArgTys to select the number of
93      // arguments.
94      return (1 << RootArgTys.size()) - 1;
95    }
96
97    if (ExportForEachMetadata->getNumOperands() == 0) {
98      return 0;
99    }
100
101    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
102
103    // We only handle the case for legacy root() functions here, so this is
104    // hard-coded to look at only the first such function.
105    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
106    if (SigNode != NULL && SigNode->getNumOperands() == 1) {
107      llvm::Value *SigVal = SigNode->getOperand(0);
108      if (SigVal->getValueID() == llvm::Value::MDStringVal) {
109        llvm::StringRef SigString =
110            static_cast<llvm::MDString*>(SigVal)->getString();
111        uint32_t Signature = 0;
112        if (SigString.getAsInteger(10, Signature)) {
113          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
114          return 0;
115        }
116        return Signature;
117      }
118    }
119
120    return 0;
121  }
122
123  // Get the actual value we should use to step through an allocation.
124  //
125  // Normally the value we use to step through an allocation is given to us by
126  // the driver. However, for certain primitive data types, we can derive an
127  // integer constant for the step value. We use this integer constant whenever
128  // possible to allow further compiler optimizations to take place.
129  //
130  // DL - Target Data size/layout information.
131  // T - Type of allocation (should be a pointer).
132  // OrigStep - Original step increment (root.expand() input from driver).
133  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
134                            llvm::Value *OrigStep) {
135    bccAssert(DL);
136    bccAssert(AllocType);
137    bccAssert(OrigStep);
138    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
139    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
140    if (mEnableStepOpt && AllocType != VoidPtrTy && PT) {
141      llvm::Type *ET = PT->getElementType();
142      uint64_t ETSize = DL->getTypeAllocSize(ET);
143      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
144      return llvm::ConstantInt::get(Int32Ty, ETSize);
145    } else {
146      return OrigStep;
147    }
148  }
149
150  /// @brief Builds the types required by the pass for the given context.
151  void buildTypes(void) {
152    // Create the RsForEachStubParam struct.
153
154    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
155    llvm::Type *Int32Ty   = llvm::Type::getInt32Ty(*Context);
156    /* Defined in frameworks/base/libs/rs/rs_hal.h:
157     *
158     * struct RsForEachStubParamStruct {
159     *   const void *in;
160     *   void *out;
161     *   const void *usr;
162     *   uint32_t usr_len;
163     *   uint32_t x;
164     *   uint32_t y;
165     *   uint32_t z;
166     *   uint32_t lod;
167     *   enum RsAllocationCubemapFace face;
168     *   uint32_t ar[16];
169     *   const void **ins;
170     *   uint32_t *eStrideIns;
171     * };
172     */
173    llvm::SmallVector<llvm::Type*, 16> StructTypes;
174    StructTypes.push_back(VoidPtrTy);  // const void *in
175    StructTypes.push_back(VoidPtrTy);  // void *out
176    StructTypes.push_back(VoidPtrTy);  // const void *usr
177    StructTypes.push_back(Int32Ty);    // uint32_t usr_len
178    StructTypes.push_back(Int32Ty);    // uint32_t x
179    StructTypes.push_back(Int32Ty);    // uint32_t y
180    StructTypes.push_back(Int32Ty);    // uint32_t z
181    StructTypes.push_back(Int32Ty);    // uint32_t lod
182    StructTypes.push_back(Int32Ty);    // enum RsAllocationCubemapFace
183    StructTypes.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]
184
185    StructTypes.push_back(llvm::PointerType::getUnqual(VoidPtrTy)); // const void **ins
186    StructTypes.push_back(Int32Ty->getPointerTo()); // uint32_t *eStrideIns
187
188    ForEachStubType =
189      llvm::StructType::create(StructTypes, "RsForEachStubParamStruct");
190
191    // Create the function type for expanded kernels.
192
193    llvm::Type *ForEachStubPtrTy = ForEachStubType->getPointerTo();
194
195    llvm::SmallVector<llvm::Type*, 8> ParamTypes;
196    ParamTypes.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
197    ParamTypes.push_back(Int32Ty);          // uint32_t x1
198    ParamTypes.push_back(Int32Ty);          // uint32_t x2
199    ParamTypes.push_back(Int32Ty);          // uint32_t instep
200    ParamTypes.push_back(Int32Ty);          // uint32_t outstep
201
202    ExpandedFunctionType = llvm::FunctionType::get(llvm::Type::getVoidTy(*Context),
203                                              ParamTypes,
204                                              false);
205  }
206
207  /// @brief Create skeleton of the expanded function.
208  ///
209  /// This creates a function with the following signature:
210  ///
211  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
212  ///         uint32_t instep, uint32_t outstep)
213  ///
214  llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
215    llvm::Function *ExpandedFunction =
216      llvm::Function::Create(ExpandedFunctionType,
217                             llvm::GlobalValue::ExternalLinkage,
218                             OldName + ".expand", Module);
219
220    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
221
222    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
223
224    (AI++)->setName("p");
225    (AI++)->setName("x1");
226    (AI++)->setName("x2");
227    (AI++)->setName("arg_instep");
228    (AI++)->setName("arg_outstep");
229
230    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
231                                                       ExpandedFunction);
232    llvm::IRBuilder<> Builder(Begin);
233    Builder.CreateRetVoid();
234
235    return ExpandedFunction;
236  }
237
238  /// @brief Create an empty loop
239  ///
240  /// Create a loop of the form:
241  ///
242  /// for (i = LowerBound; i < UpperBound; i++)
243  ///   ;
244  ///
245  /// After the loop has been created, the builder is set such that
246  /// instructions can be added to the loop body.
247  ///
248  /// @param Builder The builder to use to build this loop. The current
249  ///                position of the builder is the position the loop
250  ///                will be inserted.
251  /// @param LowerBound The first value of the loop iterator
252  /// @param UpperBound The maximal value of the loop iterator
253  /// @param LoopIV A reference that will be set to the loop iterator.
254  /// @return The BasicBlock that will be executed after the loop.
255  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
256                               llvm::Value *LowerBound,
257                               llvm::Value *UpperBound,
258                               llvm::PHINode **LoopIV) {
259    assert(LowerBound->getType() == UpperBound->getType());
260
261    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
262    llvm::Value *Cond, *IVNext;
263    llvm::PHINode *IV;
264
265    CondBB = Builder.GetInsertBlock();
266    AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
267    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
268
269    // if (LowerBound < Upperbound)
270    //   goto LoopHeader
271    // else
272    //   goto AfterBB
273    CondBB->getTerminator()->eraseFromParent();
274    Builder.SetInsertPoint(CondBB);
275    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
276    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
277
278    // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
279    // iv.next = iv + 1
280    // if (iv.next < Upperbound)
281    //   goto LoopHeader
282    // else
283    //   goto AfterBB
284    Builder.SetInsertPoint(HeaderBB);
285    IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
286    IV->addIncoming(LowerBound, CondBB);
287    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
288    IV->addIncoming(IVNext, HeaderBB);
289    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
290    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
291    AfterBB->setName("Exit");
292    Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
293    *LoopIV = IV;
294    return AfterBB;
295  }
296
297public:
298  RSForEachExpandPass(bool pEnableStepOpt)
299      : ModulePass(ID), Module(NULL), Context(NULL),
300        mEnableStepOpt(pEnableStepOpt) {
301
302  }
303
304  /* Performs the actual optimization on a selected function. On success, the
305   * Module will contain a new function of the name "<NAME>.expand" that
306   * invokes <NAME>() in a loop with the appropriate parameters.
307   */
308  bool ExpandFunction(llvm::Function *Function, uint32_t Signature) {
309    ALOGV("Expanding ForEach-able Function %s",
310          Function->getName().str().c_str());
311
312    if (!Signature) {
313      Signature = getRootSignature(Function);
314      if (!Signature) {
315        // We couldn't determine how to expand this function based on its
316        // function signature.
317        return false;
318      }
319    }
320
321    llvm::DataLayout DL(Module);
322
323    llvm::Function *ExpandedFunction =
324      createEmptyExpandedFunction(Function->getName());
325
326    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
327
328    /*
329     * Extract the expanded function's parameters.  It is guaranteed by
330     * createEmptyExpandedFunction that there will be five parameters.
331     */
332    llvm::Function::arg_iterator ExpandedFunctionArgIter =
333      ExpandedFunction->arg_begin();
334
335    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
336    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
337    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
338    llvm::Value *Arg_instep  = &*(ExpandedFunctionArgIter++);
339    llvm::Value *Arg_outstep = &*ExpandedFunctionArgIter;
340
341    llvm::Value *InStep  = NULL;
342    llvm::Value *OutStep = NULL;
343
344    // Construct the actual function body.
345    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
346
347    // Collect and construct the arguments for the kernel().
348    // Note that we load any loop-invariant arguments before entering the Loop.
349    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
350
351    llvm::Type *InTy = NULL;
352    llvm::Value *InBasePtr = NULL;
353    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
354      InTy = (FunctionArgIter++)->getType();
355      InStep = getStepValue(&DL, InTy, Arg_instep);
356      InStep->setName("instep");
357      InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
358    }
359
360    llvm::Type *OutTy = NULL;
361    llvm::Value *OutBasePtr = NULL;
362    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
363      OutTy = (FunctionArgIter++)->getType();
364      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
365      OutStep->setName("outstep");
366      OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
367    }
368
369    llvm::Value *UsrData = NULL;
370    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
371      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
372      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
373          Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
374      UsrData->setName("UsrData");
375    }
376
377    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
378      FunctionArgIter++;
379    }
380
381    llvm::Value *Y = NULL;
382    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
383      Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
384      FunctionArgIter++;
385    }
386
387    bccAssert(FunctionArgIter == Function->arg_end());
388
389    llvm::PHINode *IV;
390    createLoop(Builder, Arg_x1, Arg_x2, &IV);
391
392    // Populate the actual call to kernel().
393    llvm::SmallVector<llvm::Value*, 8> RootArgs;
394
395    llvm::Value *InPtr  = NULL;
396    llvm::Value *OutPtr = NULL;
397
398    // Calculate the current input and output pointers
399    //
400    // We always calculate the input/output pointers with a GEP operating on i8
401    // values and only cast at the very end to OutTy. This is because the step
402    // between two values is given in bytes.
403    //
404    // TODO: We could further optimize the output by using a GEP operation of
405    // type 'OutTy' in cases where the element type of the allocation allows.
406    if (OutBasePtr) {
407      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
408      OutOffset = Builder.CreateMul(OutOffset, OutStep);
409      OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
410      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
411    }
412
413    if (InBasePtr) {
414      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
415      InOffset = Builder.CreateMul(InOffset, InStep);
416      InPtr = Builder.CreateGEP(InBasePtr, InOffset);
417      InPtr = Builder.CreatePointerCast(InPtr, InTy);
418    }
419
420    if (InPtr) {
421      RootArgs.push_back(InPtr);
422    }
423
424    if (OutPtr) {
425      RootArgs.push_back(OutPtr);
426    }
427
428    if (UsrData) {
429      RootArgs.push_back(UsrData);
430    }
431
432    llvm::Value *X = IV;
433    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
434      RootArgs.push_back(X);
435    }
436
437    if (Y) {
438      RootArgs.push_back(Y);
439    }
440
441    Builder.CreateCall(Function, RootArgs);
442
443    return true;
444  }
445
446  /* Expand a pass-by-value kernel.
447   */
448  bool ExpandKernel(llvm::Function *Function, uint32_t Signature) {
449    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
450    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
451
452    // TODO: Refactor this to share functionality with ExpandFunction.
453    llvm::DataLayout DL(Module);
454
455    llvm::Function *ExpandedFunction =
456      createEmptyExpandedFunction(Function->getName());
457
458    /*
459     * Extract the expanded function's parameters.  It is guaranteed by
460     * createEmptyExpandedFunction that there will be five parameters.
461     */
462
463    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
464
465    llvm::Function::arg_iterator ExpandedFunctionArgIter =
466      ExpandedFunction->arg_begin();
467
468    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
469    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
470    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
471    llvm::Value *Arg_instep  = &*(ExpandedFunctionArgIter++);
472    llvm::Value *Arg_outstep = &*ExpandedFunctionArgIter;
473
474    // Construct the actual function body.
475    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
476
477    // Create TBAA meta-data.
478    llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer;
479    llvm::MDBuilder MDHelper(*Context);
480
481    TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
482    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", TBAARenderScript);
483    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, TBAAAllocation, 0);
484    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", TBAARenderScript);
485    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
486
487    /*
488     * Collect and construct the arguments for the kernel().
489     *
490     * Note that we load any loop-invariant arguments before entering the Loop.
491     */
492    size_t NumInputs = Function->arg_size();
493
494    llvm::Value *Y = NULL;
495    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
496      Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
497      --NumInputs;
498    }
499
500    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
501      --NumInputs;
502    }
503
504    // No usrData parameter on kernels.
505    bccAssert(
506        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
507
508    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
509
510    // Check the return type
511    llvm::Type     *OutTy      = NULL;
512    llvm::Value    *OutStep    = NULL;
513    llvm::LoadInst *OutBasePtr = NULL;
514
515    bool PassOutByReference = false;
516
517    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
518      llvm::Type *OutBaseTy = Function->getReturnType();
519
520      if (OutBaseTy->isVoidTy()) {
521        PassOutByReference = true;
522        OutTy = ArgIter->getType();
523
524        ArgIter++;
525        --NumInputs;
526      } else {
527        // We don't increment Args, since we are using the actual return type.
528        OutTy = OutBaseTy->getPointerTo();
529      }
530
531      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
532      OutStep->setName("outstep");
533      OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
534      if (gEnableRsTbaa) {
535        OutBasePtr->setMetadata("tbaa", TBAAPointer);
536      }
537    }
538
539    llvm::SmallVector<llvm::Type*,     8> InTypes;
540    llvm::SmallVector<llvm::Value*,    8> InSteps;
541    llvm::SmallVector<llvm::LoadInst*, 8> InBasePtrs;
542    llvm::SmallVector<bool,            8> InIsStructPointer;
543
544    if (NumInputs == 1) {
545      llvm::Type *InType = ArgIter->getType();
546
547      /*
548       * AArch64 calling dictate that structs of sufficient size get passed by
549       * poiter instead of passed by value.  This, combined with the fact that
550       * we don't allow kernels to operate on pointer data means that if we see
551       * a kernel with a pointer parameter we know that it is struct input that
552       * has been promoted.  As such we don't need to convert its type to a
553       * pointer.  Later we will need to know to avoid a load, so we save this
554       * information in InIsStructPointer.
555       */
556      if (!InType->isPointerTy()) {
557        InType = InType->getPointerTo();
558        InIsStructPointer.push_back(false);
559      } else {
560        InIsStructPointer.push_back(true);
561      }
562
563      llvm::Value *InStep = getStepValue(&DL, InType, Arg_instep);
564
565      InStep->setName("instep");
566
567      llvm::Value    *Input     = Builder.CreateStructGEP(Arg_p, 0);
568      llvm::LoadInst *InBasePtr = Builder.CreateLoad(Input, "input_base");
569
570      if (gEnableRsTbaa) {
571        InBasePtr->setMetadata("tbaa", TBAAPointer);
572      }
573
574      InTypes.push_back(InType);
575      InSteps.push_back(InStep);
576      InBasePtrs.push_back(InBasePtr);
577
578    } else if (NumInputs > 1) {
579      llvm::Value    *InsMember  = Builder.CreateStructGEP(Arg_p, 10);
580      llvm::LoadInst *InsBasePtr = Builder.CreateLoad(InsMember,
581                                                      "inputs_base");
582
583      llvm::Value    *InStepsMember = Builder.CreateStructGEP(Arg_p, 11);
584      llvm::LoadInst *InStepsBase   = Builder.CreateLoad(InStepsMember,
585                                                         "insteps_base");
586
587      for (size_t InputIndex = 0; InputIndex < NumInputs;
588           ++InputIndex, ArgIter++) {
589
590          llvm::Value *IndexVal = Builder.getInt32(InputIndex);
591
592          llvm::Value    *InStepAddr = Builder.CreateGEP(InStepsBase, IndexVal);
593          llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
594                                                          "instep_addr");
595
596          llvm::Type *InType = ArgIter->getType();
597
598          /*
599         * AArch64 calling dictate that structs of sufficient size get passed by
600         * poiter instead of passed by value.  This, combined with the fact that
601         * we don't allow kernels to operate on pointer data means that if we
602         * see a kernel with a pointer parameter we know that it is struct input
603         * that has been promoted.  As such we don't need to convert its type to
604         * a pointer.  Later we will need to know to avoid a load, so we save
605         * this information in InIsStructPointer.
606         */
607          if (!InType->isPointerTy()) {
608            InType = InType->getPointerTo();
609            InIsStructPointer.push_back(false);
610          } else {
611            InIsStructPointer.push_back(true);
612          }
613
614          llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);
615
616          InStep->setName("instep");
617
618          llvm::Value    *InputAddr = Builder.CreateGEP(InsBasePtr, IndexVal);
619          llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr,
620                                                         "input_base");
621
622          if (gEnableRsTbaa) {
623            InBasePtr->setMetadata("tbaa", TBAAPointer);
624          }
625
626          InTypes.push_back(InType);
627          InSteps.push_back(InStep);
628          InBasePtrs.push_back(InBasePtr);
629      }
630    }
631
632    llvm::PHINode *IV;
633    createLoop(Builder, Arg_x1, Arg_x2, &IV);
634
635    // Populate the actual call to kernel().
636    llvm::SmallVector<llvm::Value*, 8> RootArgs;
637
638    // Calculate the current input and output pointers
639    //
640    //
641    // We always calculate the input/output pointers with a GEP operating on i8
642    // values combined with a multiplication and only cast at the very end to
643    // OutTy.  This is to account for dynamic stepping sizes when the value
644    // isn't apparent at compile time.  In the (very common) case when we know
645    // the step size at compile time, due to haveing complete type information
646    // this multiplication will optmized out and produces code equivalent to a
647    // a GEP on a pointer of the correct type.
648
649    // Output
650
651    llvm::Value *OutPtr = NULL;
652    if (OutBasePtr) {
653      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
654
655      OutOffset = Builder.CreateMul(OutOffset, OutStep);
656      OutPtr    = Builder.CreateGEP(OutBasePtr, OutOffset);
657      OutPtr    = Builder.CreatePointerCast(OutPtr, OutTy);
658
659      if (PassOutByReference) {
660        RootArgs.push_back(OutPtr);
661      }
662    }
663
664    // Inputs
665
666    if (NumInputs > 0) {
667      llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
668
669      for (size_t Index = 0; Index < NumInputs; ++Index) {
670        llvm::Value *InOffset = Builder.CreateMul(Offset, InSteps[Index]);
671        llvm::Value *InPtr    = Builder.CreateGEP(InBasePtrs[Index], InOffset);
672
673        InPtr = Builder.CreatePointerCast(InPtr, InTypes[Index]);
674
675        llvm::Value *Input;
676
677        if (InIsStructPointer[Index]) {
678          Input = InPtr;
679
680        } else {
681          llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
682
683          if (gEnableRsTbaa) {
684            InputLoad->setMetadata("tbaa", TBAAAllocation);
685          }
686
687          Input = InputLoad;
688        }
689
690        RootArgs.push_back(Input);
691      }
692    }
693
694    llvm::Value *X = IV;
695    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
696      RootArgs.push_back(X);
697    }
698
699    if (Y) {
700      RootArgs.push_back(Y);
701    }
702
703    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
704
705    if (OutPtr && !PassOutByReference) {
706      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
707      if (gEnableRsTbaa) {
708        Store->setMetadata("tbaa", TBAAAllocation);
709      }
710    }
711
712    return true;
713  }
714
715  /// @brief Checks if pointers to allocation internals are exposed
716  ///
717  /// This function verifies if through the parameters passed to the kernel
718  /// or through calls to the runtime library the script gains access to
719  /// pointers pointing to data within a RenderScript Allocation.
720  /// If we know we control all loads from and stores to data within
721  /// RenderScript allocations and if we know the run-time internal accesses
722  /// are all annotated with RenderScript TBAA metadata, only then we
723  /// can safely use TBAA to distinguish between generic and from-allocation
724  /// pointers.
725  bool allocPointersExposed(llvm::Module &Module) {
726    // Old style kernel function can expose pointers to elements within
727    // allocations.
728    // TODO: Extend analysis to allow simple cases of old-style kernels.
729    for (size_t i = 0; i < mExportForEachCount; ++i) {
730      const char *Name = mExportForEachNameList[i];
731      uint32_t Signature = mExportForEachSignatureList[i];
732      if (Module.getFunction(Name) &&
733          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
734        return true;
735      }
736    }
737
738    // Check for library functions that expose a pointer to an Allocation or
739    // that are not yet annotated with RenderScript-specific tbaa information.
740    static std::vector<std::string> Funcs;
741
742    // rsGetElementAt(...)
743    Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
744    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
745    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
746    // rsSetElementAt()
747    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
748    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
749    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
750    // rsGetElementAtYuv_uchar_Y()
751    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
752    // rsGetElementAtYuv_uchar_U()
753    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
754    // rsGetElementAtYuv_uchar_V()
755    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
756
757    for (std::vector<std::string>::iterator FI = Funcs.begin(),
758                                            FE = Funcs.end();
759         FI != FE; ++FI) {
760      llvm::Function *Function = Module.getFunction(*FI);
761
762      if (!Function) {
763        ALOGE("Missing run-time function '%s'", FI->c_str());
764        return true;
765      }
766
767      if (Function->getNumUses() > 0) {
768        return true;
769      }
770    }
771
772    return false;
773  }
774
775  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
776  ///
777  /// The TBAA metadata used to annotate loads/stores from RenderScript
778  /// Allocations is generated in a separate TBAA tree with a "RenderScript TBAA"
779  /// root node. LLVM does assume may-alias for all nodes in unrelated alias
780  /// analysis trees. This function makes the RenderScript TBAA a subtree of the
781  /// normal C/C++ TBAA tree aside of normal C/C++ types. With the connected trees
782  /// every access to an Allocation is resolved to must-alias if compared to
783  /// a normal C/C++ access.
784  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
785    llvm::MDBuilder MDHelper(*Context);
786    llvm::MDNode *TBAARenderScript =
787      MDHelper.createTBAARoot("RenderScript TBAA");
788
789    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
790    llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript",
791                                                         TBAARoot);
792
793    TBAARenderScript->replaceAllUsesWith(TBAAMergedRS);
794  }
795
796  virtual bool runOnModule(llvm::Module &Module) {
797    bool Changed  = false;
798    this->Module  = &Module;
799    this->Context = &Module.getContext();
800
801    this->buildTypes();
802
803    bcinfo::MetadataExtractor me(&Module);
804    if (!me.extract()) {
805      ALOGE("Could not extract metadata from module!");
806      return false;
807    }
808    mExportForEachCount = me.getExportForEachSignatureCount();
809    mExportForEachNameList = me.getExportForEachNameList();
810    mExportForEachSignatureList = me.getExportForEachSignatureList();
811
812    bool AllocsExposed = allocPointersExposed(Module);
813
814    for (size_t i = 0; i < mExportForEachCount; ++i) {
815      const char *name = mExportForEachNameList[i];
816      uint32_t signature = mExportForEachSignatureList[i];
817      llvm::Function *kernel = Module.getFunction(name);
818      if (kernel) {
819        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
820          Changed |= ExpandKernel(kernel, signature);
821          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
822        } else if (kernel->getReturnType()->isVoidTy()) {
823          Changed |= ExpandFunction(kernel, signature);
824          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
825        } else {
826          // There are some graphics root functions that are not
827          // expanded, but that will be called directly. For those
828          // functions, we can not set the linkage to internal.
829        }
830      }
831    }
832
833    if (gEnableRsTbaa && !AllocsExposed) {
834      connectRenderScriptTBAAMetadata(Module);
835    }
836
837    return Changed;
838  }
839
840  virtual const char *getPassName() const {
841    return "ForEach-able Function Expansion";
842  }
843
844}; // end RSForEachExpandPass
845
846} // end anonymous namespace
847
848char RSForEachExpandPass::ID = 0;
849
850namespace bcc {
851
852llvm::ModulePass *
853createRSForEachExpandPass(bool pEnableStepOpt){
854  return new RSForEachExpandPass(pEnableStepOpt);
855}
856
857} // end namespace bcc
858