RSForEachExpand.cpp revision a108bc5ec0ca0cb48c72492d54a71126bccfa7d6
1/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "bcc/Assert.h"
18#include "bcc/Renderscript/RSTransforms.h"
19
20#include <cstdlib>
21#include <functional>
22
23#include <llvm/IR/DerivedTypes.h>
24#include <llvm/IR/Function.h>
25#include <llvm/IR/Instructions.h>
26#include <llvm/IR/IRBuilder.h>
27#include <llvm/IR/MDBuilder.h>
28#include <llvm/IR/Module.h>
29#include <llvm/Pass.h>
30#include <llvm/Support/raw_ostream.h>
31#include <llvm/IR/DataLayout.h>
32#include <llvm/IR/Function.h>
33#include <llvm/IR/Type.h>
34#include <llvm/Transforms/Utils/BasicBlockUtils.h>
35
36#include "bcc/Config/Config.h"
37#include "bcc/Support/Log.h"
38
39#include "bcinfo/MetadataExtractor.h"
40
41#define NUM_EXPANDED_FUNCTION_PARAMS 4
42
43using namespace bcc;
44
45namespace {
46
47static const bool gEnableRsTbaa = true;
48
49/* RSForEachExpandPass - This pass operates on functions that are able to be
50 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
51 * ForEach-able function to be invoked over the appropriate data cells of the
52 * input/output allocations (adjusting other relevant parameters as we go). We
53 * support doing this for any ForEach-able compute kernels. The new function
54 * name is the original function name followed by ".expand". Note that we
55 * still generate code for the original function.
56 */
57class RSForEachExpandPass : public llvm::ModulePass {
58public:
59  static char ID;
60
61private:
62  static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h
63
64  enum RsLaunchDimensionsField {
65    RsLaunchDimensionsFieldX,
66    RsLaunchDimensionsFieldY,
67    RsLaunchDimensionsFieldZ,
68    RsLaunchDimensionsFieldLod,
69    RsLaunchDimensionsFieldFace,
70    RsLaunchDimensionsFieldArray,
71
72    RsLaunchDimensionsFieldCount
73  };
74
75  enum RsExpandKernelDriverInfoPfxField {
76    RsExpandKernelDriverInfoPfxFieldInPtr,
77    RsExpandKernelDriverInfoPfxFieldInStride,
78    RsExpandKernelDriverInfoPfxFieldInLen,
79    RsExpandKernelDriverInfoPfxFieldOutPtr,
80    RsExpandKernelDriverInfoPfxFieldOutStride,
81    RsExpandKernelDriverInfoPfxFieldOutLen,
82    RsExpandKernelDriverInfoPfxFieldDim,
83    RsExpandKernelDriverInfoPfxFieldCurrent,
84    RsExpandKernelDriverInfoPfxFieldUsr,
85    RsExpandKernelDriverInfoPfxFieldUsLenr,
86
87    RsExpandKernelDriverInfoPfxFieldCount
88  };
89
90  llvm::Module *Module;
91  llvm::LLVMContext *Context;
92
93  /*
94   * Pointer to LLVM type information for the function signature
95   * for expanded kernels.  This must be re-calculated for each
96   * module the pass is run on.
97   */
98  llvm::FunctionType *ExpandedFunctionType;
99
100  uint32_t mExportForEachCount;
101  const char **mExportForEachNameList;
102  const uint32_t *mExportForEachSignatureList;
103
104  // Turns on optimization of allocation stride values.
105  bool mEnableStepOpt;
106
107  uint32_t getRootSignature(llvm::Function *Function) {
108    const llvm::NamedMDNode *ExportForEachMetadata =
109        Module->getNamedMetadata("#rs_export_foreach");
110
111    if (!ExportForEachMetadata) {
112      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
113      for (llvm::Function::arg_iterator B = Function->arg_begin(),
114                                        E = Function->arg_end();
115           B != E;
116           ++B) {
117        RootArgTys.push_back(B->getType());
118      }
119
120      // For pre-ICS bitcode, we may not have signature information. In that
121      // case, we use the size of the RootArgTys to select the number of
122      // arguments.
123      return (1 << RootArgTys.size()) - 1;
124    }
125
126    if (ExportForEachMetadata->getNumOperands() == 0) {
127      return 0;
128    }
129
130    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
131
132    // We only handle the case for legacy root() functions here, so this is
133    // hard-coded to look at only the first such function.
134    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
135    if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
136      llvm::Metadata *SigMD = SigNode->getOperand(0);
137      if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) {
138        llvm::StringRef SigString = SigS->getString();
139        uint32_t Signature = 0;
140        if (SigString.getAsInteger(10, Signature)) {
141          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
142          return 0;
143        }
144        return Signature;
145      }
146    }
147
148    return 0;
149  }
150
151  bool isStepOptSupported(llvm::Type *AllocType) {
152
153    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
154    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
155
156    if (mEnableStepOpt) {
157      return false;
158    }
159
160    if (AllocType == VoidPtrTy) {
161      return false;
162    }
163
164    if (!PT) {
165      return false;
166    }
167
168    // remaining conditions are 64-bit only
169    if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
170      return true;
171    }
172
173    // coerce suggests an upconverted struct type, which we can't support
174    if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
175      return false;
176    }
177
178    // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
179    llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
180    llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
181    if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
182      return false;
183    }
184
185    return true;
186  }
187
188  // Get the actual value we should use to step through an allocation.
189  //
190  // Normally the value we use to step through an allocation is given to us by
191  // the driver. However, for certain primitive data types, we can derive an
192  // integer constant for the step value. We use this integer constant whenever
193  // possible to allow further compiler optimizations to take place.
194  //
195  // DL - Target Data size/layout information.
196  // T - Type of allocation (should be a pointer).
197  // OrigStep - Original step increment (root.expand() input from driver).
198  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
199                            llvm::Value *OrigStep) {
200    bccAssert(DL);
201    bccAssert(AllocType);
202    bccAssert(OrigStep);
203    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
204    if (isStepOptSupported(AllocType)) {
205      llvm::Type *ET = PT->getElementType();
206      uint64_t ETSize = DL->getTypeAllocSize(ET);
207      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
208      return llvm::ConstantInt::get(Int32Ty, ETSize);
209    } else {
210      return OrigStep;
211    }
212  }
213
214  /// Builds the types required by the pass for the given context.
215  void buildTypes(void) {
216    // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs.
217
218    llvm::Type *Int8Ty                   = llvm::Type::getInt8Ty(*Context);
219    llvm::Type *Int8PtrTy                = Int8Ty->getPointerTo();
220    llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT);
221    llvm::Type *Int32Ty                  = llvm::Type::getInt32Ty(*Context);
222    llvm::Type *Int32ArrayInputLimitTy   = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT);
223    llvm::Type *VoidPtrTy                = llvm::Type::getInt8PtrTy(*Context);
224    llvm::Type *Int32Array4Ty            = llvm::ArrayType::get(Int32Ty, 4);
225
226    /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
227     *
228     * struct RsLaunchDimensions {
229     *   uint32_t x;
230     *   uint32_t y;
231     *   uint32_t z;
232     *   uint32_t lod;
233     *   uint32_t face;
234     *   uint32_t array[4];
235     * };
236     */
237    llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes;
238    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t x
239    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t y
240    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t z
241    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t lod
242    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t face
243    RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4]
244    llvm::StructType *RsLaunchDimensionsTy =
245        llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions");
246
247    /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h:
248     *
249     * struct RsExpandKernelDriverInfoPfx {
250     *     const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
251     *     uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
252     *     uint32_t inLen;
253     *
254     *     uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
255     *     uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
256     *     uint32_t outLen;
257     *
258     *     // Dimension of the launch
259     *     RsLaunchDimensions dim;
260     *
261     *     // The walking iterator of the launch
262     *     RsLaunchDimensions current;
263     *
264     *     const void *usr;
265     *     uint32_t usrLen;
266     *
267     *     // Items below this line are not used by the compiler and can be change in the driver.
268     *     // So the compiler must assume there are an unknown number of fields of unknown type
269     *     // beginning here.
270     * };
271     *
272     * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp).
273     */
274    llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes;
275    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]
276    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t inStride[RS_KERNEL_INPUT_LIMIT]
277    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t inLen
278    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]
279    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t outStride[RS_KERNEL_INPUT_LIMIT]
280    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t outLen
281    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions dim
282    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions current
283    RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy);                // const void *usr
284    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t usrLen
285    llvm::StructType *RsExpandKernelDriverInfoPfxTy =
286        llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx");
287
288    // Create the function type for expanded kernels.
289
290    llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo();
291
292    llvm::SmallVector<llvm::Type*, 8> ParamTypes;
293    ParamTypes.push_back(RsExpandKernelDriverInfoPfxPtrTy); // const RsExpandKernelDriverInfoPfx *p
294    ParamTypes.push_back(Int32Ty);                          // uint32_t x1
295    ParamTypes.push_back(Int32Ty);                          // uint32_t x2
296    ParamTypes.push_back(Int32Ty);                          // uint32_t outstep
297
298    ExpandedFunctionType =
299        llvm::FunctionType::get(llvm::Type::getVoidTy(*Context), ParamTypes,
300                                false);
301  }
302
303  /// @brief Create skeleton of the expanded function.
304  ///
305  /// This creates a function with the following signature:
306  ///
307  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
308  ///         uint32_t outstep)
309  ///
310  llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
311    llvm::Function *ExpandedFunction =
312      llvm::Function::Create(ExpandedFunctionType,
313                             llvm::GlobalValue::ExternalLinkage,
314                             OldName + ".expand", Module);
315
316    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
317
318    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
319
320    (AI++)->setName("p");
321    (AI++)->setName("x1");
322    (AI++)->setName("x2");
323    (AI++)->setName("arg_outstep");
324
325    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
326                                                       ExpandedFunction);
327    llvm::IRBuilder<> Builder(Begin);
328    Builder.CreateRetVoid();
329
330    return ExpandedFunction;
331  }
332
333  /// @brief Create an empty loop
334  ///
335  /// Create a loop of the form:
336  ///
337  /// for (i = LowerBound; i < UpperBound; i++)
338  ///   ;
339  ///
340  /// After the loop has been created, the builder is set such that
341  /// instructions can be added to the loop body.
342  ///
343  /// @param Builder The builder to use to build this loop. The current
344  ///                position of the builder is the position the loop
345  ///                will be inserted.
346  /// @param LowerBound The first value of the loop iterator
347  /// @param UpperBound The maximal value of the loop iterator
348  /// @param LoopIV A reference that will be set to the loop iterator.
349  /// @return The BasicBlock that will be executed after the loop.
350  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
351                               llvm::Value *LowerBound,
352                               llvm::Value *UpperBound,
353                               llvm::PHINode **LoopIV) {
354    assert(LowerBound->getType() == UpperBound->getType());
355
356    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
357    llvm::Value *Cond, *IVNext;
358    llvm::PHINode *IV;
359
360    CondBB = Builder.GetInsertBlock();
361    AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), nullptr, nullptr);
362    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
363
364    // if (LowerBound < Upperbound)
365    //   goto LoopHeader
366    // else
367    //   goto AfterBB
368    CondBB->getTerminator()->eraseFromParent();
369    Builder.SetInsertPoint(CondBB);
370    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
371    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
372
373    // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
374    // iv.next = iv + 1
375    // if (iv.next < Upperbound)
376    //   goto LoopHeader
377    // else
378    //   goto AfterBB
379    Builder.SetInsertPoint(HeaderBB);
380    IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
381    IV->addIncoming(LowerBound, CondBB);
382    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
383    IV->addIncoming(IVNext, HeaderBB);
384    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
385    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
386    AfterBB->setName("Exit");
387    Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
388    *LoopIV = IV;
389    return AfterBB;
390  }
391
392public:
393  RSForEachExpandPass(bool pEnableStepOpt = true)
394      : ModulePass(ID), Module(nullptr), Context(nullptr),
395        mEnableStepOpt(pEnableStepOpt) {
396
397  }
398
399  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
400    // This pass does not use any other analysis passes, but it does
401    // add/wrap the existing functions in the module (thus altering the CFG).
402  }
403
404  // Build contribution to outgoing argument list for calling a
405  // ForEach-able function, based on the special parameters of that
406  // function.
407  //
408  // Signature - metadata bits for the signature of the ForEach-able function
409  // X, Arg_p - values derived directly from expanded function,
410  //            suitable for computing arguments for the ForEach-able function
411  // CalleeArgs - contribution is accumulated here
412  // Bump - invoked once for each contributed outgoing argument
413  void ExpandSpecialArguments(uint32_t Signature,
414                              llvm::Value *X,
415                              llvm::Value *Arg_p,
416                              llvm::IRBuilder<> &Builder,
417                              llvm::SmallVector<llvm::Value*, 8> &CalleeArgs,
418                              std::function<void ()> Bump) {
419
420    if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) {
421      CalleeArgs.push_back(Arg_p);
422      Bump();
423    }
424
425    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
426      CalleeArgs.push_back(X);
427      Bump();
428    }
429
430    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) ||
431        bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
432
433      llvm::Value *Current = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldCurrent);
434
435      if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
436        llvm::Value *Y = Builder.CreateLoad(
437            Builder.CreateStructGEP(Current, RsLaunchDimensionsFieldY), "Y");
438        CalleeArgs.push_back(Y);
439        Bump();
440      }
441
442      if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
443        llvm::Value *Z = Builder.CreateLoad(
444            Builder.CreateStructGEP(Current, RsLaunchDimensionsFieldZ), "Z");
445        CalleeArgs.push_back(Z);
446        Bump();
447      }
448    }
449  }
450
451  /* Performs the actual optimization on a selected function. On success, the
452   * Module will contain a new function of the name "<NAME>.expand" that
453   * invokes <NAME>() in a loop with the appropriate parameters.
454   */
455  bool ExpandFunction(llvm::Function *Function, uint32_t Signature) {
456    ALOGV("Expanding ForEach-able Function %s",
457          Function->getName().str().c_str());
458
459    if (!Signature) {
460      Signature = getRootSignature(Function);
461      if (!Signature) {
462        // We couldn't determine how to expand this function based on its
463        // function signature.
464        return false;
465      }
466    }
467
468    llvm::DataLayout DL(Module);
469
470    llvm::Function *ExpandedFunction =
471      createEmptyExpandedFunction(Function->getName());
472
473    /*
474     * Extract the expanded function's parameters.  It is guaranteed by
475     * createEmptyExpandedFunction that there will be five parameters.
476     */
477
478    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
479
480    llvm::Function::arg_iterator ExpandedFunctionArgIter =
481      ExpandedFunction->arg_begin();
482
483    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
484    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
485    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
486    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
487
488    llvm::Value *InStep  = nullptr;
489    llvm::Value *OutStep = nullptr;
490
491    // Construct the actual function body.
492    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
493
494    // Collect and construct the arguments for the kernel().
495    // Note that we load any loop-invariant arguments before entering the Loop.
496    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
497
498    llvm::Type  *InTy      = nullptr;
499    llvm::Value *InBasePtr = nullptr;
500    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
501      llvm::Value *InsBasePtr  = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInPtr, "inputs_base");
502
503      llvm::Value *InStepsBase = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInStride, "insteps_base");
504
505      llvm::Value    *InStepAddr = Builder.CreateConstInBoundsGEP2_32(InStepsBase, 0, 0);
506      llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
507                                                      "instep_addr");
508
509      InTy = (FunctionArgIter++)->getType();
510      InStep = getStepValue(&DL, InTy, InStepArg);
511
512      InStep->setName("instep");
513
514      llvm::Value *InputAddr = Builder.CreateConstInBoundsGEP2_32(InsBasePtr, 0, 0);
515      InBasePtr = Builder.CreateLoad(InputAddr, "input_base");
516    }
517
518    llvm::Type *OutTy = nullptr;
519    llvm::Value *OutBasePtr = nullptr;
520    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
521      OutTy = (FunctionArgIter++)->getType();
522      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
523      OutStep->setName("outstep");
524      OutBasePtr = Builder.CreateLoad(
525                     Builder.CreateConstInBoundsGEP2_32(
526                         Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldOutPtr), 0, 0));
527    }
528
529    llvm::Value *UsrData = nullptr;
530    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
531      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
532      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
533          Builder.CreateStructGEP(Arg_p,  RsExpandKernelDriverInfoPfxFieldUsr)), UsrDataTy);
534      UsrData->setName("UsrData");
535    }
536
537    llvm::PHINode *IV;
538    createLoop(Builder, Arg_x1, Arg_x2, &IV);
539
540    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
541    ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
542                           [&FunctionArgIter]() { FunctionArgIter++; });
543
544    bccAssert(FunctionArgIter == Function->arg_end());
545
546    // Populate the actual call to kernel().
547    llvm::SmallVector<llvm::Value*, 8> RootArgs;
548
549    llvm::Value *InPtr  = nullptr;
550    llvm::Value *OutPtr = nullptr;
551
552    // Calculate the current input and output pointers
553    //
554    // We always calculate the input/output pointers with a GEP operating on i8
555    // values and only cast at the very end to OutTy. This is because the step
556    // between two values is given in bytes.
557    //
558    // TODO: We could further optimize the output by using a GEP operation of
559    // type 'OutTy' in cases where the element type of the allocation allows.
560    if (OutBasePtr) {
561      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
562      OutOffset = Builder.CreateMul(OutOffset, OutStep);
563      OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
564      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
565    }
566
567    if (InBasePtr) {
568      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
569      InOffset = Builder.CreateMul(InOffset, InStep);
570      InPtr = Builder.CreateGEP(InBasePtr, InOffset);
571      InPtr = Builder.CreatePointerCast(InPtr, InTy);
572    }
573
574    if (InPtr) {
575      RootArgs.push_back(InPtr);
576    }
577
578    if (OutPtr) {
579      RootArgs.push_back(OutPtr);
580    }
581
582    if (UsrData) {
583      RootArgs.push_back(UsrData);
584    }
585
586    RootArgs.append(CalleeArgs.begin(), CalleeArgs.end());
587
588    Builder.CreateCall(Function, RootArgs);
589
590    return true;
591  }
592
593  /* Expand a pass-by-value kernel.
594   */
595  bool ExpandKernel(llvm::Function *Function, uint32_t Signature) {
596    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
597    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
598
599    // TODO: Refactor this to share functionality with ExpandFunction.
600    llvm::DataLayout DL(Module);
601
602    llvm::Function *ExpandedFunction =
603      createEmptyExpandedFunction(Function->getName());
604
605    /*
606     * Extract the expanded function's parameters.  It is guaranteed by
607     * createEmptyExpandedFunction that there will be five parameters.
608     */
609
610    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
611
612    llvm::Function::arg_iterator ExpandedFunctionArgIter =
613      ExpandedFunction->arg_begin();
614
615    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
616    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
617    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
618    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
619
620    // Construct the actual function body.
621    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
622
623    // Create TBAA meta-data.
624    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
625                 *TBAAAllocation, *TBAAPointer;
626    llvm::MDBuilder MDHelper(*Context);
627
628    TBAARenderScriptDistinct =
629      MDHelper.createTBAARoot("RenderScript Distinct TBAA");
630    TBAARenderScript = MDHelper.createTBAANode("RenderScript TBAA",
631        TBAARenderScriptDistinct);
632    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
633                                                       TBAARenderScript);
634    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
635                                                      TBAAAllocation, 0);
636    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
637                                                    TBAARenderScript);
638    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
639
640    llvm::MDNode *AliasingDomain, *AliasingScope;
641    AliasingDomain = MDHelper.createAnonymousAliasScopeDomain("RS argument scope domain");
642    AliasingScope = MDHelper.createAnonymousAliasScope(AliasingDomain, "RS argument scope");
643
644    /*
645     * Collect and construct the arguments for the kernel().
646     *
647     * Note that we load any loop-invariant arguments before entering the Loop.
648     */
649    size_t NumInputs = Function->arg_size();
650
651    // No usrData parameter on kernels.
652    bccAssert(
653        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
654
655    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
656
657    // Check the return type
658    llvm::Type     *OutTy            = nullptr;
659    llvm::Value    *OutStep          = nullptr;
660    llvm::LoadInst *OutBasePtr       = nullptr;
661    llvm::Value    *CastedOutBasePtr = nullptr;
662
663    bool PassOutByPointer = false;
664
665    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
666      llvm::Type *OutBaseTy = Function->getReturnType();
667
668      if (OutBaseTy->isVoidTy()) {
669        PassOutByPointer = true;
670        OutTy = ArgIter->getType();
671
672        ArgIter++;
673        --NumInputs;
674      } else {
675        // We don't increment Args, since we are using the actual return type.
676        OutTy = OutBaseTy->getPointerTo();
677      }
678
679      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
680      OutStep->setName("outstep");
681      OutBasePtr = Builder.CreateLoad(
682                     Builder.CreateConstInBoundsGEP2_32(
683                         Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldOutPtr), 0, 0));
684
685      if (gEnableRsTbaa) {
686        OutBasePtr->setMetadata("tbaa", TBAAPointer);
687      }
688
689      OutBasePtr->setMetadata("alias.scope", AliasingScope);
690
691      CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
692    }
693
694    llvm::PHINode *IV;
695    createLoop(Builder, Arg_x1, Arg_x2, &IV);
696
697    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
698    ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
699                           [&NumInputs]() { --NumInputs; });
700
701    llvm::SmallVector<llvm::Type*,  8> InTypes;
702    llvm::SmallVector<llvm::Value*, 8> InSteps;
703    llvm::SmallVector<llvm::Value*, 8> InBasePtrs;
704    llvm::SmallVector<bool,         8> InIsStructPointer;
705
706    bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT);
707
708    if (NumInputs > 0) {
709      llvm::Value *InsBasePtr  = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInPtr, "inputs_base");
710
711      llvm::Value *InStepsBase = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInStride, "insteps_base");
712
713      for (size_t InputIndex = 0; InputIndex < NumInputs;
714           ++InputIndex, ArgIter++) {
715
716          llvm::Value    *InStepAddr = Builder.CreateConstInBoundsGEP2_32(InStepsBase, 0, InputIndex);
717          llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
718                                                          "instep_addr");
719
720          llvm::Type *InType = ArgIter->getType();
721
722        /*
723         * AArch64 calling dictate that structs of sufficient size get passed by
724         * pointer instead of passed by value.  This, combined with the fact
725         * that we don't allow kernels to operate on pointer data means that if
726         * we see a kernel with a pointer parameter we know that it is struct
727         * input that has been promoted.  As such we don't need to convert its
728         * type to a pointer.  Later we will need to know to avoid a load, so we
729         * save this information in InIsStructPointer.
730         */
731          if (!InType->isPointerTy()) {
732            InType = InType->getPointerTo();
733            InIsStructPointer.push_back(false);
734          } else {
735            InIsStructPointer.push_back(true);
736          }
737
738          llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);
739
740          InStep->setName("instep");
741
742          llvm::Value    *InputAddr = Builder.CreateConstInBoundsGEP2_32(InsBasePtr, 0, InputIndex);
743          llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr,
744                                                         "input_base");
745          llvm::Value    *CastInBasePtr = Builder.CreatePointerCast(InBasePtr,
746                                                                    InType, "casted_in");
747          if (gEnableRsTbaa) {
748            InBasePtr->setMetadata("tbaa", TBAAPointer);
749          }
750
751          InBasePtr->setMetadata("alias.scope", AliasingScope);
752
753          InTypes.push_back(InType);
754          InSteps.push_back(InStep);
755          InBasePtrs.push_back(CastInBasePtr);
756      }
757    }
758
759    // Populate the actual call to kernel().
760    llvm::SmallVector<llvm::Value*, 8> RootArgs;
761
762    // Calculate the current input and output pointers
763    //
764    //
765    // We always calculate the input/output pointers with a GEP operating on i8
766    // values combined with a multiplication and only cast at the very end to
767    // OutTy.  This is to account for dynamic stepping sizes when the value
768    // isn't apparent at compile time.  In the (very common) case when we know
769    // the step size at compile time, due to haveing complete type information
770    // this multiplication will optmized out and produces code equivalent to a
771    // a GEP on a pointer of the correct type.
772
773    // Output
774
775    llvm::Value *OutPtr = nullptr;
776    if (CastedOutBasePtr) {
777      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
778
779      OutPtr    = Builder.CreateGEP(CastedOutBasePtr, OutOffset);
780
781      if (PassOutByPointer) {
782        RootArgs.push_back(OutPtr);
783      }
784    }
785
786    // Inputs
787
788    if (NumInputs > 0) {
789      llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
790
791      for (size_t Index = 0; Index < NumInputs; ++Index) {
792        llvm::Value *InPtr    = Builder.CreateGEP(InBasePtrs[Index], Offset);
793        llvm::Value *Input;
794
795        if (InIsStructPointer[Index]) {
796          Input = InPtr;
797
798        } else {
799          llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
800
801          if (gEnableRsTbaa) {
802            InputLoad->setMetadata("tbaa", TBAAAllocation);
803          }
804
805          InputLoad->setMetadata("alias.scope", AliasingScope);
806
807          Input = InputLoad;
808        }
809
810        RootArgs.push_back(Input);
811      }
812    }
813
814    RootArgs.append(CalleeArgs.begin(), CalleeArgs.end());
815
816    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
817
818    if (OutPtr && !PassOutByPointer) {
819      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
820      if (gEnableRsTbaa) {
821        Store->setMetadata("tbaa", TBAAAllocation);
822      }
823      Store->setMetadata("alias.scope", AliasingScope);
824    }
825
826    return true;
827  }
828
829  /// @brief Checks if pointers to allocation internals are exposed
830  ///
831  /// This function verifies if through the parameters passed to the kernel
832  /// or through calls to the runtime library the script gains access to
833  /// pointers pointing to data within a RenderScript Allocation.
834  /// If we know we control all loads from and stores to data within
835  /// RenderScript allocations and if we know the run-time internal accesses
836  /// are all annotated with RenderScript TBAA metadata, only then we
837  /// can safely use TBAA to distinguish between generic and from-allocation
838  /// pointers.
839  bool allocPointersExposed(llvm::Module &Module) {
840    // Old style kernel function can expose pointers to elements within
841    // allocations.
842    // TODO: Extend analysis to allow simple cases of old-style kernels.
843    for (size_t i = 0; i < mExportForEachCount; ++i) {
844      const char *Name = mExportForEachNameList[i];
845      uint32_t Signature = mExportForEachSignatureList[i];
846      if (Module.getFunction(Name) &&
847          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
848        return true;
849      }
850    }
851
852    // Check for library functions that expose a pointer to an Allocation or
853    // that are not yet annotated with RenderScript-specific tbaa information.
854    static std::vector<std::string> Funcs;
855
856    // rsGetElementAt(...)
857    Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
858    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
859    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
860    // rsSetElementAt()
861    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
862    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
863    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
864    // rsGetElementAtYuv_uchar_Y()
865    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
866    // rsGetElementAtYuv_uchar_U()
867    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
868    // rsGetElementAtYuv_uchar_V()
869    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
870
871    for (std::vector<std::string>::iterator FI = Funcs.begin(),
872                                            FE = Funcs.end();
873         FI != FE; ++FI) {
874      llvm::Function *Function = Module.getFunction(*FI);
875
876      if (!Function) {
877        ALOGE("Missing run-time function '%s'", FI->c_str());
878        return true;
879      }
880
881      if (Function->getNumUses() > 0) {
882        return true;
883      }
884    }
885
886    return false;
887  }
888
889  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
890  ///
891  /// The TBAA metadata used to annotate loads/stores from RenderScript
892  /// Allocations is generated in a separate TBAA tree with a
893  /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for
894  /// all nodes in unrelated alias analysis trees. This function makes the
895  /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root),
896  /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
897  /// the connected trees every access to an Allocation is resolved to
898  /// must-alias if compared to a normal C/C++ access.
899  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
900    llvm::MDBuilder MDHelper(*Context);
901    llvm::MDNode *TBAARenderScriptDistinct =
902      MDHelper.createTBAARoot("RenderScript Distinct TBAA");
903    llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode(
904        "RenderScript TBAA", TBAARenderScriptDistinct);
905    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
906    TBAARenderScript->replaceOperandWith(1, TBAARoot);
907  }
908
909  virtual bool runOnModule(llvm::Module &Module) {
910    bool Changed  = false;
911    this->Module  = &Module;
912    this->Context = &Module.getContext();
913
914    this->buildTypes();
915
916    bcinfo::MetadataExtractor me(&Module);
917    if (!me.extract()) {
918      ALOGE("Could not extract metadata from module!");
919      return false;
920    }
921    mExportForEachCount = me.getExportForEachSignatureCount();
922    mExportForEachNameList = me.getExportForEachNameList();
923    mExportForEachSignatureList = me.getExportForEachSignatureList();
924
925    bool AllocsExposed = allocPointersExposed(Module);
926
927    for (size_t i = 0; i < mExportForEachCount; ++i) {
928      const char *name = mExportForEachNameList[i];
929      uint32_t signature = mExportForEachSignatureList[i];
930      llvm::Function *kernel = Module.getFunction(name);
931      if (kernel) {
932        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
933          Changed |= ExpandKernel(kernel, signature);
934          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
935        } else if (kernel->getReturnType()->isVoidTy()) {
936          Changed |= ExpandFunction(kernel, signature);
937          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
938        } else {
939          // There are some graphics root functions that are not
940          // expanded, but that will be called directly. For those
941          // functions, we can not set the linkage to internal.
942        }
943      }
944    }
945
946    if (gEnableRsTbaa && !AllocsExposed) {
947      connectRenderScriptTBAAMetadata(Module);
948    }
949
950    return Changed;
951  }
952
953  virtual const char *getPassName() const {
954    return "ForEach-able Function Expansion";
955  }
956
957}; // end RSForEachExpandPass
958
959} // end anonymous namespace
960
961char RSForEachExpandPass::ID = 0;
962static llvm::RegisterPass<RSForEachExpandPass> X("foreachexp", "ForEach Expand Pass");
963
964namespace bcc {
965
966llvm::ModulePass *
967createRSForEachExpandPass(bool pEnableStepOpt){
968  return new RSForEachExpandPass(pEnableStepOpt);
969}
970
971} // end namespace bcc
972