RSForEachExpand.cpp revision 33cda5cf335afc6aa2dbe02062bc9e6649e1f87c
1/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "bcc/Assert.h"
18#include "bcc/Renderscript/RSTransforms.h"
19
20#include <cstdlib>
21#include <functional>
22
23#include <llvm/IR/DerivedTypes.h>
24#include <llvm/IR/Function.h>
25#include <llvm/IR/Instructions.h>
26#include <llvm/IR/IRBuilder.h>
27#include <llvm/IR/MDBuilder.h>
28#include <llvm/IR/Module.h>
29#include <llvm/Pass.h>
30#include <llvm/Support/raw_ostream.h>
31#include <llvm/IR/DataLayout.h>
32#include <llvm/IR/Function.h>
33#include <llvm/IR/Type.h>
34#include <llvm/Transforms/Utils/BasicBlockUtils.h>
35
36#include "bcc/Config/Config.h"
37#include "bcc/Support/Log.h"
38
39#include "bcinfo/MetadataExtractor.h"
40
41#define NUM_EXPANDED_FUNCTION_PARAMS 4
42
43using namespace bcc;
44
45namespace {
46
47static const bool gEnableRsTbaa = true;
48
49/* RSForEachExpandPass - This pass operates on functions that are able to be
50 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
51 * ForEach-able function to be invoked over the appropriate data cells of the
52 * input/output allocations (adjusting other relevant parameters as we go). We
53 * support doing this for any ForEach-able compute kernels. The new function
54 * name is the original function name followed by ".expand". Note that we
55 * still generate code for the original function.
56 */
57class RSForEachExpandPass : public llvm::ModulePass {
58public:
59  static char ID;
60
61private:
62
63  llvm::Module *Module;
64  llvm::LLVMContext *Context;
65
66  /*
67   * Pointer to LLVM type information for the ForEachStubType and the function
68   * signature for expanded kernels.  These must be re-calculated for each
69   * module the pass is run on.
70   */
71  llvm::StructType   *ForEachStubType;
72  llvm::FunctionType *ExpandedFunctionType;
73
74  uint32_t mExportForEachCount;
75  const char **mExportForEachNameList;
76  const uint32_t *mExportForEachSignatureList;
77
78  // Turns on optimization of allocation stride values.
79  bool mEnableStepOpt;
80
81  uint32_t getRootSignature(llvm::Function *Function) {
82    const llvm::NamedMDNode *ExportForEachMetadata =
83        Module->getNamedMetadata("#rs_export_foreach");
84
85    if (!ExportForEachMetadata) {
86      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
87      for (llvm::Function::arg_iterator B = Function->arg_begin(),
88                                        E = Function->arg_end();
89           B != E;
90           ++B) {
91        RootArgTys.push_back(B->getType());
92      }
93
94      // For pre-ICS bitcode, we may not have signature information. In that
95      // case, we use the size of the RootArgTys to select the number of
96      // arguments.
97      return (1 << RootArgTys.size()) - 1;
98    }
99
100    if (ExportForEachMetadata->getNumOperands() == 0) {
101      return 0;
102    }
103
104    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
105
106    // We only handle the case for legacy root() functions here, so this is
107    // hard-coded to look at only the first such function.
108    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
109    if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
110      llvm::Value *SigVal = SigNode->getOperand(0);
111      if (SigVal->getValueID() == llvm::Value::MDStringVal) {
112        llvm::StringRef SigString =
113            static_cast<llvm::MDString*>(SigVal)->getString();
114        uint32_t Signature = 0;
115        if (SigString.getAsInteger(10, Signature)) {
116          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
117          return 0;
118        }
119        return Signature;
120      }
121    }
122
123    return 0;
124  }
125
126  bool isStepOptSupported(llvm::Type *AllocType) {
127
128    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
129    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
130
131    if (mEnableStepOpt) {
132      return false;
133    }
134
135    if (AllocType == VoidPtrTy) {
136      return false;
137    }
138
139    if (!PT) {
140      return false;
141    }
142
143    // remaining conditions are 64-bit only
144    if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
145      return true;
146    }
147
148    // coerce suggests an upconverted struct type, which we can't support
149    if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
150      return false;
151    }
152
153    // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
154    llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
155    llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
156    if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
157      return false;
158    }
159
160    return true;
161  }
162
163  // Get the actual value we should use to step through an allocation.
164  //
165  // Normally the value we use to step through an allocation is given to us by
166  // the driver. However, for certain primitive data types, we can derive an
167  // integer constant for the step value. We use this integer constant whenever
168  // possible to allow further compiler optimizations to take place.
169  //
170  // DL - Target Data size/layout information.
171  // AllocType - Type of allocation (should be a pointer).
172  // OrigStep - Original step increment (root.expand() input from driver).
173  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
174                            llvm::Value *OrigStep) {
175    bccAssert(DL);
176    bccAssert(AllocType);
177    bccAssert(OrigStep);
178    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
179    if (isStepOptSupported(AllocType)) {
180      llvm::Type *ET = PT->getElementType();
181      uint64_t ETSize = DL->getTypeAllocSize(ET);
182      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
183      return llvm::ConstantInt::get(Int32Ty, ETSize);
184    } else {
185      return OrigStep;
186    }
187  }
188
189#define PARAM_FIELD_INS         0
190#define PARAM_FIELD_INESTRIDES  1
191#define PARAM_FIELD_OUT         2
192#define PARAM_FIELD_Y           3
193#define PARAM_FIELD_Z           4
194#define PARAM_FIELD_LID         5
195#define PARAM_FIELD_USR         6
196#define PARAM_FIELD_DIMX        7
197#define PARAM_FIELD_DIMY        8
198#define PARAM_FIELD_DIMZ        9
199#define PARAM_FIELD_SLOT       10
200
201  /// Builds the types required by the pass for the given context.
202  void buildTypes(void) {
203    // Create the RsForEachStubParam struct.
204
205    llvm::Type *VoidPtrTy    = llvm::Type::getInt8PtrTy(*Context);
206    llvm::Type *VoidPtrPtrTy = VoidPtrTy->getPointerTo();
207    llvm::Type *Int32Ty      = llvm::Type::getInt32Ty(*Context);
208    llvm::Type *Int32PtrTy   = Int32Ty->getPointerTo();
209
210    /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
211     *
212     * struct RsForEachKernelStruct{
213     *   const void **ins;
214     *   uint32_t *inEStrides;
215     *   void *out;
216     *   uint32_t y;
217     *   uint32_t z;
218     *   uint32_t lid;
219     *   const void *usr;
220     *   uint32_t dimX;
221     *   uint32_t dimY;
222     *   uint32_t dimZ;
223     *   uint32_t slot;
224     * };
225     */
226    llvm::SmallVector<llvm::Type*, 12> StructTypes;
227    StructTypes.push_back(VoidPtrPtrTy); // const void **ins
228    StructTypes.push_back(Int32PtrTy);   // uint32_t *inEStrides
229    StructTypes.push_back(VoidPtrTy);    // void *out
230    StructTypes.push_back(Int32Ty);      // uint32_t y
231    StructTypes.push_back(Int32Ty);      // uint32_t z
232    StructTypes.push_back(Int32Ty);      // uint32_t lid
233    StructTypes.push_back(VoidPtrTy);    // const void *usr
234    StructTypes.push_back(Int32Ty);      // uint32_t dimX
235    StructTypes.push_back(Int32Ty);      // uint32_t dimY
236    StructTypes.push_back(Int32Ty);      // uint32_t dimZ
237    StructTypes.push_back(Int32Ty);      // uint32_t slot
238
239    ForEachStubType =
240      llvm::StructType::create(StructTypes, "RsForEachStubParamStruct");
241
242    // Create the function type for expanded kernels.
243
244    llvm::Type *ForEachStubPtrTy = ForEachStubType->getPointerTo();
245
246    llvm::SmallVector<llvm::Type*, 8> ParamTypes;
247    ParamTypes.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
248    ParamTypes.push_back(Int32Ty);          // uint32_t x1
249    ParamTypes.push_back(Int32Ty);          // uint32_t x2
250    ParamTypes.push_back(Int32Ty);          // uint32_t outstep
251
252    ExpandedFunctionType =
253        llvm::FunctionType::get(llvm::Type::getVoidTy(*Context), ParamTypes,
254                                false);
255  }
256
257  /// @brief Create skeleton of the expanded function.
258  ///
259  /// This creates a function with the following signature:
260  ///
261  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
262  ///         uint32_t outstep)
263  ///
264  llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
265    llvm::Function *ExpandedFunction =
266      llvm::Function::Create(ExpandedFunctionType,
267                             llvm::GlobalValue::ExternalLinkage,
268                             OldName + ".expand", Module);
269
270    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
271
272    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
273
274    (AI++)->setName("p");
275    (AI++)->setName("x1");
276    (AI++)->setName("x2");
277    (AI++)->setName("arg_outstep");
278
279    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
280                                                       ExpandedFunction);
281    llvm::IRBuilder<> Builder(Begin);
282    Builder.CreateRetVoid();
283
284    return ExpandedFunction;
285  }
286
287  /// @brief Create an empty loop
288  ///
289  /// Create a loop of the form:
290  ///
291  /// for (i = LowerBound; i < UpperBound; i++)
292  ///   ;
293  ///
294  /// After the loop has been created, the builder is set such that
295  /// instructions can be added to the loop body.
296  ///
297  /// @param Builder The builder to use to build this loop. The current
298  ///                position of the builder is the position the loop
299  ///                will be inserted.
300  /// @param LowerBound The first value of the loop iterator
301  /// @param UpperBound The exclusive upper bound of the loop iterator
302  /// @param LoopIV A reference that will be set to the loop iterator.
303  /// @return The BasicBlock that will be executed after the loop.
304  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
305                               llvm::Value *LowerBound,
306                               llvm::Value *UpperBound,
307                               llvm::PHINode **LoopIV) {
308    assert(LowerBound->getType() == UpperBound->getType());
309
310    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
311    llvm::Value *Cond, *IVNext;
312    llvm::PHINode *IV;
313
314    CondBB = Builder.GetInsertBlock();
315    AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
316    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
317
318    // if (LowerBound < Upperbound)
319    //   goto LoopHeader
320    // else
321    //   goto AfterBB
322    CondBB->getTerminator()->eraseFromParent();
323    Builder.SetInsertPoint(CondBB);
324    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
325    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
326
327    // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
328    // iv.next = iv + 1
329    // if (iv.next < Upperbound)
330    //   goto LoopHeader
331    // else
332    //   goto AfterBB
333    Builder.SetInsertPoint(HeaderBB);
334    IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
335    IV->addIncoming(LowerBound, CondBB);
336    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
337    IV->addIncoming(IVNext, HeaderBB);
338    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
339    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
340    AfterBB->setName("Exit");
341    Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
342    *LoopIV = IV;
343    return AfterBB;
344  }
345
346public:
347  RSForEachExpandPass(bool pEnableStepOpt = true)
348      : ModulePass(ID), Module(nullptr), Context(nullptr),
349        mEnableStepOpt(pEnableStepOpt) {
350
351  }
352
353  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
354    // This pass does not use any other analysis passes, but it does
355    // add/wrap the existing functions in the module (thus altering the CFG).
356  }
357
358  // Build contribution to outgoing argument list for calling a
359  // ForEach-able function, based on the special parameters of that
360  // function.
361  //
362  // Signature - metadata bits for the signature of the ForEach-able function
363  // X, Arg_p - values derived directly from expanded function,
364  //            suitable for computing arguments for the ForEach-able function
365  // CalleeArgs - contribution is accumulated here
366  // Bump - invoked once for each contributed outgoing argument
367  void ExpandSpecialArguments(uint32_t Signature,
368                              llvm::Value *X,
369                              llvm::Value *Arg_p,
370                              llvm::IRBuilder<> &Builder,
371                              llvm::SmallVector<llvm::Value*, 8> &CalleeArgs,
372                              std::function<void ()> Bump) {
373
374    if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) {
375      CalleeArgs.push_back(Arg_p);
376      Bump();
377    }
378
379    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
380      CalleeArgs.push_back(X);
381      Bump();
382    }
383
384    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
385      llvm::Value *Y = Builder.CreateLoad(
386                         Builder.CreateStructGEP(Arg_p, PARAM_FIELD_Y), "Y");
387      CalleeArgs.push_back(Y);
388      Bump();
389    }
390
391    if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
392      llvm::Value *Z = Builder.CreateLoad(
393                         Builder.CreateStructGEP(Arg_p, PARAM_FIELD_Z), "Z");
394      CalleeArgs.push_back(Z);
395      Bump();
396    }
397  }
398
399  /* Performs the actual optimization on a selected function. On success, the
400   * Module will contain a new function of the name "<NAME>.expand" that
401   * invokes <NAME>() in a loop with the appropriate parameters.
402   */
403  bool ExpandFunction(llvm::Function *Function, uint32_t Signature) {
404    ALOGV("Expanding ForEach-able Function %s",
405          Function->getName().str().c_str());
406
407    if (!Signature) {
408      Signature = getRootSignature(Function);
409      if (!Signature) {
410        // We couldn't determine how to expand this function based on its
411        // function signature.
412        return false;
413      }
414    }
415
416    llvm::DataLayout DL(Module);
417
418    llvm::Function *ExpandedFunction =
419      createEmptyExpandedFunction(Function->getName());
420
421    /*
422     * Extract the expanded function's parameters.  It is guaranteed by
423     * createEmptyExpandedFunction that there will be five parameters.
424     */
425
426    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
427
428    llvm::Function::arg_iterator ExpandedFunctionArgIter =
429      ExpandedFunction->arg_begin();
430
431    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
432    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
433    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
434    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
435
436    llvm::Value *InStep  = nullptr;
437    llvm::Value *OutStep = nullptr;
438
439    // Construct the actual function body.
440    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
441
442    // Collect and construct the arguments for the kernel().
443    // Note that we load any loop-invariant arguments before entering the Loop.
444    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
445
446    llvm::Type  *InTy      = nullptr;
447    llvm::Value *InBasePtr = nullptr;
448    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
449      llvm::Value    *InsMember  = Builder.CreateStructGEP(Arg_p,
450                                                           PARAM_FIELD_INS);
451      llvm::LoadInst *InsBasePtr = Builder.CreateLoad(InsMember, "inputs_base");
452
453      llvm::Value *InStepsMember =
454        Builder.CreateStructGEP(Arg_p, PARAM_FIELD_INESTRIDES);
455      llvm::LoadInst *InStepsBase = Builder.CreateLoad(InStepsMember,
456                                                       "insteps_base");
457
458      llvm::Value *IndexVal = Builder.getInt32(0);
459
460      llvm::Value    *InStepAddr = Builder.CreateGEP(InStepsBase, IndexVal);
461      llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
462                                                      "instep_addr");
463
464      InTy = (FunctionArgIter++)->getType();
465      InStep = getStepValue(&DL, InTy, InStepArg);
466
467      InStep->setName("instep");
468
469      llvm::Value *InputAddr = Builder.CreateGEP(InsBasePtr, IndexVal);
470      InBasePtr = Builder.CreateLoad(InputAddr, "input_base");
471    }
472
473    llvm::Type *OutTy = nullptr;
474    llvm::Value *OutBasePtr = nullptr;
475    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
476      OutTy = (FunctionArgIter++)->getType();
477      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
478      OutStep->setName("outstep");
479      OutBasePtr = Builder.CreateLoad(
480                     Builder.CreateStructGEP(Arg_p, PARAM_FIELD_OUT));
481    }
482
483    llvm::Value *UsrData = nullptr;
484    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
485      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
486      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
487          Builder.CreateStructGEP(Arg_p, PARAM_FIELD_USR)), UsrDataTy);
488      UsrData->setName("UsrData");
489    }
490
491    llvm::PHINode *IV;
492    createLoop(Builder, Arg_x1, Arg_x2, &IV);
493
494    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
495    ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
496                           [&FunctionArgIter]() { FunctionArgIter++; });
497
498    bccAssert(FunctionArgIter == Function->arg_end());
499
500    // Populate the actual call to kernel().
501    llvm::SmallVector<llvm::Value*, 8> RootArgs;
502
503    llvm::Value *InPtr  = nullptr;
504    llvm::Value *OutPtr = nullptr;
505
506    // Calculate the current input and output pointers
507    //
508    // We always calculate the input/output pointers with a GEP operating on i8
509    // values and only cast at the very end to OutTy. This is because the step
510    // between two values is given in bytes.
511    //
512    // TODO: We could further optimize the output by using a GEP operation of
513    // type 'OutTy' in cases where the element type of the allocation allows.
514    if (OutBasePtr) {
515      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
516      OutOffset = Builder.CreateMul(OutOffset, OutStep);
517      OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
518      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
519    }
520
521    if (InBasePtr) {
522      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
523      InOffset = Builder.CreateMul(InOffset, InStep);
524      InPtr = Builder.CreateGEP(InBasePtr, InOffset);
525      InPtr = Builder.CreatePointerCast(InPtr, InTy);
526    }
527
528    if (InPtr) {
529      RootArgs.push_back(InPtr);
530    }
531
532    if (OutPtr) {
533      RootArgs.push_back(OutPtr);
534    }
535
536    if (UsrData) {
537      RootArgs.push_back(UsrData);
538    }
539
540    RootArgs.append(CalleeArgs.begin(), CalleeArgs.end());
541
542    Builder.CreateCall(Function, RootArgs);
543
544    return true;
545  }
546
547  /* Expand a pass-by-value kernel.
548   */
549  bool ExpandKernel(llvm::Function *Function, uint32_t Signature) {
550    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
551    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
552
553    // TODO: Refactor this to share functionality with ExpandFunction.
554    llvm::DataLayout DL(Module);
555
556    llvm::Function *ExpandedFunction =
557      createEmptyExpandedFunction(Function->getName());
558
559    /*
560     * Extract the expanded function's parameters.  It is guaranteed by
561     * createEmptyExpandedFunction that there will be five parameters.
562     */
563
564    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
565
566    llvm::Function::arg_iterator ExpandedFunctionArgIter =
567      ExpandedFunction->arg_begin();
568
569    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
570    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
571    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
572    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
573
574    // Construct the actual function body.
575    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
576
577    // Create TBAA meta-data.
578    llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer;
579    llvm::MDBuilder MDHelper(*Context);
580
581    TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
582    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
583                                                       TBAARenderScript);
584    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
585                                                      TBAAAllocation, 0);
586    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
587                                                    TBAARenderScript);
588    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
589
590    llvm::MDNode *AliasingDomain, *AliasingScope;
591    AliasingDomain = MDHelper.createAnonymousAliasScopeDomain("RS argument scope domain");
592    AliasingScope = MDHelper.createAnonymousAliasScope(AliasingDomain, "RS argument scope");
593
594    /*
595     * Collect and construct the arguments for the kernel().
596     *
597     * Note that we load any loop-invariant arguments before entering the Loop.
598     */
599    size_t NumInputs = Function->arg_size();
600
601    // No usrData parameter on kernels.
602    bccAssert(
603        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
604
605    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
606
607    // Check the return type
608    llvm::Type     *OutTy            = nullptr;
609    llvm::Value    *OutStep          = nullptr;
610    llvm::LoadInst *OutBasePtr       = nullptr;
611    llvm::Value    *CastedOutBasePtr = nullptr;
612
613    bool PassOutByPointer = false;
614
615    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
616      llvm::Type *OutBaseTy = Function->getReturnType();
617
618      if (OutBaseTy->isVoidTy()) {
619        PassOutByPointer = true;
620        OutTy = ArgIter->getType();
621
622        ArgIter++;
623        --NumInputs;
624      } else {
625        // We don't increment Args, since we are using the actual return type.
626        OutTy = OutBaseTy->getPointerTo();
627      }
628
629      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
630      OutStep->setName("outstep");
631      OutBasePtr = Builder.CreateLoad(
632                     Builder.CreateStructGEP(Arg_p, PARAM_FIELD_OUT));
633
634      if (gEnableRsTbaa) {
635        OutBasePtr->setMetadata("tbaa", TBAAPointer);
636      }
637
638      OutBasePtr->setMetadata("alias.scope", AliasingScope);
639
640      CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
641    }
642
643    llvm::PHINode *IV;
644    createLoop(Builder, Arg_x1, Arg_x2, &IV);
645
646    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
647    ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
648                           [&NumInputs]() { --NumInputs; });
649
650    llvm::SmallVector<llvm::Type*,  8> InTypes;
651    llvm::SmallVector<llvm::Value*, 8> InSteps;
652    llvm::SmallVector<llvm::Value*, 8> InBasePtrs;
653    llvm::SmallVector<bool,         8> InIsStructPointer;
654
655    if (NumInputs > 0) {
656      llvm::Value *InsMember = Builder.CreateStructGEP(Arg_p, PARAM_FIELD_INS);
657      llvm::LoadInst *InsBasePtr = Builder.CreateLoad(InsMember, "inputs_base");
658
659      llvm::Value *InStepsMember =
660        Builder.CreateStructGEP(Arg_p, PARAM_FIELD_INESTRIDES);
661      llvm::LoadInst *InStepsBase = Builder.CreateLoad(InStepsMember,
662                                                         "insteps_base");
663
664      for (size_t InputIndex = 0; InputIndex < NumInputs;
665           ++InputIndex, ArgIter++) {
666
667          llvm::Value *IndexVal = Builder.getInt32(InputIndex);
668
669          llvm::Value    *InStepAddr = Builder.CreateGEP(InStepsBase, IndexVal);
670          llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
671                                                          "instep_addr");
672
673          llvm::Type *InType = ArgIter->getType();
674
675        /*
676         * AArch64 calling dictate that structs of sufficient size get passed by
677         * pointer instead of passed by value.  This, combined with the fact
678         * that we don't allow kernels to operate on pointer data means that if
679         * we see a kernel with a pointer parameter we know that it is struct
680         * input that has been promoted.  As such we don't need to convert its
681         * type to a pointer.  Later we will need to know to avoid a load, so we
682         * save this information in InIsStructPointer.
683         */
684          if (!InType->isPointerTy()) {
685            InType = InType->getPointerTo();
686            InIsStructPointer.push_back(false);
687          } else {
688            InIsStructPointer.push_back(true);
689          }
690
691          llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);
692
693          InStep->setName("instep");
694
695          llvm::Value    *InputAddr = Builder.CreateGEP(InsBasePtr, IndexVal);
696          llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr,
697                                                         "input_base");
698          llvm::Value    *CastInBasePtr = Builder.CreatePointerCast(InBasePtr,
699                                                                    InType, "casted_in");
700          if (gEnableRsTbaa) {
701            InBasePtr->setMetadata("tbaa", TBAAPointer);
702          }
703
704          InBasePtr->setMetadata("alias.scope", AliasingScope);
705
706          InTypes.push_back(InType);
707          InSteps.push_back(InStep);
708          InBasePtrs.push_back(CastInBasePtr);
709      }
710    }
711
712    // Populate the actual call to kernel().
713    llvm::SmallVector<llvm::Value*, 8> RootArgs;
714
715    // Calculate the current input and output pointers
716    //
717    //
718    // We always calculate the input/output pointers with a GEP operating on i8
719    // values combined with a multiplication and only cast at the very end to
720    // OutTy.  This is to account for dynamic stepping sizes when the value
721    // isn't apparent at compile time.  In the (very common) case when we know
722    // the step size at compile time, due to haveing complete type information
723    // this multiplication will optmized out and produces code equivalent to a
724    // a GEP on a pointer of the correct type.
725
726    // Output
727
728    llvm::Value *OutPtr = nullptr;
729    if (CastedOutBasePtr) {
730      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
731
732      OutPtr    = Builder.CreateGEP(CastedOutBasePtr, OutOffset);
733
734      if (PassOutByPointer) {
735        RootArgs.push_back(OutPtr);
736      }
737    }
738
739    // Inputs
740
741    if (NumInputs > 0) {
742      llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
743
744      for (size_t Index = 0; Index < NumInputs; ++Index) {
745        llvm::Value *InPtr    = Builder.CreateGEP(InBasePtrs[Index], Offset);
746        llvm::Value *Input;
747
748        if (InIsStructPointer[Index]) {
749          Input = InPtr;
750
751        } else {
752          llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
753
754          if (gEnableRsTbaa) {
755            InputLoad->setMetadata("tbaa", TBAAAllocation);
756          }
757
758          InputLoad->setMetadata("alias.scope", AliasingScope);
759
760          Input = InputLoad;
761        }
762
763        RootArgs.push_back(Input);
764      }
765    }
766
767    RootArgs.append(CalleeArgs.begin(), CalleeArgs.end());
768
769    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
770
771    if (OutPtr && !PassOutByPointer) {
772      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
773      if (gEnableRsTbaa) {
774        Store->setMetadata("tbaa", TBAAAllocation);
775      }
776      Store->setMetadata("alias.scope", AliasingScope);
777    }
778
779    return true;
780  }
781
782  /// @brief Checks if pointers to allocation internals are exposed
783  ///
784  /// This function verifies if through the parameters passed to the kernel
785  /// or through calls to the runtime library the script gains access to
786  /// pointers pointing to data within a RenderScript Allocation.
787  /// If we know we control all loads from and stores to data within
788  /// RenderScript allocations and if we know the run-time internal accesses
789  /// are all annotated with RenderScript TBAA metadata, only then we
790  /// can safely use TBAA to distinguish between generic and from-allocation
791  /// pointers.
792  bool allocPointersExposed(llvm::Module &Module) {
793    // Old style kernel function can expose pointers to elements within
794    // allocations.
795    // TODO: Extend analysis to allow simple cases of old-style kernels.
796    for (size_t i = 0; i < mExportForEachCount; ++i) {
797      const char *Name = mExportForEachNameList[i];
798      uint32_t Signature = mExportForEachSignatureList[i];
799      if (Module.getFunction(Name) &&
800          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
801        return true;
802      }
803    }
804
805    // Check for library functions that expose a pointer to an Allocation or
806    // that are not yet annotated with RenderScript-specific tbaa information.
807    static std::vector<std::string> Funcs;
808
809    // rsGetElementAt(...)
810    Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
811    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
812    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
813    // rsSetElementAt()
814    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
815    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
816    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
817    // rsGetElementAtYuv_uchar_Y()
818    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
819    // rsGetElementAtYuv_uchar_U()
820    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
821    // rsGetElementAtYuv_uchar_V()
822    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
823
824    for (std::vector<std::string>::iterator FI = Funcs.begin(),
825                                            FE = Funcs.end();
826         FI != FE; ++FI) {
827      llvm::Function *Function = Module.getFunction(*FI);
828
829      if (!Function) {
830        ALOGE("Missing run-time function '%s'", FI->c_str());
831        return true;
832      }
833
834      if (Function->getNumUses() > 0) {
835        return true;
836      }
837    }
838
839    return false;
840  }
841
842  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
843  ///
844  /// The TBAA metadata used to annotate loads/stores from RenderScript
845  /// Allocations is generated in a separate TBAA tree with a
846  /// "RenderScript TBAA" root node. LLVM does assume may-alias for all nodes in
847  /// unrelated alias analysis trees. This function makes the RenderScript TBAA
848  /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
849  /// the connected trees every access to an Allocation is resolved to
850  /// must-alias if compared to a normal C/C++ access.
851  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
852    llvm::MDBuilder MDHelper(*Context);
853    llvm::MDNode *TBAARenderScript =
854      MDHelper.createTBAARoot("RenderScript TBAA");
855
856    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
857    llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript",
858                                                         TBAARoot);
859
860    TBAARenderScript->replaceAllUsesWith(TBAAMergedRS);
861  }
862
863  virtual bool runOnModule(llvm::Module &Module) {
864    bool Changed  = false;
865    this->Module  = &Module;
866    this->Context = &Module.getContext();
867
868    this->buildTypes();
869
870    bcinfo::MetadataExtractor me(&Module);
871    if (!me.extract()) {
872      ALOGE("Could not extract metadata from module!");
873      return false;
874    }
875    mExportForEachCount = me.getExportForEachSignatureCount();
876    mExportForEachNameList = me.getExportForEachNameList();
877    mExportForEachSignatureList = me.getExportForEachSignatureList();
878
879    bool AllocsExposed = allocPointersExposed(Module);
880
881    for (size_t i = 0; i < mExportForEachCount; ++i) {
882      const char *name = mExportForEachNameList[i];
883      uint32_t signature = mExportForEachSignatureList[i];
884      llvm::Function *kernel = Module.getFunction(name);
885      if (kernel) {
886        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
887          Changed |= ExpandKernel(kernel, signature);
888          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
889        } else if (kernel->getReturnType()->isVoidTy()) {
890          Changed |= ExpandFunction(kernel, signature);
891          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
892        } else {
893          // There are some graphics root functions that are not
894          // expanded, but that will be called directly. For those
895          // functions, we can not set the linkage to internal.
896        }
897      }
898    }
899
900    if (gEnableRsTbaa && !AllocsExposed) {
901      connectRenderScriptTBAAMetadata(Module);
902    }
903
904    return Changed;
905  }
906
907  virtual const char *getPassName() const {
908    return "ForEach-able Function Expansion";
909  }
910
911}; // end RSForEachExpandPass
912
913} // end anonymous namespace
914
915char RSForEachExpandPass::ID = 0;
916static llvm::RegisterPass<RSForEachExpandPass> X("foreachexp", "ForEach Expand Pass");
917
918namespace bcc {
919
920llvm::ModulePass *
921createRSForEachExpandPass(bool pEnableStepOpt){
922  return new RSForEachExpandPass(pEnableStepOpt);
923}
924
925} // end namespace bcc
926