/*
 * Copyright 2012, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "bcc/Assert.h"
#include "bcc/Renderscript/RSTransforms.h"

#include <cstdlib>

#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/MDBuilder.h>
#include <llvm/IR/Module.h>
#include <llvm/Pass.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/IR/DataLayout.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Type.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>

#include "bcc/Config/Config.h"
#include "bcc/Support/Log.h"

#include "bcinfo/MetadataExtractor.h"

#define NUM_EXPANDED_FUNCTION_PARAMS 5

using namespace bcc;

namespace {

static const bool gEnableRsTbaa = true;

/* RSForEachExpandPass - This pass operates on functions that are able to be
 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
 * ForEach-able function to be invoked over the appropriate data cells of the
 * input/output allocations (adjusting other relevant parameters as we go). We
 * support doing this for any ForEach-able compute kernels. The new function
 * name is the original function name followed by ".expand". Note that we
 * still generate code for the original function.
 */
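/* As a rough illustration of the transformation (a conceptual sketch only;
 * the pass emits LLVM IR directly, and casts, TBAA tags and naming differ in
 * detail): for an old-style kernel such as
 *
 *   void foo(const uchar4 *in, uchar4 *out, uint32_t x, uint32_t y);
 *
 * the pass adds a driver-callable function roughly equivalent to
 *
 *   void foo.expand(RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
 *                   uint32_t instep, uint32_t outstep) {
 *     for (uint32_t x = x1; x < x2; x++) {
 *       foo((const uchar4 *)((const char *)p->in + (x - x1) * instep),
 *           (uchar4 *)((char *)p->out + (x - x1) * outstep),
 *           x, p->y);
 *     }
 *   }
 */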
class RSForEachExpandPass : public llvm::ModulePass {
private:
  static char ID;

  llvm::Module *Module;
  llvm::LLVMContext *Context;

  /*
   * Pointer to LLVM type information for the ForEachStubType and the function
   * signature for expanded kernels.  These must be re-calculated for each
   * module the pass is run on.
   */
  llvm::StructType   *ForEachStubType;
  llvm::FunctionType *ExpandedFunctionType;

  uint32_t mExportForEachCount;
  const char **mExportForEachNameList;
  const uint32_t *mExportForEachSignatureList;

  // Turns on optimization of allocation stride values.
  bool mEnableStepOpt;

  uint32_t getRootSignature(llvm::Function *Function) {
    const llvm::NamedMDNode *ExportForEachMetadata =
        Module->getNamedMetadata("#rs_export_foreach");

    if (!ExportForEachMetadata) {
      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
      for (llvm::Function::arg_iterator B = Function->arg_begin(),
                                        E = Function->arg_end();
           B != E;
           ++B) {
        RootArgTys.push_back(B->getType());
      }

      // For pre-ICS bitcode, we may not have signature information. In that
      // case, we use the size of the RootArgTys to select the number of
      // arguments.
      return (1 << RootArgTys.size()) - 1;
    }

    if (ExportForEachMetadata->getNumOperands() == 0) {
      return 0;
    }

    bccAssert(ExportForEachMetadata->getNumOperands() > 0);

    // We only handle the case for legacy root() functions here, so this is
    // hard-coded to look at only the first such function.
    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
    if (SigNode != NULL && SigNode->getNumOperands() == 1) {
      llvm::Value *SigVal = SigNode->getOperand(0);
      if (SigVal->getValueID() == llvm::Value::MDStringVal) {
        llvm::StringRef SigString =
            static_cast<llvm::MDString*>(SigVal)->getString();
        uint32_t Signature = 0;
        if (SigString.getAsInteger(10, Signature)) {
          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
          return 0;
        }
        return Signature;
      }
    }

    return 0;
  }
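
  /* Illustration (not authoritative; the exact bit layout is defined by
   * bcinfo::MetadataExtractor): the pre-ICS fallback above sets one low bit
   * per root() parameter.  A legacy
   *
   *   void root(const void *in, void *out, const void *usr,
   *             uint32_t x, uint32_t y);
   *
   * has five parameters, so the computed signature is (1 << 5) - 1 = 0x1f,
   * which the hasForEachSignature*() helpers used throughout this pass then
   * interpret as "has in, out, usrData, x and y".
   */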

  bool isStepOptSupported(llvm::Type *AllocType) {

    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);

    if (mEnableStepOpt) {
      return false;
    }

    if (AllocType == VoidPtrTy) {
      return false;
    }

    if (!PT) {
      return false;
    }

    // The remaining conditions are 64-bit only.
    if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
      return true;
    }

    // "coerce" suggests an upconverted struct type, which we can't support.
    if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
      return false;
    }

    // 2xi64 and i128 suggest an upconverted struct type, which is also
    // unsupported.
    llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
    if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
      return false;
    }

    return true;
  }

  // Get the actual value we should use to step through an allocation.
  //
  // Normally the value we use to step through an allocation is given to us by
  // the driver. However, for certain primitive data types, we can derive an
  // integer constant for the step value. We use this integer constant whenever
  // possible to allow further compiler optimizations to take place.
  //
  // DL - Target Data size/layout information.
  // AllocType - Type of the allocation (should be a pointer).
  // OrigStep - Original step increment (root.expand() input from driver).
  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
                            llvm::Value *OrigStep) {
    bccAssert(DL);
    bccAssert(AllocType);
    bccAssert(OrigStep);
    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
    if (isStepOptSupported(AllocType)) {
      llvm::Type *ET = PT->getElementType();
      uint64_t ETSize = DL->getTypeAllocSize(ET);
      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
      return llvm::ConstantInt::get(Int32Ty, ETSize);
    } else {
      return OrigStep;
    }
  }
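
  /* Example (a sketch, assuming step optimization applies): for an allocation
   * of float4 elements, AllocType is float4* and DL->getTypeAllocSize()
   * reports 16 bytes, so getStepValue() returns the constant i32 16 instead
   * of the driver-supplied OrigStep, which lets later passes fold the pointer
   * arithmetic in the expanded loop.
   */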

  /// @brief Builds the types required by the pass for the given context.
  void buildTypes(void) {
    // Create the RsForEachStubParamStruct struct.

    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
    llvm::Type *Int32Ty   = llvm::Type::getInt32Ty(*Context);
    /* Defined in frameworks/base/libs/rs/rs_hal.h:
     *
     * struct RsForEachStubParamStruct {
     *   const void *in;
     *   void *out;
     *   const void *usr;
     *   uint32_t usr_len;
     *   uint32_t x;
     *   uint32_t y;
     *   uint32_t z;
     *   uint32_t lod;
     *   enum RsAllocationCubemapFace face;
     *   uint32_t ar[16];
     *   const void **ins;
     *   uint32_t *eStrideIns;
     * };
     */
    llvm::SmallVector<llvm::Type*, 16> StructTypes;
    StructTypes.push_back(VoidPtrTy);  // const void *in
    StructTypes.push_back(VoidPtrTy);  // void *out
    StructTypes.push_back(VoidPtrTy);  // const void *usr
    StructTypes.push_back(Int32Ty);    // uint32_t usr_len
    StructTypes.push_back(Int32Ty);    // uint32_t x
    StructTypes.push_back(Int32Ty);    // uint32_t y
    StructTypes.push_back(Int32Ty);    // uint32_t z
    StructTypes.push_back(Int32Ty);    // uint32_t lod
    StructTypes.push_back(Int32Ty);    // enum RsAllocationCubemapFace face
    StructTypes.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]

    StructTypes.push_back(llvm::PointerType::getUnqual(VoidPtrTy)); // const void **ins
    StructTypes.push_back(Int32Ty->getPointerTo()); // uint32_t *eStrideIns

    ForEachStubType =
      llvm::StructType::create(StructTypes, "RsForEachStubParamStruct");

    // Create the function type for expanded kernels.

    llvm::Type *ForEachStubPtrTy = ForEachStubType->getPointerTo();

    llvm::SmallVector<llvm::Type*, 8> ParamTypes;
    ParamTypes.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
    ParamTypes.push_back(Int32Ty);          // uint32_t x1
    ParamTypes.push_back(Int32Ty);          // uint32_t x2
    ParamTypes.push_back(Int32Ty);          // uint32_t instep
    ParamTypes.push_back(Int32Ty);          // uint32_t outstep

    ExpandedFunctionType = llvm::FunctionType::get(llvm::Type::getVoidTy(*Context),
                                                   ParamTypes,
                                                   false);
  }

  /// @brief Create skeleton of the expanded function.
  ///
  /// This creates a function with the following signature:
  ///
  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
  ///         uint32_t instep, uint32_t outstep)
  ///
  llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
    llvm::Function *ExpandedFunction =
      llvm::Function::Create(ExpandedFunctionType,
                             llvm::GlobalValue::ExternalLinkage,
                             OldName + ".expand", Module);

    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);

    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();

    (AI++)->setName("p");
    (AI++)->setName("x1");
    (AI++)->setName("x2");
    (AI++)->setName("arg_instep");
    (AI++)->setName("arg_outstep");

    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
                                                       ExpandedFunction);
    llvm::IRBuilder<> Builder(Begin);
    Builder.CreateRetVoid();

    return ExpandedFunction;
  }

  /// @brief Create an empty loop
  ///
  /// Create a loop of the form:
  ///
  /// for (i = LowerBound; i < UpperBound; i++)
  ///   ;
  ///
  /// After the loop has been created, the builder is set such that
  /// instructions can be added to the loop body.
  ///
  /// @param Builder The builder to use to build this loop. The current
  ///                position of the builder is the position the loop
  ///                will be inserted at.
  /// @param LowerBound The first value of the loop iterator.
  /// @param UpperBound The exclusive upper bound of the loop iterator.
  /// @param LoopIV A reference that will be set to the loop induction variable.
  /// @return The BasicBlock that will be executed after the loop.
  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
                               llvm::Value *LowerBound,
                               llvm::Value *UpperBound,
                               llvm::PHINode **LoopIV) {
    assert(LowerBound->getType() == UpperBound->getType());

    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
    llvm::Value *Cond, *IVNext;
    llvm::PHINode *IV;

    CondBB = Builder.GetInsertBlock();
    AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());

    // if (LowerBound < UpperBound)
    //   goto LoopHeader
    // else
    //   goto AfterBB
    CondBB->getTerminator()->eraseFromParent();
    Builder.SetInsertPoint(CondBB);
    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);

    // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV]
    // iv.next = iv + 1
    // if (iv.next < UpperBound)
    //   goto LoopHeader
    // else
    //   goto AfterBB
    Builder.SetInsertPoint(HeaderBB);
    IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
    IV->addIncoming(LowerBound, CondBB);
    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
    IV->addIncoming(IVNext, HeaderBB);
    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
    AfterBB->setName("Exit");
    Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
    *LoopIV = IV;
    return AfterBB;
  }
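
  /* For reference, the control flow emitted by createLoop() looks roughly
   * like this (block names match those created above; the exact IR may
   * differ):
   *
   *   CondBB: if (LowerBound < UpperBound) goto Loop; else goto Exit;
   *   Loop:   X = phi [LowerBound, CondBB], [X.next, Loop]
   *           <caller-inserted loop body goes here, before the increment>
   *           X.next = X + 1
   *           if (X.next < UpperBound) goto Loop; else goto Exit;
   *   Exit:   <original instructions split off from CondBB, e.g. ret void>
   */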

public:
  RSForEachExpandPass(bool pEnableStepOpt)
      : ModulePass(ID), Module(NULL), Context(NULL),
        mEnableStepOpt(pEnableStepOpt) {

  }

  /* Performs the actual optimization on a selected function. On success, the
   * Module will contain a new function of the name "<NAME>.expand" that
   * invokes <NAME>() in a loop with the appropriate parameters.
   */
  bool ExpandFunction(llvm::Function *Function, uint32_t Signature) {
    ALOGV("Expanding ForEach-able Function %s",
          Function->getName().str().c_str());

    if (!Signature) {
      Signature = getRootSignature(Function);
      if (!Signature) {
        // We couldn't determine how to expand this function based on its
        // function signature.
        return false;
      }
    }

    llvm::DataLayout DL(Module);

    llvm::Function *ExpandedFunction =
      createEmptyExpandedFunction(Function->getName());

    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);

    /*
     * Extract the expanded function's parameters.  It is guaranteed by
     * createEmptyExpandedFunction that there will be five parameters.
     */
    llvm::Function::arg_iterator ExpandedFunctionArgIter =
      ExpandedFunction->arg_begin();

    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_instep  = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_outstep = &*ExpandedFunctionArgIter;

    llvm::Value *InStep  = NULL;
    llvm::Value *OutStep = NULL;

    // Construct the actual function body.
    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());

    // Collect and construct the arguments for the kernel().
    // Note that we load any loop-invariant arguments before entering the Loop.
    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();

    llvm::Type *InTy = NULL;
    llvm::Value *InBasePtr = NULL;
    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
      InTy = (FunctionArgIter++)->getType();
      InStep = getStepValue(&DL, InTy, Arg_instep);
      InStep->setName("instep");
      InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
    }

    llvm::Type *OutTy = NULL;
    llvm::Value *OutBasePtr = NULL;
    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
      OutTy = (FunctionArgIter++)->getType();
      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
      OutStep->setName("outstep");
      OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
    }

    llvm::Value *UsrData = NULL;
    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
          Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
      UsrData->setName("UsrData");
    }

    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
      FunctionArgIter++;
    }

    llvm::Value *Y = NULL;
    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
      Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
      FunctionArgIter++;
    }

    bccAssert(FunctionArgIter == Function->arg_end());

    llvm::PHINode *IV;
    createLoop(Builder, Arg_x1, Arg_x2, &IV);

    // Populate the actual call to kernel().
    llvm::SmallVector<llvm::Value*, 8> RootArgs;

    llvm::Value *InPtr  = NULL;
    llvm::Value *OutPtr = NULL;

    // Calculate the current input and output pointers.
    //
    // We always calculate the input/output pointers with a GEP operating on i8
    // values and only cast at the very end to OutTy. This is because the step
    // between two values is given in bytes.
    //
    // TODO: We could further optimize the output by using a GEP operation of
    // type 'OutTy' in cases where the element type of the allocation allows.
    if (OutBasePtr) {
      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
      OutOffset = Builder.CreateMul(OutOffset, OutStep);
      OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
    }

    if (InBasePtr) {
      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
      InOffset = Builder.CreateMul(InOffset, InStep);
      InPtr = Builder.CreateGEP(InBasePtr, InOffset);
      InPtr = Builder.CreatePointerCast(InPtr, InTy);
    }

    if (InPtr) {
      RootArgs.push_back(InPtr);
    }

    if (OutPtr) {
      RootArgs.push_back(OutPtr);
    }

    if (UsrData) {
      RootArgs.push_back(UsrData);
    }

    llvm::Value *X = IV;
    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
      RootArgs.push_back(X);
    }

    if (Y) {
      RootArgs.push_back(Y);
    }

    Builder.CreateCall(Function, RootArgs);

    return true;
  }

  /* Expand a pass-by-value kernel.
   */
  bool ExpandKernel(llvm::Function *Function, uint32_t Signature) {
    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());

    // TODO: Refactor this to share functionality with ExpandFunction.
    llvm::DataLayout DL(Module);

    llvm::Function *ExpandedFunction =
      createEmptyExpandedFunction(Function->getName());

    /*
     * Extract the expanded function's parameters.  It is guaranteed by
     * createEmptyExpandedFunction that there will be five parameters.
     */

    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);

    llvm::Function::arg_iterator ExpandedFunctionArgIter =
      ExpandedFunction->arg_begin();

    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_instep  = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_outstep = &*ExpandedFunctionArgIter;

    // Construct the actual function body.
    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());

    // Create TBAA meta-data.
    llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer;
    llvm::MDBuilder MDHelper(*Context);

    TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", TBAARenderScript);
    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, TBAAAllocation, 0);
    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", TBAARenderScript);
    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);

    /*
     * Collect and construct the arguments for the kernel().
     *
     * Note that we load any loop-invariant arguments before entering the Loop.
     */
    size_t NumInputs = Function->arg_size();

    llvm::Value *Y = NULL;
    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
      Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
      --NumInputs;
    }

    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
      --NumInputs;
    }

    // No usrData parameter on kernels.
    bccAssert(
        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));

    llvm::Function::arg_iterator ArgIter = Function->arg_begin();

    // Check the return type.
    llvm::Type     *OutTy      = NULL;
    llvm::Value    *OutStep    = NULL;
    llvm::LoadInst *OutBasePtr = NULL;

    bool PassOutByReference = false;

    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
      llvm::Type *OutBaseTy = Function->getReturnType();

      if (OutBaseTy->isVoidTy()) {
        PassOutByReference = true;
        OutTy = ArgIter->getType();

        ArgIter++;
        --NumInputs;
      } else {
        // We don't increment ArgIter, since we are using the actual return
        // type.
        OutTy = OutBaseTy->getPointerTo();
      }

      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
      OutStep->setName("outstep");
      OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
      if (gEnableRsTbaa) {
        OutBasePtr->setMetadata("tbaa", TBAAPointer);
      }
    }

    llvm::SmallVector<llvm::Type*,     8> InTypes;
    llvm::SmallVector<llvm::Value*,    8> InSteps;
    llvm::SmallVector<llvm::LoadInst*, 8> InBasePtrs;
    llvm::SmallVector<bool,            8> InIsStructPointer;

    if (NumInputs == 1) {
      llvm::Type *InType = ArgIter->getType();

      /*
       * AArch64 calling conventions dictate that structs of sufficient size
       * are passed by pointer instead of by value.  This, combined with the
       * fact that we don't allow kernels to operate on pointer data, means
       * that if we see a kernel with a pointer parameter we know that it is a
       * struct input that has been promoted.  As such, we don't need to
       * convert its type to a pointer.  Later we will need to know to avoid a
       * load, so we save this information in InIsStructPointer.
       */
      if (!InType->isPointerTy()) {
        InType = InType->getPointerTo();
        InIsStructPointer.push_back(false);
      } else {
        InIsStructPointer.push_back(true);
      }

      llvm::Value *InStep = getStepValue(&DL, InType, Arg_instep);

      InStep->setName("instep");

      llvm::Value    *Input     = Builder.CreateStructGEP(Arg_p, 0);
      llvm::LoadInst *InBasePtr = Builder.CreateLoad(Input, "input_base");

      if (gEnableRsTbaa) {
        InBasePtr->setMetadata("tbaa", TBAAPointer);
      }

      InTypes.push_back(InType);
      InSteps.push_back(InStep);
      InBasePtrs.push_back(InBasePtr);

    } else if (NumInputs > 1) {
      llvm::Value    *InsMember  = Builder.CreateStructGEP(Arg_p, 10);
      llvm::LoadInst *InsBasePtr = Builder.CreateLoad(InsMember,
                                                      "inputs_base");

      llvm::Value    *InStepsMember = Builder.CreateStructGEP(Arg_p, 11);
      llvm::LoadInst *InStepsBase   = Builder.CreateLoad(InStepsMember,
                                                         "insteps_base");

      for (size_t InputIndex = 0; InputIndex < NumInputs;
           ++InputIndex, ArgIter++) {

          llvm::Value *IndexVal = Builder.getInt32(InputIndex);

          llvm::Value    *InStepAddr = Builder.CreateGEP(InStepsBase, IndexVal);
          llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
                                                          "instep_addr");

          llvm::Type *InType = ArgIter->getType();

          /*
           * AArch64 calling conventions dictate that structs of sufficient
           * size are passed by pointer instead of by value.  This, combined
           * with the fact that we don't allow kernels to operate on pointer
           * data, means that if we see a kernel with a pointer parameter we
           * know that it is a struct input that has been promoted.  As such,
           * we don't need to convert its type to a pointer.  Later we will
           * need to know to avoid a load, so we save this information in
           * InIsStructPointer.
           */
          if (!InType->isPointerTy()) {
            InType = InType->getPointerTo();
            InIsStructPointer.push_back(false);
          } else {
            InIsStructPointer.push_back(true);
          }

          llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);

          InStep->setName("instep");

          llvm::Value    *InputAddr = Builder.CreateGEP(InsBasePtr, IndexVal);
          llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr,
                                                         "input_base");

          if (gEnableRsTbaa) {
            InBasePtr->setMetadata("tbaa", TBAAPointer);
          }

          InTypes.push_back(InType);
          InSteps.push_back(InStep);
          InBasePtrs.push_back(InBasePtr);
      }
    }

    llvm::PHINode *IV;
    createLoop(Builder, Arg_x1, Arg_x2, &IV);

    // Populate the actual call to kernel().
    llvm::SmallVector<llvm::Value*, 8> RootArgs;

    // Calculate the current input and output pointers.
    //
    // We always calculate the input/output pointers with a GEP operating on i8
    // values combined with a multiplication, and only cast at the very end to
    // OutTy.  This accounts for dynamic step sizes when the value isn't
    // apparent at compile time.  In the (very common) case when we do know the
    // step size at compile time, due to having complete type information,
    // this multiplication will be optimized out and produce code equivalent to
    // a GEP on a pointer of the correct type.

    // Output

    llvm::Value *OutPtr = NULL;
    if (OutBasePtr) {
      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);

      OutOffset = Builder.CreateMul(OutOffset, OutStep);
      OutPtr    = Builder.CreateGEP(OutBasePtr, OutOffset);
      OutPtr    = Builder.CreatePointerCast(OutPtr, OutTy);

      if (PassOutByReference) {
        RootArgs.push_back(OutPtr);
      }
    }

    // Inputs

    if (NumInputs > 0) {
      llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);

      for (size_t Index = 0; Index < NumInputs; ++Index) {
        llvm::Value *InOffset = Builder.CreateMul(Offset, InSteps[Index]);
        llvm::Value *InPtr    = Builder.CreateGEP(InBasePtrs[Index], InOffset);

        InPtr = Builder.CreatePointerCast(InPtr, InTypes[Index]);

        llvm::Value *Input;

        if (InIsStructPointer[Index]) {
          Input = InPtr;

        } else {
          llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");

          if (gEnableRsTbaa) {
            InputLoad->setMetadata("tbaa", TBAAAllocation);
          }

          Input = InputLoad;
        }

        RootArgs.push_back(Input);
      }
    }

    llvm::Value *X = IV;
    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
      RootArgs.push_back(X);
    }

    if (Y) {
      RootArgs.push_back(Y);
    }

    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);

    if (OutPtr && !PassOutByReference) {
      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
      if (gEnableRsTbaa) {
        Store->setMetadata("tbaa", TBAAAllocation);
      }
    }

    return true;
  }
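
  /* A hedged sketch of what ExpandKernel() produces for a typical
   * pass-by-value kernel such as
   *
   *   uchar4 foo(uchar4 in, uint32_t x, uint32_t y);
   *
   * The generated foo.expand is conceptually
   *
   *   for (uint32_t x = x1; x < x2; x++) {
   *     uchar4 *outp = (uchar4 *)((char *)p->out + (x - x1) * outstep);
   *     uchar4 inv = *(const uchar4 *)((const char *)p->in +
   *                                    (x - x1) * instep);
   *     *outp = foo(inv, x, p->y);
   *   }
   *
   * with the loads and stores through p->in and p->out tagged with the
   * RenderScript TBAA nodes created at the top of ExpandKernel().
   */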

  /// @brief Checks if pointers to allocation internals are exposed
  ///
  /// This function checks whether the script can gain access to pointers into
  /// data within a RenderScript Allocation, either through the parameters
  /// passed to a kernel or through calls to the runtime library. Only if we
  /// control all loads from and stores to data within RenderScript
  /// allocations, and all of the runtime's internal accesses are annotated
  /// with RenderScript TBAA metadata, can we safely use TBAA to distinguish
  /// between generic pointers and pointers into allocations.
  bool allocPointersExposed(llvm::Module &Module) {
    // Old-style kernel functions can expose pointers to elements within
    // allocations.
    // TODO: Extend analysis to allow simple cases of old-style kernels.
    for (size_t i = 0; i < mExportForEachCount; ++i) {
      const char *Name = mExportForEachNameList[i];
      uint32_t Signature = mExportForEachSignatureList[i];
      if (Module.getFunction(Name) &&
          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
        return true;
      }
    }

    // Check for library functions that expose a pointer to an Allocation or
    // that are not yet annotated with RenderScript-specific TBAA information.
    static std::vector<std::string> Funcs;

    // rsGetElementAt(...)
    Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
    // rsSetElementAt()
    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
    // rsGetElementAtYuv_uchar_Y()
    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
    // rsGetElementAtYuv_uchar_U()
    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
    // rsGetElementAtYuv_uchar_V()
    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");

    for (std::vector<std::string>::iterator FI = Funcs.begin(),
                                            FE = Funcs.end();
         FI != FE; ++FI) {
      llvm::Function *Function = Module.getFunction(*FI);

      if (!Function) {
        ALOGE("Missing run-time function '%s'", FI->c_str());
        return true;
      }

      if (Function->getNumUses() > 0) {
        return true;
      }
    }

    return false;
  }
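
  /* For example, an old-style kernel such as
   *
   *   void root(const uchar4 *in, uchar4 *out, uint32_t x, uint32_t y);
   *
   * receives raw pointers into the input/output Allocations, so its presence
   * makes allocPointersExposed() return true; runOnModule() then leaves the
   * RenderScript TBAA tree unconnected and aliasing stays conservative.
   */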

  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
  ///
  /// The TBAA metadata used to annotate loads/stores from RenderScript
  /// Allocations is generated in a separate TBAA tree with a
  /// "RenderScript TBAA" root node. LLVM assumes may-alias for all nodes in
  /// unrelated alias-analysis trees. This function makes the RenderScript
  /// TBAA tree a subtree of the normal C/C++ TBAA tree, alongside the normal
  /// C/C++ types. With the connected trees, every access to an Allocation is
  /// resolved to must-alias if compared to a normal C/C++ access.
  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
    llvm::MDBuilder MDHelper(*Context);
    llvm::MDNode *TBAARenderScript =
      MDHelper.createTBAARoot("RenderScript TBAA");

    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
    llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript",
                                                         TBAARoot);

    TBAARenderScript->replaceAllUsesWith(TBAAMergedRS);
  }
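
  /* The resulting TBAA layout, roughly:
   *
   *   before:                        after:
   *     "RenderScript TBAA" (root)     "Simple C/C++ TBAA" (root)
   *       "allocation"                   "RenderScript"
   *       "pointer"                        "allocation"
   *                                        "pointer"
   *
   * i.e. the former RenderScript root is replaced by a "RenderScript" node
   * under the C/C++ root, so the two trees are no longer unrelated.
   */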

  virtual bool runOnModule(llvm::Module &Module) {
    bool Changed  = false;
    this->Module  = &Module;
    this->Context = &Module.getContext();

    this->buildTypes();

    bcinfo::MetadataExtractor me(&Module);
    if (!me.extract()) {
      ALOGE("Could not extract metadata from module!");
      return false;
    }
    mExportForEachCount = me.getExportForEachSignatureCount();
    mExportForEachNameList = me.getExportForEachNameList();
    mExportForEachSignatureList = me.getExportForEachSignatureList();

    bool AllocsExposed = allocPointersExposed(Module);

    for (size_t i = 0; i < mExportForEachCount; ++i) {
      const char *name = mExportForEachNameList[i];
      uint32_t signature = mExportForEachSignatureList[i];
      llvm::Function *kernel = Module.getFunction(name);
      if (kernel) {
        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
          Changed |= ExpandKernel(kernel, signature);
          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
        } else if (kernel->getReturnType()->isVoidTy()) {
          Changed |= ExpandFunction(kernel, signature);
          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
        } else {
          // There are some graphics root functions that are not expanded, but
          // that will be called directly. For those functions, we cannot set
          // the linkage to internal.
        }
      }
    }

    if (gEnableRsTbaa && !AllocsExposed) {
      connectRenderScriptTBAAMetadata(Module);
    }

    return Changed;
  }

  virtual const char *getPassName() const {
    return "ForEach-able Function Expansion";
  }

}; // end RSForEachExpandPass

} // end anonymous namespace

char RSForEachExpandPass::ID = 0;

namespace bcc {

llvm::ModulePass *
createRSForEachExpandPass(bool pEnableStepOpt) {
  return new RSForEachExpandPass(pEnableStepOpt);
}

} // end namespace bcc