RSForEachExpand.cpp revision efcede6546a85d4b93088c99f330c8871ddd39a3
1/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "bcc/Assert.h"
18#include "bcc/Renderscript/RSTransforms.h"
19
20#include <cstdlib>
21
22#include <llvm/IR/DerivedTypes.h>
23#include <llvm/IR/Function.h>
24#include <llvm/IR/Instructions.h>
25#include <llvm/IR/IRBuilder.h>
26#include <llvm/IR/MDBuilder.h>
27#include <llvm/IR/Module.h>
28#include <llvm/Pass.h>
29#include <llvm/Support/raw_ostream.h>
30#include <llvm/IR/DataLayout.h>
31#include <llvm/IR/Function.h>
32#include <llvm/IR/Type.h>
33#include <llvm/Transforms/Utils/BasicBlockUtils.h>
34
35#include "bcc/Config/Config.h"
36#include "bcc/Support/Log.h"
37
38#include "bcinfo/MetadataExtractor.h"
39
40#define NUM_EXPANDED_FUNCTION_PARAMS 5
41
42using namespace bcc;
43
44namespace {
45
46static const bool gEnableRsTbaa = true;
47
48/* RSForEachExpandPass - This pass operates on functions that are able to be
49 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
50 * ForEach-able function to be invoked over the appropriate data cells of the
51 * input/output allocations (adjusting other relevant parameters as we go). We
52 * support doing this for any ForEach-able compute kernels. The new function
53 * name is the original function name followed by ".expand". Note that we
54 * still generate code for the original function.
55 */
56class RSForEachExpandPass : public llvm::ModulePass {
57private:
58  static char ID;
59
60  llvm::Module *Module;
61  llvm::LLVMContext *Context;
62
63  /*
64   * Pointer to LLVM type information for the ForEachStubType and the function
65   * signature for expanded kernels.  These must be re-calculated for each
66   * module the pass is run on.
67   */
68  llvm::StructType   *ForEachStubType;
69  llvm::FunctionType *ExpandedFunctionType;
70
71  uint32_t mExportForEachCount;
72  const char **mExportForEachNameList;
73  const uint32_t *mExportForEachSignatureList;
74
75  // Turns on optimization of allocation stride values.
76  bool mEnableStepOpt;
77
78  uint32_t getRootSignature(llvm::Function *Function) {
79    const llvm::NamedMDNode *ExportForEachMetadata =
80        Module->getNamedMetadata("#rs_export_foreach");
81
82    if (!ExportForEachMetadata) {
83      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
84      for (llvm::Function::arg_iterator B = Function->arg_begin(),
85                                        E = Function->arg_end();
86           B != E;
87           ++B) {
88        RootArgTys.push_back(B->getType());
89      }
90
91      // For pre-ICS bitcode, we may not have signature information. In that
92      // case, we use the size of the RootArgTys to select the number of
93      // arguments.
94      return (1 << RootArgTys.size()) - 1;
95    }
96
97    if (ExportForEachMetadata->getNumOperands() == 0) {
98      return 0;
99    }
100
101    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
102
103    // We only handle the case for legacy root() functions here, so this is
104    // hard-coded to look at only the first such function.
105    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
106    if (SigNode != NULL && SigNode->getNumOperands() == 1) {
107      llvm::Value *SigVal = SigNode->getOperand(0);
108      if (SigVal->getValueID() == llvm::Value::MDStringVal) {
109        llvm::StringRef SigString =
110            static_cast<llvm::MDString*>(SigVal)->getString();
111        uint32_t Signature = 0;
112        if (SigString.getAsInteger(10, Signature)) {
113          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
114          return 0;
115        }
116        return Signature;
117      }
118    }
119
120    return 0;
121  }
122
123  // Get the actual value we should use to step through an allocation.
124  //
125  // Normally the value we use to step through an allocation is given to us by
126  // the driver. However, for certain primitive data types, we can derive an
127  // integer constant for the step value. We use this integer constant whenever
128  // possible to allow further compiler optimizations to take place.
129  //
130  // DL - Target Data size/layout information.
131  // T - Type of allocation (should be a pointer).
132  // OrigStep - Original step increment (root.expand() input from driver).
133  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
134                            llvm::Value *OrigStep) {
135    bccAssert(DL);
136    bccAssert(AllocType);
137    bccAssert(OrigStep);
138    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
139    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
140    if (mEnableStepOpt && AllocType != VoidPtrTy && PT) {
141      llvm::Type *ET = PT->getElementType();
142      uint64_t ETSize = DL->getTypeAllocSize(ET);
143      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
144      return llvm::ConstantInt::get(Int32Ty, ETSize);
145    } else {
146      return OrigStep;
147    }
148  }
149
150#define PARAM_FIELD_IN          0
151#define PARAM_FIELD_OUT         1
152#define PARAM_FIELD_Y           2
153#define PARAM_FIELD_Z           3
154#define PARAM_FIELD_LID         4
155#define PARAM_FIELD_INS         5
156#define PARAM_FIELD_ESTRIDEINS  6
157#define PARAM_FIELD_USR         7
158#define PARAM_FIELD_DIMX        8
159#define PARAM_FIELD_DIMY        9
160#define PARAM_FIELD_DIMZ       10
161#define PARAM_FIELD_SLOT       11
162
163  /// Builds the types required by the pass for the given context.
164  void buildTypes(void) {
165    // Create the RsForEachStubParam struct.
166
167    llvm::Type *VoidPtrTy    = llvm::Type::getInt8PtrTy(*Context);
168    llvm::Type *VoidPtrPtrTy = VoidPtrTy->getPointerTo();
169    llvm::Type *Int32Ty      = llvm::Type::getInt32Ty(*Context);
170    llvm::Type *Int32PtrTy   = Int32Ty->getPointerTo();
171
172    /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
173     *
174     * struct RsForEachKernelStruct{
175     *   const void *in;
176     *   void *out;
177     *   uint32_t y;
178     *   uint32_t z;
179     *   uint32_t lid;
180     *   const void **ins;
181     *   uint32_t *eStrideIns;
182     *   const void *usr;
183     *   uint32_t dimX;
184     *   uint32_t dimY;
185     *   uint32_t dimZ;
186     *   uint32_t slot;
187     * };
188     */
189    llvm::SmallVector<llvm::Type*, 12> StructTypes;
190    StructTypes.push_back(VoidPtrTy);    // const void *in
191    StructTypes.push_back(VoidPtrTy);    // void *out
192    StructTypes.push_back(Int32Ty);      // uint32_t y
193    StructTypes.push_back(Int32Ty);      // uint32_t z
194    StructTypes.push_back(Int32Ty);      // uint32_t lid
195    StructTypes.push_back(VoidPtrPtrTy); // const void **ins
196    StructTypes.push_back(Int32PtrTy);   // uint32_t *eStrideIns
197    StructTypes.push_back(VoidPtrTy);    // const void *usr
198    StructTypes.push_back(Int32Ty);      // uint32_t dimX
199    StructTypes.push_back(Int32Ty);      // uint32_t dimY
200    StructTypes.push_back(Int32Ty);      // uint32_t dimZ
201    StructTypes.push_back(Int32Ty);      // uint32_t slot
202
203    ForEachStubType =
204      llvm::StructType::create(StructTypes, "RsForEachStubParamStruct");
205
206    // Create the function type for expanded kernels.
207
208    llvm::Type *ForEachStubPtrTy = ForEachStubType->getPointerTo();
209
210    llvm::SmallVector<llvm::Type*, 8> ParamTypes;
211    ParamTypes.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
212    ParamTypes.push_back(Int32Ty);          // uint32_t x1
213    ParamTypes.push_back(Int32Ty);          // uint32_t x2
214    ParamTypes.push_back(Int32Ty);          // uint32_t instep
215    ParamTypes.push_back(Int32Ty);          // uint32_t outstep
216
217    ExpandedFunctionType = llvm::FunctionType::get(llvm::Type::getVoidTy(*Context),
218                                              ParamTypes,
219                                              false);
220  }
221
222  /// @brief Create skeleton of the expanded function.
223  ///
224  /// This creates a function with the following signature:
225  ///
226  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
227  ///         uint32_t instep, uint32_t outstep)
228  ///
229  llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
230    llvm::Function *ExpandedFunction =
231      llvm::Function::Create(ExpandedFunctionType,
232                             llvm::GlobalValue::ExternalLinkage,
233                             OldName + ".expand", Module);
234
235    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
236
237    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
238
239    (AI++)->setName("p");
240    (AI++)->setName("x1");
241    (AI++)->setName("x2");
242    (AI++)->setName("arg_instep");
243    (AI++)->setName("arg_outstep");
244
245    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
246                                                       ExpandedFunction);
247    llvm::IRBuilder<> Builder(Begin);
248    Builder.CreateRetVoid();
249
250    return ExpandedFunction;
251  }
252
253  /// @brief Create an empty loop
254  ///
255  /// Create a loop of the form:
256  ///
257  /// for (i = LowerBound; i < UpperBound; i++)
258  ///   ;
259  ///
260  /// After the loop has been created, the builder is set such that
261  /// instructions can be added to the loop body.
262  ///
263  /// @param Builder The builder to use to build this loop. The current
264  ///                position of the builder is the position the loop
265  ///                will be inserted.
266  /// @param LowerBound The first value of the loop iterator
267  /// @param UpperBound The maximal value of the loop iterator
268  /// @param LoopIV A reference that will be set to the loop iterator.
269  /// @return The BasicBlock that will be executed after the loop.
270  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
271                               llvm::Value *LowerBound,
272                               llvm::Value *UpperBound,
273                               llvm::PHINode **LoopIV) {
274    assert(LowerBound->getType() == UpperBound->getType());
275
276    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
277    llvm::Value *Cond, *IVNext;
278    llvm::PHINode *IV;
279
280    CondBB = Builder.GetInsertBlock();
281    AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
282    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
283
284    // if (LowerBound < Upperbound)
285    //   goto LoopHeader
286    // else
287    //   goto AfterBB
288    CondBB->getTerminator()->eraseFromParent();
289    Builder.SetInsertPoint(CondBB);
290    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
291    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
292
293    // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
294    // iv.next = iv + 1
295    // if (iv.next < Upperbound)
296    //   goto LoopHeader
297    // else
298    //   goto AfterBB
299    Builder.SetInsertPoint(HeaderBB);
300    IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
301    IV->addIncoming(LowerBound, CondBB);
302    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
303    IV->addIncoming(IVNext, HeaderBB);
304    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
305    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
306    AfterBB->setName("Exit");
307    Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
308    *LoopIV = IV;
309    return AfterBB;
310  }
311
312public:
313  RSForEachExpandPass(bool pEnableStepOpt)
314      : ModulePass(ID), Module(NULL), Context(NULL),
315        mEnableStepOpt(pEnableStepOpt) {
316
317  }
318
319  /* Performs the actual optimization on a selected function. On success, the
320   * Module will contain a new function of the name "<NAME>.expand" that
321   * invokes <NAME>() in a loop with the appropriate parameters.
322   */
323  bool ExpandFunction(llvm::Function *Function, uint32_t Signature) {
324    ALOGV("Expanding ForEach-able Function %s",
325          Function->getName().str().c_str());
326
327    if (!Signature) {
328      Signature = getRootSignature(Function);
329      if (!Signature) {
330        // We couldn't determine how to expand this function based on its
331        // function signature.
332        return false;
333      }
334    }
335
336    llvm::DataLayout DL(Module);
337
338    llvm::Function *ExpandedFunction =
339      createEmptyExpandedFunction(Function->getName());
340
341    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
342
343    /*
344     * Extract the expanded function's parameters.  It is guaranteed by
345     * createEmptyExpandedFunction that there will be five parameters.
346     */
347    llvm::Function::arg_iterator ExpandedFunctionArgIter =
348      ExpandedFunction->arg_begin();
349
350    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
351    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
352    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
353    llvm::Value *Arg_instep  = &*(ExpandedFunctionArgIter++);
354    llvm::Value *Arg_outstep = &*ExpandedFunctionArgIter;
355
356    llvm::Value *InStep  = NULL;
357    llvm::Value *OutStep = NULL;
358
359    // Construct the actual function body.
360    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
361
362    // Collect and construct the arguments for the kernel().
363    // Note that we load any loop-invariant arguments before entering the Loop.
364    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
365
366    llvm::Type *InTy = NULL;
367    llvm::Value *InBasePtr = NULL;
368    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
369      InTy = (FunctionArgIter++)->getType();
370      InStep = getStepValue(&DL, InTy, Arg_instep);
371      InStep->setName("instep");
372      InBasePtr = Builder.CreateLoad(
373                    Builder.CreateStructGEP(Arg_p, PARAM_FIELD_IN));
374    }
375
376    llvm::Type *OutTy = NULL;
377    llvm::Value *OutBasePtr = NULL;
378    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
379      OutTy = (FunctionArgIter++)->getType();
380      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
381      OutStep->setName("outstep");
382      OutBasePtr = Builder.CreateLoad(
383                     Builder.CreateStructGEP(Arg_p, PARAM_FIELD_OUT));
384    }
385
386    llvm::Value *UsrData = NULL;
387    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
388      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
389      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
390          Builder.CreateStructGEP(Arg_p, PARAM_FIELD_USR)), UsrDataTy);
391      UsrData->setName("UsrData");
392    }
393
394    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
395      FunctionArgIter++;
396    }
397
398    llvm::Value *Y = NULL;
399    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
400      Y = Builder.CreateLoad(
401            Builder.CreateStructGEP(Arg_p, PARAM_FIELD_Y), "Y");
402
403      FunctionArgIter++;
404    }
405
406    bccAssert(FunctionArgIter == Function->arg_end());
407
408    llvm::PHINode *IV;
409    createLoop(Builder, Arg_x1, Arg_x2, &IV);
410
411    // Populate the actual call to kernel().
412    llvm::SmallVector<llvm::Value*, 8> RootArgs;
413
414    llvm::Value *InPtr  = NULL;
415    llvm::Value *OutPtr = NULL;
416
417    // Calculate the current input and output pointers
418    //
419    // We always calculate the input/output pointers with a GEP operating on i8
420    // values and only cast at the very end to OutTy. This is because the step
421    // between two values is given in bytes.
422    //
423    // TODO: We could further optimize the output by using a GEP operation of
424    // type 'OutTy' in cases where the element type of the allocation allows.
425    if (OutBasePtr) {
426      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
427      OutOffset = Builder.CreateMul(OutOffset, OutStep);
428      OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
429      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
430    }
431
432    if (InBasePtr) {
433      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
434      InOffset = Builder.CreateMul(InOffset, InStep);
435      InPtr = Builder.CreateGEP(InBasePtr, InOffset);
436      InPtr = Builder.CreatePointerCast(InPtr, InTy);
437    }
438
439    if (InPtr) {
440      RootArgs.push_back(InPtr);
441    }
442
443    if (OutPtr) {
444      RootArgs.push_back(OutPtr);
445    }
446
447    if (UsrData) {
448      RootArgs.push_back(UsrData);
449    }
450
451    llvm::Value *X = IV;
452    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
453      RootArgs.push_back(X);
454    }
455
456    if (Y) {
457      RootArgs.push_back(Y);
458    }
459
460    Builder.CreateCall(Function, RootArgs);
461
462    return true;
463  }
464
465  /* Expand a pass-by-value kernel.
466   */
467  bool ExpandKernel(llvm::Function *Function, uint32_t Signature) {
468    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
469    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
470
471    // TODO: Refactor this to share functionality with ExpandFunction.
472    llvm::DataLayout DL(Module);
473
474    llvm::Function *ExpandedFunction =
475      createEmptyExpandedFunction(Function->getName());
476
477    /*
478     * Extract the expanded function's parameters.  It is guaranteed by
479     * createEmptyExpandedFunction that there will be five parameters.
480     */
481
482    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
483
484    llvm::Function::arg_iterator ExpandedFunctionArgIter =
485      ExpandedFunction->arg_begin();
486
487    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
488    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
489    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
490    llvm::Value *Arg_instep  = &*(ExpandedFunctionArgIter++);
491    llvm::Value *Arg_outstep = &*ExpandedFunctionArgIter;
492
493    // Construct the actual function body.
494    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
495
496    // Create TBAA meta-data.
497    llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer;
498    llvm::MDBuilder MDHelper(*Context);
499
500    TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
501    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", TBAARenderScript);
502    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, TBAAAllocation, 0);
503    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", TBAARenderScript);
504    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
505
506    /*
507     * Collect and construct the arguments for the kernel().
508     *
509     * Note that we load any loop-invariant arguments before entering the Loop.
510     */
511    size_t NumInputs = Function->arg_size();
512
513    llvm::Value *Y = NULL;
514    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
515      Y = Builder.CreateLoad(
516            Builder.CreateStructGEP(Arg_p, PARAM_FIELD_Y), "Y");
517
518      --NumInputs;
519    }
520
521    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
522      --NumInputs;
523    }
524
525    // No usrData parameter on kernels.
526    bccAssert(
527        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
528
529    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
530
531    // Check the return type
532    llvm::Type     *OutTy      = NULL;
533    llvm::Value    *OutStep    = NULL;
534    llvm::LoadInst *OutBasePtr = NULL;
535
536    bool PassOutByReference = false;
537
538    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
539      llvm::Type *OutBaseTy = Function->getReturnType();
540
541      if (OutBaseTy->isVoidTy()) {
542        PassOutByReference = true;
543        OutTy = ArgIter->getType();
544
545        ArgIter++;
546        --NumInputs;
547      } else {
548        // We don't increment Args, since we are using the actual return type.
549        OutTy = OutBaseTy->getPointerTo();
550      }
551
552      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
553      OutStep->setName("outstep");
554      OutBasePtr = Builder.CreateLoad(
555                     Builder.CreateStructGEP(Arg_p, PARAM_FIELD_OUT));
556
557      if (gEnableRsTbaa) {
558        OutBasePtr->setMetadata("tbaa", TBAAPointer);
559      }
560    }
561
562    llvm::SmallVector<llvm::Type*,     8> InTypes;
563    llvm::SmallVector<llvm::Value*,    8> InSteps;
564    llvm::SmallVector<llvm::LoadInst*, 8> InBasePtrs;
565
566    if (NumInputs == 1) {
567      llvm::Type  *InType = ArgIter->getType()->getPointerTo();
568      llvm::Value *InStep = getStepValue(&DL, InType, Arg_instep);
569
570      InStep->setName("instep");
571
572      llvm::Value    *Input     = Builder.CreateStructGEP(Arg_p, PARAM_FIELD_IN);
573      llvm::LoadInst *InBasePtr = Builder.CreateLoad(Input, "input_base");
574
575      if (gEnableRsTbaa) {
576        InBasePtr->setMetadata("tbaa", TBAAPointer);
577      }
578
579      InTypes.push_back(InType);
580      InSteps.push_back(InStep);
581      InBasePtrs.push_back(InBasePtr);
582
583    } else if (NumInputs > 1) {
584      llvm::Value    *InsMember  = Builder.CreateStructGEP(Arg_p, PARAM_FIELD_INS);
585      llvm::LoadInst *InsBasePtr = Builder.CreateLoad(InsMember,
586                                                      "inputs_base");
587
588      llvm::Value    *InStepsMember = Builder.CreateStructGEP(Arg_p, PARAM_FIELD_ESTRIDEINS);
589      llvm::LoadInst *InStepsBase   = Builder.CreateLoad(InStepsMember,
590                                                         "insteps_base");
591
592      for (size_t InputIndex = 0; InputIndex < NumInputs;
593           ++InputIndex, ArgIter++) {
594
595          llvm::Value *IndexVal = Builder.getInt32(InputIndex);
596
597          llvm::Value    *InStepAddr = Builder.CreateGEP(InStepsBase, IndexVal);
598          llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
599                                                          "instep_addr");
600
601          llvm::Type  *InType = ArgIter->getType()->getPointerTo();
602          llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);
603
604          InStep->setName("instep");
605
606          llvm::Value    *InputAddr = Builder.CreateGEP(InsBasePtr, IndexVal);
607          llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr,
608                                                         "input_base");
609
610          if (gEnableRsTbaa) {
611            InBasePtr->setMetadata("tbaa", TBAAPointer);
612          }
613
614          InTypes.push_back(InType);
615          InSteps.push_back(InStep);
616          InBasePtrs.push_back(InBasePtr);
617      }
618    }
619
620    llvm::PHINode *IV;
621    createLoop(Builder, Arg_x1, Arg_x2, &IV);
622
623    // Populate the actual call to kernel().
624    llvm::SmallVector<llvm::Value*, 8> RootArgs;
625
626    // Calculate the current input and output pointers
627    //
628    //
629    // We always calculate the input/output pointers with a GEP operating on i8
630    // values combined with a multiplication and only cast at the very end to
631    // OutTy.  This is to account for dynamic stepping sizes when the value
632    // isn't apparent at compile time.  In the (very common) case when we know
633    // the step size at compile time, due to haveing complete type information
634    // this multiplication will optmized out and produces code equivalent to a
635    // a GEP on a pointer of the correct type.
636
637    // Output
638
639    llvm::Value *OutPtr = NULL;
640    if (OutBasePtr) {
641      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
642
643      OutOffset = Builder.CreateMul(OutOffset, OutStep);
644      OutPtr    = Builder.CreateGEP(OutBasePtr, OutOffset);
645      OutPtr    = Builder.CreatePointerCast(OutPtr, OutTy);
646
647      if (PassOutByReference) {
648        RootArgs.push_back(OutPtr);
649      }
650    }
651
652    // Inputs
653
654    if (NumInputs > 0) {
655      llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
656
657      for (size_t Index = 0; Index < NumInputs; ++Index) {
658        llvm::Value *InOffset = Builder.CreateMul(Offset, InSteps[Index]);
659        llvm::Value *InPtr    = Builder.CreateGEP(InBasePtrs[Index], InOffset);
660
661        InPtr = Builder.CreatePointerCast(InPtr, InTypes[Index]);
662
663        llvm::LoadInst *Input = Builder.CreateLoad(InPtr, "input");
664
665        if (gEnableRsTbaa) {
666          Input->setMetadata("tbaa", TBAAAllocation);
667        }
668
669        RootArgs.push_back(Input);
670      }
671    }
672
673    llvm::Value *X = IV;
674    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
675      RootArgs.push_back(X);
676    }
677
678    if (Y) {
679      RootArgs.push_back(Y);
680    }
681
682    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
683
684    if (OutPtr && !PassOutByReference) {
685      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
686      if (gEnableRsTbaa) {
687        Store->setMetadata("tbaa", TBAAAllocation);
688      }
689    }
690
691    return true;
692  }
693
694  /// @brief Checks if pointers to allocation internals are exposed
695  ///
696  /// This function verifies if through the parameters passed to the kernel
697  /// or through calls to the runtime library the script gains access to
698  /// pointers pointing to data within a RenderScript Allocation.
699  /// If we know we control all loads from and stores to data within
700  /// RenderScript allocations and if we know the run-time internal accesses
701  /// are all annotated with RenderScript TBAA metadata, only then we
702  /// can safely use TBAA to distinguish between generic and from-allocation
703  /// pointers.
704  bool allocPointersExposed(llvm::Module &Module) {
705    // Old style kernel function can expose pointers to elements within
706    // allocations.
707    // TODO: Extend analysis to allow simple cases of old-style kernels.
708    for (size_t i = 0; i < mExportForEachCount; ++i) {
709      const char *Name = mExportForEachNameList[i];
710      uint32_t Signature = mExportForEachSignatureList[i];
711      if (Module.getFunction(Name) &&
712          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
713        return true;
714      }
715    }
716
717    // Check for library functions that expose a pointer to an Allocation or
718    // that are not yet annotated with RenderScript-specific tbaa information.
719    static std::vector<std::string> Funcs;
720
721    // rsGetElementAt(...)
722    Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
723    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
724    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
725    // rsSetElementAt()
726    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
727    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
728    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
729    // rsGetElementAtYuv_uchar_Y()
730    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
731    // rsGetElementAtYuv_uchar_U()
732    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
733    // rsGetElementAtYuv_uchar_V()
734    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
735
736    for (std::vector<std::string>::iterator FI = Funcs.begin(),
737                                            FE = Funcs.end();
738         FI != FE; ++FI) {
739      llvm::Function *Function = Module.getFunction(*FI);
740
741      if (!Function) {
742        ALOGE("Missing run-time function '%s'", FI->c_str());
743        return true;
744      }
745
746      if (Function->getNumUses() > 0) {
747        return true;
748      }
749    }
750
751    return false;
752  }
753
754  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
755  ///
756  /// The TBAA metadata used to annotate loads/stores from RenderScript
757  /// Allocations is generated in a separate TBAA tree with a "RenderScript TBAA"
758  /// root node. LLVM does assume may-alias for all nodes in unrelated alias
759  /// analysis trees. This function makes the RenderScript TBAA a subtree of the
760  /// normal C/C++ TBAA tree aside of normal C/C++ types. With the connected trees
761  /// every access to an Allocation is resolved to must-alias if compared to
762  /// a normal C/C++ access.
763  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
764    llvm::MDBuilder MDHelper(*Context);
765    llvm::MDNode *TBAARenderScript =
766      MDHelper.createTBAARoot("RenderScript TBAA");
767
768    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
769    llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript",
770                                                         TBAARoot);
771
772    TBAARenderScript->replaceAllUsesWith(TBAAMergedRS);
773  }
774
775  virtual bool runOnModule(llvm::Module &Module) {
776    bool Changed  = false;
777    this->Module  = &Module;
778    this->Context = &Module.getContext();
779
780    this->buildTypes();
781
782    bcinfo::MetadataExtractor me(&Module);
783    if (!me.extract()) {
784      ALOGE("Could not extract metadata from module!");
785      return false;
786    }
787    mExportForEachCount = me.getExportForEachSignatureCount();
788    mExportForEachNameList = me.getExportForEachNameList();
789    mExportForEachSignatureList = me.getExportForEachSignatureList();
790
791    bool AllocsExposed = allocPointersExposed(Module);
792
793    for (size_t i = 0; i < mExportForEachCount; ++i) {
794      const char *name = mExportForEachNameList[i];
795      uint32_t signature = mExportForEachSignatureList[i];
796      llvm::Function *kernel = Module.getFunction(name);
797      if (kernel) {
798        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
799          Changed |= ExpandKernel(kernel, signature);
800          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
801        } else if (kernel->getReturnType()->isVoidTy()) {
802          Changed |= ExpandFunction(kernel, signature);
803          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
804        } else {
805          // There are some graphics root functions that are not
806          // expanded, but that will be called directly. For those
807          // functions, we can not set the linkage to internal.
808        }
809      }
810    }
811
812    if (gEnableRsTbaa && !AllocsExposed) {
813      connectRenderScriptTBAAMetadata(Module);
814    }
815
816    return Changed;
817  }
818
819  virtual const char *getPassName() const {
820    return "ForEach-able Function Expansion";
821  }
822
823}; // end RSForEachExpandPass
824
825} // end anonymous namespace
826
827char RSForEachExpandPass::ID = 0;
828
829namespace bcc {
830
831llvm::ModulePass *
832createRSForEachExpandPass(bool pEnableStepOpt){
833  return new RSForEachExpandPass(pEnableStepOpt);
834}
835
836} // end namespace bcc
837