RSForEachExpand.cpp revision 802f65931852d925bbe2e478bafe422b4002e7c4
1/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "bcc/Assert.h"
18#include "bcc/Renderscript/RSTransforms.h"
19
20#include <cstdlib>
21
22#include <llvm/IR/DerivedTypes.h>
23#include <llvm/IR/Function.h>
24#include <llvm/IR/Instructions.h>
25#include <llvm/IR/IRBuilder.h>
26#include <llvm/IR/Module.h>
27#include <llvm/Pass.h>
28#include <llvm/Support/raw_ostream.h>
29#include <llvm/IR/DataLayout.h>
30#include <llvm/IR/Type.h>
31
32#include "bcc/Config/Config.h"
33#include "bcc/Renderscript/RSInfo.h"
34#include "bcc/Support/Log.h"
35
36using namespace bcc;
37
38namespace {
39
40/* RSForEachExpandPass - This pass operates on functions that are able to be
41 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
42 * ForEach-able function to be invoked over the appropriate data cells of the
43 * input/output allocations (adjusting other relevant parameters as we go). We
44 * support doing this for any ForEach-able compute kernels. The new function
45 * name is the original function name followed by ".expand". Note that we
46 * still generate code for the original function.
47 */
48class RSForEachExpandPass : public llvm::ModulePass {
49private:
50  static char ID;
51
52  llvm::Module *M;
53  llvm::LLVMContext *C;
54
55  const RSInfo::ExportForeachFuncListTy &mFuncs;
56
57  // Turns on optimization of allocation stride values.
58  bool mEnableStepOpt;
59
60  uint32_t getRootSignature(llvm::Function *F) {
61    const llvm::NamedMDNode *ExportForEachMetadata =
62        M->getNamedMetadata("#rs_export_foreach");
63
64    if (!ExportForEachMetadata) {
65      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
66      for (llvm::Function::arg_iterator B = F->arg_begin(),
67                                        E = F->arg_end();
68           B != E;
69           ++B) {
70        RootArgTys.push_back(B->getType());
71      }
72
73      // For pre-ICS bitcode, we may not have signature information. In that
74      // case, we use the size of the RootArgTys to select the number of
75      // arguments.
76      return (1 << RootArgTys.size()) - 1;
77    }
78
79    if (ExportForEachMetadata->getNumOperands() == 0) {
80      return 0;
81    }
82
83    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
84
85    // We only handle the case for legacy root() functions here, so this is
86    // hard-coded to look at only the first such function.
87    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
88    if (SigNode != NULL && SigNode->getNumOperands() == 1) {
89      llvm::Value *SigVal = SigNode->getOperand(0);
90      if (SigVal->getValueID() == llvm::Value::MDStringVal) {
91        llvm::StringRef SigString =
92            static_cast<llvm::MDString*>(SigVal)->getString();
93        uint32_t Signature = 0;
94        if (SigString.getAsInteger(10, Signature)) {
95          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
96          return 0;
97        }
98        return Signature;
99      }
100    }
101
102    return 0;
103  }
104
105  // Get the actual value we should use to step through an allocation.
106  // DL - Target Data size/layout information.
107  // T - Type of allocation (should be a pointer).
108  // OrigStep - Original step increment (root.expand() input from driver).
109  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *T,
110                            llvm::Value *OrigStep) {
111    bccAssert(DL);
112    bccAssert(T);
113    bccAssert(OrigStep);
114    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T);
115    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
116    if (mEnableStepOpt && T != VoidPtrTy && PT) {
117      llvm::Type *ET = PT->getElementType();
118      uint64_t ETSize = DL->getTypeAllocSize(ET);
119      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
120      return llvm::ConstantInt::get(Int32Ty, ETSize);
121    } else {
122      return OrigStep;
123    }
124  }
125
126  static bool hasIn(uint32_t Signature) {
127    return Signature & 0x01;
128  }
129
130  static bool hasOut(uint32_t Signature) {
131    return Signature & 0x02;
132  }
133
134  static bool hasUsrData(uint32_t Signature) {
135    return Signature & 0x04;
136  }
137
138  static bool hasX(uint32_t Signature) {
139    return Signature & 0x08;
140  }
141
142  static bool hasY(uint32_t Signature) {
143    return Signature & 0x10;
144  }
145
146  static bool isKernel(uint32_t Signature) {
147    return Signature & 0x20;
148  }
149
150  /// @brief Returns the type of the ForEach stub parameter structure.
151  ///
152  /// Renderscript uses a single structure in which all parameters are passed
153  /// to keep the signature of the expanded function independent of the
154  /// parameters passed to it.
155  llvm::Type *getForeachStubTy() {
156    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
157    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
158    llvm::Type *SizeTy = Int32Ty;
159    /* Defined in frameworks/base/libs/rs/rs_hal.h:
160     *
161     * struct RsForEachStubParamStruct {
162     *   const void *in;
163     *   void *out;
164     *   const void *usr;
165     *   size_t usr_len;
166     *   uint32_t x;
167     *   uint32_t y;
168     *   uint32_t z;
169     *   uint32_t lod;
170     *   enum RsAllocationCubemapFace face;
171     *   uint32_t ar[16];
172     * };
173     */
174    llvm::SmallVector<llvm::Type*, 9> StructTys;
175    StructTys.push_back(VoidPtrTy);  // const void *in
176    StructTys.push_back(VoidPtrTy);  // void *out
177    StructTys.push_back(VoidPtrTy);  // const void *usr
178    StructTys.push_back(SizeTy);     // size_t usr_len
179    StructTys.push_back(Int32Ty);    // uint32_t x
180    StructTys.push_back(Int32Ty);    // uint32_t y
181    StructTys.push_back(Int32Ty);    // uint32_t z
182    StructTys.push_back(Int32Ty);    // uint32_t lod
183    StructTys.push_back(Int32Ty);    // enum RsAllocationCubemapFace
184    StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16));  // uint32_t ar[16]
185
186    return llvm::StructType::create(StructTys, "RsForEachStubParamStruct");
187  }
188
189  /// @brief Create skeleton of the expanded function.
190  ///
191  /// This creates a function with the following signature:
192  ///
193  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
194  ///         uint32_t instep, uint32_t outstep)
195  ///
196  llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
197    llvm::Type *ForEachStubPtrTy = getForeachStubTy()->getPointerTo();
198    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
199
200    llvm::SmallVector<llvm::Type*, 8> ParamTys;
201    ParamTys.push_back(ForEachStubPtrTy);  // const RsForEachStubParamStruct *p
202    ParamTys.push_back(Int32Ty);           // uint32_t x1
203    ParamTys.push_back(Int32Ty);           // uint32_t x2
204    ParamTys.push_back(Int32Ty);           // uint32_t instep
205    ParamTys.push_back(Int32Ty);           // uint32_t outstep
206
207    llvm::FunctionType *FT =
208        llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
209    llvm::Function *F =
210        llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage,
211                               OldName + ".expand", M);
212
213    llvm::Function::arg_iterator AI = F->arg_begin();
214
215    AI->setName("p");
216    AI++;
217    AI->setName("x1");
218    AI++;
219    AI->setName("x2");
220    AI++;
221    AI->setName("arg_instep");
222    AI++;
223    AI->setName("arg_outstep");
224    AI++;
225
226    assert(AI == F->arg_end());
227
228    return F;
229  }
230
231public:
232  RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
233                      bool pEnableStepOpt)
234      : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs),
235        mEnableStepOpt(pEnableStepOpt) {
236  }
237
238  /* Performs the actual optimization on a selected function. On success, the
239   * Module will contain a new function of the name "<NAME>.expand" that
240   * invokes <NAME>() in a loop with the appropriate parameters.
241   */
242  bool ExpandFunction(llvm::Function *F, uint32_t Signature) {
243    ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str());
244
245    if (!Signature) {
246      Signature = getRootSignature(F);
247      if (!Signature) {
248        // We couldn't determine how to expand this function based on its
249        // function signature.
250        return false;
251      }
252    }
253
254    llvm::DataLayout DL(M);
255
256    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
257    llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
258
259    // Create and name the actual arguments to this expanded function.
260    llvm::SmallVector<llvm::Argument*, 8> ArgVec;
261    for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
262                                      E = ExpandedFunc->arg_end();
263         B != E;
264         ++B) {
265      ArgVec.push_back(B);
266    }
267
268    if (ArgVec.size() != 5) {
269      ALOGE("Incorrect number of arguments to function: %zu",
270            ArgVec.size());
271      return false;
272    }
273    llvm::Value *Arg_p = ArgVec[0];
274    llvm::Value *Arg_x1 = ArgVec[1];
275    llvm::Value *Arg_x2 = ArgVec[2];
276    llvm::Value *Arg_instep = ArgVec[3];
277    llvm::Value *Arg_outstep = ArgVec[4];
278
279    llvm::Value *InStep = NULL;
280    llvm::Value *OutStep = NULL;
281
282    // Construct the actual function body.
283    llvm::BasicBlock *Begin =
284        llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
285    llvm::IRBuilder<> Builder(Begin);
286
287    // uint32_t X = x1;
288    llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
289    Builder.CreateStore(Arg_x1, AX);
290
291    // Collect and construct the arguments for the kernel().
292    // Note that we load any loop-invariant arguments before entering the Loop.
293    llvm::Function::arg_iterator Args = F->arg_begin();
294
295    llvm::Type *InTy = NULL;
296    llvm::AllocaInst *AIn = NULL;
297    if (hasIn(Signature)) {
298      InTy = Args->getType();
299      AIn = Builder.CreateAlloca(InTy, 0, "AIn");
300      InStep = getStepValue(&DL, InTy, Arg_instep);
301      InStep->setName("instep");
302      Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
303          Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
304      Args++;
305    }
306
307    llvm::Type *OutTy = NULL;
308    llvm::AllocaInst *AOut = NULL;
309    if (hasOut(Signature)) {
310      OutTy = Args->getType();
311      AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
312      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
313      OutStep->setName("outstep");
314      Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
315          Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
316      Args++;
317    }
318
319    llvm::Value *UsrData = NULL;
320    if (hasUsrData(Signature)) {
321      llvm::Type *UsrDataTy = Args->getType();
322      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
323          Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
324      UsrData->setName("UsrData");
325      Args++;
326    }
327
328    if (hasX(Signature)) {
329      Args++;
330    }
331
332    llvm::Value *Y = NULL;
333    if (hasY(Signature)) {
334      Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
335      Args++;
336    }
337
338    bccAssert(Args == F->arg_end());
339
340    llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
341    llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
342
343    // if (x1 < x2) goto Loop; else goto Exit;
344    llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
345    Builder.CreateCondBr(Cond, Loop, Exit);
346
347    // Loop:
348    Builder.SetInsertPoint(Loop);
349
350    // Populate the actual call to kernel().
351    llvm::SmallVector<llvm::Value*, 8> RootArgs;
352
353    llvm::Value *InPtr = NULL;
354    llvm::Value *OutPtr = NULL;
355
356    if (AIn) {
357      InPtr = Builder.CreateLoad(AIn, "InPtr");
358      RootArgs.push_back(InPtr);
359    }
360
361    if (AOut) {
362      OutPtr = Builder.CreateLoad(AOut, "OutPtr");
363      RootArgs.push_back(OutPtr);
364    }
365
366    if (UsrData) {
367      RootArgs.push_back(UsrData);
368    }
369
370    // We always have to load X, since it is used to iterate through the loop.
371    llvm::Value *X = Builder.CreateLoad(AX, "X");
372    if (hasX(Signature)) {
373      RootArgs.push_back(X);
374    }
375
376    if (Y) {
377      RootArgs.push_back(Y);
378    }
379
380    Builder.CreateCall(F, RootArgs);
381
382    if (InPtr) {
383      // InPtr += instep
384      llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
385          Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
386      Builder.CreateStore(NewIn, AIn);
387    }
388
389    if (OutPtr) {
390      // OutPtr += outstep
391      llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
392          Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
393      Builder.CreateStore(NewOut, AOut);
394    }
395
396    // X++;
397    llvm::Value *XPlusOne =
398        Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
399    Builder.CreateStore(XPlusOne, AX);
400
401    // If (X < x2) goto Loop; else goto Exit;
402    Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
403    Builder.CreateCondBr(Cond, Loop, Exit);
404
405    // Exit:
406    Builder.SetInsertPoint(Exit);
407    Builder.CreateRetVoid();
408
409    return true;
410  }
411
412  /* Expand a pass-by-value kernel.
413   */
414  bool ExpandKernel(llvm::Function *F, uint32_t Signature) {
415    bccAssert(isKernel(Signature));
416    ALOGV("Expanding kernel Function %s", F->getName().str().c_str());
417
418    // TODO: Refactor this to share functionality with ExpandFunction.
419    llvm::DataLayout DL(M);
420
421    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
422    llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
423
424    // Create and name the actual arguments to this expanded function.
425    llvm::SmallVector<llvm::Argument*, 8> ArgVec;
426    for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
427                                      E = ExpandedFunc->arg_end();
428         B != E;
429         ++B) {
430      ArgVec.push_back(B);
431    }
432
433    if (ArgVec.size() != 5) {
434      ALOGE("Incorrect number of arguments to function: %zu",
435            ArgVec.size());
436      return false;
437    }
438    llvm::Value *Arg_p = ArgVec[0];
439    llvm::Value *Arg_x1 = ArgVec[1];
440    llvm::Value *Arg_x2 = ArgVec[2];
441    llvm::Value *Arg_instep = ArgVec[3];
442    llvm::Value *Arg_outstep = ArgVec[4];
443
444    llvm::Value *InStep = NULL;
445    llvm::Value *OutStep = NULL;
446
447    // Construct the actual function body.
448    llvm::BasicBlock *Begin =
449        llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
450    llvm::IRBuilder<> Builder(Begin);
451
452    // uint32_t X = x1;
453    llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
454    Builder.CreateStore(Arg_x1, AX);
455
456    // Collect and construct the arguments for the kernel().
457    // Note that we load any loop-invariant arguments before entering the Loop.
458    llvm::Function::arg_iterator Args = F->arg_begin();
459
460    llvm::Type *OutTy = NULL;
461    llvm::AllocaInst *AOut = NULL;
462    bool PassOutByReference = false;
463    if (hasOut(Signature)) {
464      llvm::Type *OutBaseTy = F->getReturnType();
465      if (OutBaseTy->isVoidTy()) {
466        PassOutByReference = true;
467        OutTy = Args->getType();
468        Args++;
469      } else {
470        OutTy = OutBaseTy->getPointerTo();
471        // We don't increment Args, since we are using the actual return type.
472      }
473      AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
474      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
475      OutStep->setName("outstep");
476      Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
477          Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
478    }
479
480    llvm::Type *InBaseTy = NULL;
481    llvm::Type *InTy = NULL;
482    llvm::AllocaInst *AIn = NULL;
483    if (hasIn(Signature)) {
484      InBaseTy = Args->getType();
485      InTy =InBaseTy->getPointerTo();
486      AIn = Builder.CreateAlloca(InTy, 0, "AIn");
487      InStep = getStepValue(&DL, InTy, Arg_instep);
488      InStep->setName("instep");
489      Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
490          Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
491      Args++;
492    }
493
494    // No usrData parameter on kernels.
495    bccAssert(!hasUsrData(Signature));
496
497    if (hasX(Signature)) {
498      Args++;
499    }
500
501    llvm::Value *Y = NULL;
502    if (hasY(Signature)) {
503      Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
504      Args++;
505    }
506
507    bccAssert(Args == F->arg_end());
508
509    llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
510    llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
511
512    // if (x1 < x2) goto Loop; else goto Exit;
513    llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
514    Builder.CreateCondBr(Cond, Loop, Exit);
515
516    // Loop:
517    Builder.SetInsertPoint(Loop);
518
519    // Populate the actual call to kernel().
520    llvm::SmallVector<llvm::Value*, 8> RootArgs;
521
522    llvm::Value *InPtr = NULL;
523    llvm::Value *In = NULL;
524    llvm::Value *OutPtr = NULL;
525
526    if (PassOutByReference) {
527      OutPtr = Builder.CreateLoad(AOut, "OutPtr");
528      RootArgs.push_back(OutPtr);
529    }
530
531    if (AIn) {
532      InPtr = Builder.CreateLoad(AIn, "InPtr");
533      In = Builder.CreateLoad(InPtr, "In");
534      RootArgs.push_back(In);
535    }
536
537    // We always have to load X, since it is used to iterate through the loop.
538    llvm::Value *X = Builder.CreateLoad(AX, "X");
539    if (hasX(Signature)) {
540      RootArgs.push_back(X);
541    }
542
543    if (Y) {
544      RootArgs.push_back(Y);
545    }
546
547    llvm::Value *RetVal = Builder.CreateCall(F, RootArgs);
548
549    if (AOut && !PassOutByReference) {
550      OutPtr = Builder.CreateLoad(AOut, "OutPtr");
551      Builder.CreateStore(RetVal, OutPtr);
552    }
553
554    if (InPtr) {
555      // InPtr += instep
556      llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
557          Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
558      Builder.CreateStore(NewIn, AIn);
559    }
560
561    if (OutPtr) {
562      // OutPtr += outstep
563      llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
564          Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
565      Builder.CreateStore(NewOut, AOut);
566    }
567
568    // X++;
569    llvm::Value *XPlusOne =
570        Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
571    Builder.CreateStore(XPlusOne, AX);
572
573    // If (X < x2) goto Loop; else goto Exit;
574    Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
575    Builder.CreateCondBr(Cond, Loop, Exit);
576
577    // Exit:
578    Builder.SetInsertPoint(Exit);
579    Builder.CreateRetVoid();
580
581    return true;
582  }
583
584  virtual bool runOnModule(llvm::Module &M) {
585    bool Changed = false;
586    this->M = &M;
587    C = &M.getContext();
588
589    for (RSInfo::ExportForeachFuncListTy::const_iterator
590             func_iter = mFuncs.begin(), func_end = mFuncs.end();
591         func_iter != func_end; func_iter++) {
592      const char *name = func_iter->first;
593      uint32_t signature = func_iter->second;
594      llvm::Function *kernel = M.getFunction(name);
595      if (kernel && isKernel(signature)) {
596        Changed |= ExpandKernel(kernel, signature);
597      }
598      else if (kernel && kernel->getReturnType()->isVoidTy()) {
599        Changed |= ExpandFunction(kernel, signature);
600      }
601    }
602
603    return Changed;
604  }
605
606  virtual const char *getPassName() const {
607    return "ForEach-able Function Expansion";
608  }
609
610}; // end RSForEachExpandPass
611
612} // end anonymous namespace
613
// Out-of-class definition of the static pass identifier declared in
// RSForEachExpandPass; the constructor hands it to llvm::ModulePass.
614char RSForEachExpandPass::ID = 0;
615
616namespace bcc {
617
618llvm::ModulePass *
619createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
620                          bool pEnableStepOpt){
621  return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt);
622}
623
624} // end namespace bcc
625