RSForEachExpand.cpp revision 2b04086acbef6520ae2c54a868b1271abf053122
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/*
2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project
3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License");
5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License.
6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at
7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *     http://www.apache.org/licenses/LICENSE-2.0
9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software
11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS,
12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and
14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License.
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */
16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h"
18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h"
197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib>
217a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
22c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/DerivedTypes.h>
23c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Function.h>
24c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Instructions.h>
25c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Module.h>
26c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h>
27c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Support/IRBuilder.h>
282b04086acbef6520ae2c54a868b1271abf053122Stephen Hines#include <llvm/Target/TargetData.h>
29c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Type.h>
30c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang
31c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h"
32e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSInfo.h"
33ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h"
34db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
357a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc;
367a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
37db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace {
387a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
397a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao/* RSForEachExpandPass - This pass operates on functions that are able to be
407a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
417a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * ForEach-able function to be invoked over the appropriate data cells of the
427a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * input/output allocations (adjusting other relevant parameters as we go). We
437a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * support doing this for any ForEach-able compute kernels. The new function
447a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * name is the original function name followed by ".expand". Note that we
457a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * still generate code for the original function.
467a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */
477a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaoclass RSForEachExpandPass : public llvm::ModulePass {
487a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaoprivate:
49db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  static char ID;
50db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
51db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  llvm::Module *M;
52db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  llvm::LLVMContext *C;
53db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
547a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao  const RSInfo::ExportForeachFuncListTy &mFuncs;
55cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines
562b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Turns on optimization of allocation stride values.
572b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  bool mEnableStepOpt;
582b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
59cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines  uint32_t getRootSignature(llvm::Function *F) {
60db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    const llvm::NamedMDNode *ExportForEachMetadata =
61db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        M->getNamedMetadata("#rs_export_foreach");
62db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
63db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (!ExportForEachMetadata) {
64db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
65db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      for (llvm::Function::arg_iterator B = F->arg_begin(),
66db169187dea4602e4ad32058762d23d474753fd0Stephen Hines                                        E = F->arg_end();
67db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           B != E;
68db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           ++B) {
69db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        RootArgTys.push_back(B->getType());
70db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
71db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
72db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // For pre-ICS bitcode, we may not have signature information. In that
73db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // case, we use the size of the RootArgTys to select the number of
74db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // arguments.
75db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      return (1 << RootArgTys.size()) - 1;
76db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
77db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
786e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
79db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
80cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // We only handle the case for legacy root() functions here, so this is
81cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // hard-coded to look at only the first such function.
82db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
83db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (SigNode != NULL && SigNode->getNumOperands() == 1) {
84db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      llvm::Value *SigVal = SigNode->getOperand(0);
85db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      if (SigVal->getValueID() == llvm::Value::MDStringVal) {
86db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        llvm::StringRef SigString =
87db169187dea4602e4ad32058762d23d474753fd0Stephen Hines            static_cast<llvm::MDString*>(SigVal)->getString();
88db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        uint32_t Signature = 0;
89db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        if (SigString.getAsInteger(10, Signature)) {
90db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
91db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          return 0;
92db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        }
93db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        return Signature;
94db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
95db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
96db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
97db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return 0;
98db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
99db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1002b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Get the actual value we should use to step through an allocation.
1012b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // TD - Target Data size/layout information.
1022b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // T - Type of allocation (should be a pointer).
1032b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // OrigStep - Original step increment (root.expand() input from driver).
1042b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  llvm::Value *getStepValue(llvm::TargetData *TD, llvm::Type *T,
1052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines                            llvm::Value *OrigStep) {
1062b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    bccAssert(TD);
1072b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    bccAssert(T);
1082b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    bccAssert(OrigStep);
1092b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T);
1102b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
1112b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    if (mEnableStepOpt && T != VoidPtrTy && PT) {
1122b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      llvm::Type *ET = PT->getElementType();
1132b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      uint64_t ETSize = TD->getTypeStoreSize(ET);
1142b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
1152b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return llvm::ConstantInt::get(Int32Ty, ETSize);
1162b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    } else {
1172b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return OrigStep;
1182b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    }
1192b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  }
1202b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
// True when bit 0 of the signature is set (the kernel takes an input pointer).
static bool hasIn(uint32_t Signature) {
  const uint32_t InBit = 0x01u;
  return (Signature & InBit) != 0;
}
124db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
// True when bit 1 of the signature is set (the kernel takes an output pointer).
static bool hasOut(uint32_t Signature) {
  const uint32_t OutBit = 0x02u;
  return (Signature & OutBit) != 0;
}
128db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
// True when bit 2 of the signature is set (the kernel takes user data).
static bool hasUsrData(uint32_t Signature) {
  const uint32_t UsrDataBit = 0x04u;
  return (Signature & UsrDataBit) != 0;
}
132db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
// True when bit 3 of the signature is set (the kernel takes an x coordinate).
static bool hasX(uint32_t Signature) {
  const uint32_t XBit = 0x08u;
  return (Signature & XBit) != 0;
}
136db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
// True when bit 4 of the signature is set (the kernel takes a y coordinate).
static bool hasY(uint32_t Signature) {
  const uint32_t YBit = 0x10u;
  return (Signature & YBit) != 0;
}
140db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1417a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaopublic:
1422b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
1432b04086acbef6520ae2c54a868b1271abf053122Stephen Hines                      bool pEnableStepOpt)
1442b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs),
1452b04086acbef6520ae2c54a868b1271abf053122Stephen Hines        mEnableStepOpt(pEnableStepOpt) {
146db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
147db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  /* Performs the actual optimization on a selected function. On success, the
149db169187dea4602e4ad32058762d23d474753fd0Stephen Hines   * Module will contain a new function of the name "<NAME>.expand" that
150db169187dea4602e4ad32058762d23d474753fd0Stephen Hines   * invokes <NAME>() in a loop with the appropriate parameters.
151db169187dea4602e4ad32058762d23d474753fd0Stephen Hines   */
152cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines  bool ExpandFunction(llvm::Function *F, uint32_t Signature) {
153cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str());
154db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
155db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (!Signature) {
156cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines      Signature = getRootSignature(F);
157cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines      if (!Signature) {
158cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines        // We couldn't determine how to expand this function based on its
159cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines        // function signature.
160cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines        return false;
161cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines      }
162db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
163db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1642b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    llvm::TargetData TD(M);
1652b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
166db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
167db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
168db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Type *SizeTy = Int32Ty;
169db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
170db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    /* Defined in frameworks/base/libs/rs/rs_hal.h:
171db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *
172db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     * struct RsForEachStubParamStruct {
173db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   const void *in;
174db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   void *out;
175db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   const void *usr;
176db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   size_t usr_len;
177db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t x;
178db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t y;
179db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t z;
180db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t lod;
181db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   enum RsAllocationCubemapFace face;
182db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t ar[16];
183db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     * };
184db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     */
185db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::SmallVector<llvm::Type*, 9> StructTys;
186db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    StructTys.push_back(VoidPtrTy);  // const void *in
187db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    StructTys.push_back(VoidPtrTy);  // void *out
188db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    StructTys.push_back(VoidPtrTy);  // const void *usr
189db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    StructTys.push_back(SizeTy);     // size_t usr_len
190db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    StructTys.push_back(Int32Ty);    // uint32_t x
191db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    StructTys.push_back(Int32Ty);    // uint32_t y
192db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    StructTys.push_back(Int32Ty);    // uint32_t z
193db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    StructTys.push_back(Int32Ty);    // uint32_t lod
194db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    StructTys.push_back(Int32Ty);    // enum RsAllocationCubemapFace
195db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16));  // uint32_t ar[16]
196db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
197db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Type *ForEachStubPtrTy = llvm::StructType::create(
198db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        StructTys, "RsForEachStubParamStruct")->getPointerTo();
199db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
200db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    /* Create the function signature for our expanded function.
201db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
202db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *       uint32_t instep, uint32_t outstep)
203db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     */
204db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::SmallVector<llvm::Type*, 8> ParamTys;
205db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    ParamTys.push_back(ForEachStubPtrTy);  // const RsForEachStubParamStruct *p
206db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    ParamTys.push_back(Int32Ty);           // uint32_t x1
207db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    ParamTys.push_back(Int32Ty);           // uint32_t x2
208db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    ParamTys.push_back(Int32Ty);           // uint32_t instep
209db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    ParamTys.push_back(Int32Ty);           // uint32_t outstep
210db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
211db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::FunctionType *FT =
212db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
213db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Function *ExpandedFunc =
214db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        llvm::Function::Create(FT,
215db169187dea4602e4ad32058762d23d474753fd0Stephen Hines                               llvm::GlobalValue::ExternalLinkage,
216db169187dea4602e4ad32058762d23d474753fd0Stephen Hines                               F->getName() + ".expand", M);
217db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
218db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Create and name the actual arguments to this expanded function.
219db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::SmallVector<llvm::Argument*, 8> ArgVec;
220db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
221db169187dea4602e4ad32058762d23d474753fd0Stephen Hines                                      E = ExpandedFunc->arg_end();
222db169187dea4602e4ad32058762d23d474753fd0Stephen Hines         B != E;
223db169187dea4602e4ad32058762d23d474753fd0Stephen Hines         ++B) {
224db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      ArgVec.push_back(B);
225db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
226db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
227db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (ArgVec.size() != 5) {
22889e8490c80505468f2b816ca9d12fefa53f05959Shih-wei Liao      ALOGE("Incorrect number of arguments to function: %zu",
22989e8490c80505468f2b816ca9d12fefa53f05959Shih-wei Liao            ArgVec.size());
230db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      return false;
231db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
232db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Value *Arg_p = ArgVec[0];
233db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Value *Arg_x1 = ArgVec[1];
234db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Value *Arg_x2 = ArgVec[2];
235db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Value *Arg_instep = ArgVec[3];
236db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Value *Arg_outstep = ArgVec[4];
237db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
238db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    Arg_p->setName("p");
239db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    Arg_x1->setName("x1");
240db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    Arg_x2->setName("x2");
2412b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    Arg_instep->setName("arg_instep");
2422b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    Arg_outstep->setName("arg_outstep");
2432b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
2442b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    llvm::Value *InStep = NULL;
2452b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    llvm::Value *OutStep = NULL;
246db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
247db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Construct the actual function body.
248db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::BasicBlock *Begin =
249db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
250db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::IRBuilder<> Builder(Begin);
251db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
252db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // uint32_t X = x1;
253db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
254db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    Builder.CreateStore(Arg_x1, AX);
255db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
256cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Collect and construct the arguments for the kernel().
257db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Note that we load any loop-invariant arguments before entering the Loop.
258db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Function::arg_iterator Args = F->arg_begin();
259db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
260db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Type *InTy = NULL;
261db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::AllocaInst *AIn = NULL;
262db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (hasIn(Signature)) {
263db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      InTy = Args->getType();
264db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      AIn = Builder.CreateAlloca(InTy, 0, "AIn");
2652b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      InStep = getStepValue(&TD, InTy, Arg_instep);
2662b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      InStep->setName("instep");
267db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
268db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
269db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      Args++;
270db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
271db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
272db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Type *OutTy = NULL;
273db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::AllocaInst *AOut = NULL;
274db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (hasOut(Signature)) {
275db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      OutTy = Args->getType();
276db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
2772b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      OutStep = getStepValue(&TD, OutTy, Arg_outstep);
2782b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      OutStep->setName("outstep");
279db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
280db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
281db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      Args++;
282db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
283db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
284db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Value *UsrData = NULL;
285db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (hasUsrData(Signature)) {
286db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      llvm::Type *UsrDataTy = Args->getType();
287db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
288db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
289db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      UsrData->setName("UsrData");
290db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      Args++;
291db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
292db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
293db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (hasX(Signature)) {
294db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      Args++;
295db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
296db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
297db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Value *Y = NULL;
298db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (hasY(Signature)) {
299db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
300db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      Args++;
301db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
302db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
3036e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines    bccAssert(Args == F->arg_end());
304db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
305db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
306db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
307db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
308db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // if (x1 < x2) goto Loop; else goto Exit;
309db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
310db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    Builder.CreateCondBr(Cond, Loop, Exit);
311db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
312db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Loop:
313db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    Builder.SetInsertPoint(Loop);
314db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
315cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Populate the actual call to kernel().
316db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
317db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
318db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Value *In = NULL;
319db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Value *Out = NULL;
320db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
321db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (AIn) {
322db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      In = Builder.CreateLoad(AIn, "In");
323db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      RootArgs.push_back(In);
324db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
325db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
326db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (AOut) {
327db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      Out = Builder.CreateLoad(AOut, "Out");
328db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      RootArgs.push_back(Out);
329db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
330db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
331db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (UsrData) {
332db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      RootArgs.push_back(UsrData);
333db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
334db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
335db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // We always have to load X, since it is used to iterate through the loop.
336db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Value *X = Builder.CreateLoad(AX, "X");
337db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (hasX(Signature)) {
338db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      RootArgs.push_back(X);
339db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
340db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
341db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (Y) {
342db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      RootArgs.push_back(Y);
343db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
344db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
345db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    Builder.CreateCall(F, RootArgs);
346db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
347db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (In) {
348db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // In += instep
349db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
3502b04086acbef6520ae2c54a868b1271abf053122Stephen Hines          Builder.CreatePtrToInt(In, Int32Ty), InStep), InTy);
351db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      Builder.CreateStore(NewIn, AIn);
352db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
353db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
354db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (Out) {
355db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // Out += outstep
356db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
3572b04086acbef6520ae2c54a868b1271abf053122Stephen Hines          Builder.CreatePtrToInt(Out, Int32Ty), OutStep), OutTy);
358db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      Builder.CreateStore(NewOut, AOut);
359db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
360db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
361db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // X++;
362db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::Value *XPlusOne =
363db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
364db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    Builder.CreateStore(XPlusOne, AX);
365db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
366db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // If (X < x2) goto Loop; else goto Exit;
367db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
368db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    Builder.CreateCondBr(Cond, Loop, Exit);
369db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
370db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Exit:
371db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    Builder.SetInsertPoint(Exit);
372db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    Builder.CreateRetVoid();
373db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
374db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return true;
375db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
376db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
377db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  virtual bool runOnModule(llvm::Module &M) {
378cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    bool Changed = false;
379db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    this->M = &M;
380db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    C = &M.getContext();
381db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
3827a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao    for (RSInfo::ExportForeachFuncListTy::const_iterator
3837a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao             func_iter = mFuncs.begin(), func_end = mFuncs.end();
3847a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao         func_iter != func_end; func_iter++) {
3857a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao      const char *name = func_iter->first;
3867a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao      uint32_t signature = func_iter->second;
3877a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao      llvm::Function *kernel = M.getFunction(name);
388cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines      if (kernel && kernel->getReturnType()->isVoidTy()) {
3897a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao        Changed |= ExpandFunction(kernel, signature);
390cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines      }
391db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
392db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
393cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    return Changed;
394db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
395db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
396db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  virtual const char *getPassName() const {
397db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return "ForEach-able Function Expansion";
398db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
399db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
4007a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}; // end RSForEachExpandPass
401db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
4027a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace
4037a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
4047a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaochar RSForEachExpandPass::ID = 0;
405db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
406db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc {
407db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
4087a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass *
4092b04086acbef6520ae2c54a868b1271abf053122Stephen HinescreateRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
4102b04086acbef6520ae2c54a868b1271abf053122Stephen Hines                          bool pEnableStepOpt){
4112b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt);
4127a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}
413db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
4147a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc
415