RSForEachExpand.cpp revision 74a4b08235990916911b8fe758d656c1171faf26
1/*
2 * Copyright 2012, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "bcc/Assert.h"
18#include "bcc/Renderscript/RSTransforms.h"
19
20#include <cstdlib>
21
22#include <llvm/DerivedTypes.h>
23#include <llvm/Function.h>
24#include <llvm/Instructions.h>
25#include <llvm/IRBuilder.h>
26#include <llvm/Module.h>
27#include <llvm/Pass.h>
28#include <llvm/Support/raw_ostream.h>
29#include <llvm/Target/TargetData.h>
30#include <llvm/Type.h>
31
32#include "bcc/Config/Config.h"
33#include "bcc/Renderscript/RSInfo.h"
34#include "bcc/Support/Log.h"
35
36using namespace bcc;
37
38namespace {
39
40/* RSForEachExpandPass - This pass operates on functions that are able to be
41 * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
42 * ForEach-able function to be invoked over the appropriate data cells of the
43 * input/output allocations (adjusting other relevant parameters as we go). We
44 * support doing this for any ForEach-able compute kernels. The new function
45 * name is the original function name followed by ".expand". Note that we
46 * still generate code for the original function.
47 */
48class RSForEachExpandPass : public llvm::ModulePass {
49private:
50  static char ID;
51
52  llvm::Module *M;
53  llvm::LLVMContext *C;
54
55  const RSInfo::ExportForeachFuncListTy &mFuncs;
56
57  // Turns on optimization of allocation stride values.
58  bool mEnableStepOpt;
59
60  uint32_t getRootSignature(llvm::Function *F) {
61    const llvm::NamedMDNode *ExportForEachMetadata =
62        M->getNamedMetadata("#rs_export_foreach");
63
64    if (!ExportForEachMetadata) {
65      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
66      for (llvm::Function::arg_iterator B = F->arg_begin(),
67                                        E = F->arg_end();
68           B != E;
69           ++B) {
70        RootArgTys.push_back(B->getType());
71      }
72
73      // For pre-ICS bitcode, we may not have signature information. In that
74      // case, we use the size of the RootArgTys to select the number of
75      // arguments.
76      return (1 << RootArgTys.size()) - 1;
77    }
78
79    if (ExportForEachMetadata->getNumOperands() == 0) {
80      return 0;
81    }
82
83    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
84
85    // We only handle the case for legacy root() functions here, so this is
86    // hard-coded to look at only the first such function.
87    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
88    if (SigNode != NULL && SigNode->getNumOperands() == 1) {
89      llvm::Value *SigVal = SigNode->getOperand(0);
90      if (SigVal->getValueID() == llvm::Value::MDStringVal) {
91        llvm::StringRef SigString =
92            static_cast<llvm::MDString*>(SigVal)->getString();
93        uint32_t Signature = 0;
94        if (SigString.getAsInteger(10, Signature)) {
95          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
96          return 0;
97        }
98        return Signature;
99      }
100    }
101
102    return 0;
103  }
104
105  // Get the actual value we should use to step through an allocation.
106  // TD - Target Data size/layout information.
107  // T - Type of allocation (should be a pointer).
108  // OrigStep - Original step increment (root.expand() input from driver).
109  llvm::Value *getStepValue(llvm::TargetData *TD, llvm::Type *T,
110                            llvm::Value *OrigStep) {
111    bccAssert(TD);
112    bccAssert(T);
113    bccAssert(OrigStep);
114    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T);
115    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
116    if (mEnableStepOpt && T != VoidPtrTy && PT) {
117      llvm::Type *ET = PT->getElementType();
118      uint64_t ETSize = TD->getTypeStoreSize(ET);
119      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
120      return llvm::ConstantInt::get(Int32Ty, ETSize);
121    } else {
122      return OrigStep;
123    }
124  }
125
126  static bool hasIn(uint32_t Signature) {
127    return Signature & 0x01;
128  }
129
130  static bool hasOut(uint32_t Signature) {
131    return Signature & 0x02;
132  }
133
134  static bool hasUsrData(uint32_t Signature) {
135    return Signature & 0x04;
136  }
137
138  static bool hasX(uint32_t Signature) {
139    return Signature & 0x08;
140  }
141
142  static bool hasY(uint32_t Signature) {
143    return Signature & 0x10;
144  }
145
146  static bool isKernel(uint32_t Signature) {
147    return Signature & 0x20;
148  }
149
150
151public:
152  RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
153                      bool pEnableStepOpt)
154      : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs),
155        mEnableStepOpt(pEnableStepOpt) {
156  }
157
158  /* Performs the actual optimization on a selected function. On success, the
159   * Module will contain a new function of the name "<NAME>.expand" that
160   * invokes <NAME>() in a loop with the appropriate parameters.
161   */
162  bool ExpandFunction(llvm::Function *F, uint32_t Signature) {
163    ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str());
164
165    if (!Signature) {
166      Signature = getRootSignature(F);
167      if (!Signature) {
168        // We couldn't determine how to expand this function based on its
169        // function signature.
170        return false;
171      }
172    }
173
174    llvm::TargetData TD(M);
175
176    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
177    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
178    llvm::Type *SizeTy = Int32Ty;
179
180    /* Defined in frameworks/base/libs/rs/rs_hal.h:
181     *
182     * struct RsForEachStubParamStruct {
183     *   const void *in;
184     *   void *out;
185     *   const void *usr;
186     *   size_t usr_len;
187     *   uint32_t x;
188     *   uint32_t y;
189     *   uint32_t z;
190     *   uint32_t lod;
191     *   enum RsAllocationCubemapFace face;
192     *   uint32_t ar[16];
193     * };
194     */
195    llvm::SmallVector<llvm::Type*, 9> StructTys;
196    StructTys.push_back(VoidPtrTy);  // const void *in
197    StructTys.push_back(VoidPtrTy);  // void *out
198    StructTys.push_back(VoidPtrTy);  // const void *usr
199    StructTys.push_back(SizeTy);     // size_t usr_len
200    StructTys.push_back(Int32Ty);    // uint32_t x
201    StructTys.push_back(Int32Ty);    // uint32_t y
202    StructTys.push_back(Int32Ty);    // uint32_t z
203    StructTys.push_back(Int32Ty);    // uint32_t lod
204    StructTys.push_back(Int32Ty);    // enum RsAllocationCubemapFace
205    StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16));  // uint32_t ar[16]
206
207    llvm::Type *ForEachStubPtrTy = llvm::StructType::create(
208        StructTys, "RsForEachStubParamStruct")->getPointerTo();
209
210    /* Create the function signature for our expanded function.
211     * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
212     *       uint32_t instep, uint32_t outstep)
213     */
214    llvm::SmallVector<llvm::Type*, 8> ParamTys;
215    ParamTys.push_back(ForEachStubPtrTy);  // const RsForEachStubParamStruct *p
216    ParamTys.push_back(Int32Ty);           // uint32_t x1
217    ParamTys.push_back(Int32Ty);           // uint32_t x2
218    ParamTys.push_back(Int32Ty);           // uint32_t instep
219    ParamTys.push_back(Int32Ty);           // uint32_t outstep
220
221    llvm::FunctionType *FT =
222        llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
223    llvm::Function *ExpandedFunc =
224        llvm::Function::Create(FT,
225                               llvm::GlobalValue::ExternalLinkage,
226                               F->getName() + ".expand", M);
227
228    // Create and name the actual arguments to this expanded function.
229    llvm::SmallVector<llvm::Argument*, 8> ArgVec;
230    for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
231                                      E = ExpandedFunc->arg_end();
232         B != E;
233         ++B) {
234      ArgVec.push_back(B);
235    }
236
237    if (ArgVec.size() != 5) {
238      ALOGE("Incorrect number of arguments to function: %zu",
239            ArgVec.size());
240      return false;
241    }
242    llvm::Value *Arg_p = ArgVec[0];
243    llvm::Value *Arg_x1 = ArgVec[1];
244    llvm::Value *Arg_x2 = ArgVec[2];
245    llvm::Value *Arg_instep = ArgVec[3];
246    llvm::Value *Arg_outstep = ArgVec[4];
247
248    Arg_p->setName("p");
249    Arg_x1->setName("x1");
250    Arg_x2->setName("x2");
251    Arg_instep->setName("arg_instep");
252    Arg_outstep->setName("arg_outstep");
253
254    llvm::Value *InStep = NULL;
255    llvm::Value *OutStep = NULL;
256
257    // Construct the actual function body.
258    llvm::BasicBlock *Begin =
259        llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
260    llvm::IRBuilder<> Builder(Begin);
261
262    // uint32_t X = x1;
263    llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
264    Builder.CreateStore(Arg_x1, AX);
265
266    // Collect and construct the arguments for the kernel().
267    // Note that we load any loop-invariant arguments before entering the Loop.
268    llvm::Function::arg_iterator Args = F->arg_begin();
269
270    llvm::Type *InTy = NULL;
271    llvm::AllocaInst *AIn = NULL;
272    if (hasIn(Signature)) {
273      InTy = Args->getType();
274      AIn = Builder.CreateAlloca(InTy, 0, "AIn");
275      InStep = getStepValue(&TD, InTy, Arg_instep);
276      InStep->setName("instep");
277      Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
278          Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
279      Args++;
280    }
281
282    llvm::Type *OutTy = NULL;
283    llvm::AllocaInst *AOut = NULL;
284    if (hasOut(Signature)) {
285      OutTy = Args->getType();
286      AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
287      OutStep = getStepValue(&TD, OutTy, Arg_outstep);
288      OutStep->setName("outstep");
289      Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
290          Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
291      Args++;
292    }
293
294    llvm::Value *UsrData = NULL;
295    if (hasUsrData(Signature)) {
296      llvm::Type *UsrDataTy = Args->getType();
297      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
298          Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
299      UsrData->setName("UsrData");
300      Args++;
301    }
302
303    if (hasX(Signature)) {
304      Args++;
305    }
306
307    llvm::Value *Y = NULL;
308    if (hasY(Signature)) {
309      Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
310      Args++;
311    }
312
313    bccAssert(Args == F->arg_end());
314
315    llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
316    llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
317
318    // if (x1 < x2) goto Loop; else goto Exit;
319    llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
320    Builder.CreateCondBr(Cond, Loop, Exit);
321
322    // Loop:
323    Builder.SetInsertPoint(Loop);
324
325    // Populate the actual call to kernel().
326    llvm::SmallVector<llvm::Value*, 8> RootArgs;
327
328    llvm::Value *InPtr = NULL;
329    llvm::Value *OutPtr = NULL;
330
331    if (AIn) {
332      InPtr = Builder.CreateLoad(AIn, "InPtr");
333      RootArgs.push_back(InPtr);
334    }
335
336    if (AOut) {
337      OutPtr = Builder.CreateLoad(AOut, "OutPtr");
338      RootArgs.push_back(OutPtr);
339    }
340
341    if (UsrData) {
342      RootArgs.push_back(UsrData);
343    }
344
345    // We always have to load X, since it is used to iterate through the loop.
346    llvm::Value *X = Builder.CreateLoad(AX, "X");
347    if (hasX(Signature)) {
348      RootArgs.push_back(X);
349    }
350
351    if (Y) {
352      RootArgs.push_back(Y);
353    }
354
355    Builder.CreateCall(F, RootArgs);
356
357    if (InPtr) {
358      // InPtr += instep
359      llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
360          Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
361      Builder.CreateStore(NewIn, AIn);
362    }
363
364    if (OutPtr) {
365      // OutPtr += outstep
366      llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
367          Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
368      Builder.CreateStore(NewOut, AOut);
369    }
370
371    // X++;
372    llvm::Value *XPlusOne =
373        Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
374    Builder.CreateStore(XPlusOne, AX);
375
376    // If (X < x2) goto Loop; else goto Exit;
377    Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
378    Builder.CreateCondBr(Cond, Loop, Exit);
379
380    // Exit:
381    Builder.SetInsertPoint(Exit);
382    Builder.CreateRetVoid();
383
384    return true;
385  }
386
387  /* Expand a pass-by-value kernel.
388   */
389  bool ExpandKernel(llvm::Function *F, uint32_t Signature) {
390    bccAssert(isKernel(Signature));
391    ALOGV("Expanding kernel Function %s", F->getName().str().c_str());
392
393    // TODO: Refactor this to share functionality with ExpandFunction.
394    llvm::TargetData TD(M);
395
396    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
397    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
398    llvm::Type *SizeTy = Int32Ty;
399
400    /* Defined in frameworks/base/libs/rs/rs_hal.h:
401     *
402     * struct RsForEachStubParamStruct {
403     *   const void *in;
404     *   void *out;
405     *   const void *usr;
406     *   size_t usr_len;
407     *   uint32_t x;
408     *   uint32_t y;
409     *   uint32_t z;
410     *   uint32_t lod;
411     *   enum RsAllocationCubemapFace face;
412     *   uint32_t ar[16];
413     * };
414     */
415    llvm::SmallVector<llvm::Type*, 9> StructTys;
416    StructTys.push_back(VoidPtrTy);  // const void *in
417    StructTys.push_back(VoidPtrTy);  // void *out
418    StructTys.push_back(VoidPtrTy);  // const void *usr
419    StructTys.push_back(SizeTy);     // size_t usr_len
420    StructTys.push_back(Int32Ty);    // uint32_t x
421    StructTys.push_back(Int32Ty);    // uint32_t y
422    StructTys.push_back(Int32Ty);    // uint32_t z
423    StructTys.push_back(Int32Ty);    // uint32_t lod
424    StructTys.push_back(Int32Ty);    // enum RsAllocationCubemapFace
425    StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16));  // uint32_t ar[16]
426
427    llvm::Type *ForEachStubPtrTy = llvm::StructType::create(
428        StructTys, "RsForEachStubParamStruct")->getPointerTo();
429
430    /* Create the function signature for our expanded function.
431     * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
432     *       uint32_t instep, uint32_t outstep)
433     */
434    llvm::SmallVector<llvm::Type*, 8> ParamTys;
435    ParamTys.push_back(ForEachStubPtrTy);  // const RsForEachStubParamStruct *p
436    ParamTys.push_back(Int32Ty);           // uint32_t x1
437    ParamTys.push_back(Int32Ty);           // uint32_t x2
438    ParamTys.push_back(Int32Ty);           // uint32_t instep
439    ParamTys.push_back(Int32Ty);           // uint32_t outstep
440
441    llvm::FunctionType *FT =
442        llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
443    llvm::Function *ExpandedFunc =
444        llvm::Function::Create(FT,
445                               llvm::GlobalValue::ExternalLinkage,
446                               F->getName() + ".expand", M);
447
448    // Create and name the actual arguments to this expanded function.
449    llvm::SmallVector<llvm::Argument*, 8> ArgVec;
450    for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
451                                      E = ExpandedFunc->arg_end();
452         B != E;
453         ++B) {
454      ArgVec.push_back(B);
455    }
456
457    if (ArgVec.size() != 5) {
458      ALOGE("Incorrect number of arguments to function: %zu",
459            ArgVec.size());
460      return false;
461    }
462    llvm::Value *Arg_p = ArgVec[0];
463    llvm::Value *Arg_x1 = ArgVec[1];
464    llvm::Value *Arg_x2 = ArgVec[2];
465    llvm::Value *Arg_instep = ArgVec[3];
466    llvm::Value *Arg_outstep = ArgVec[4];
467
468    Arg_p->setName("p");
469    Arg_x1->setName("x1");
470    Arg_x2->setName("x2");
471    Arg_instep->setName("arg_instep");
472    Arg_outstep->setName("arg_outstep");
473
474    llvm::Value *InStep = NULL;
475    llvm::Value *OutStep = NULL;
476
477    // Construct the actual function body.
478    llvm::BasicBlock *Begin =
479        llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
480    llvm::IRBuilder<> Builder(Begin);
481
482    // uint32_t X = x1;
483    llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
484    Builder.CreateStore(Arg_x1, AX);
485
486    // Collect and construct the arguments for the kernel().
487    // Note that we load any loop-invariant arguments before entering the Loop.
488    llvm::Function::arg_iterator Args = F->arg_begin();
489
490    llvm::Type *OutTy = NULL;
491    llvm::AllocaInst *AOut = NULL;
492    bool PassOutByReference = false;
493    if (hasOut(Signature)) {
494      llvm::Type *OutBaseTy = F->getReturnType();
495      if (OutBaseTy->isVoidTy()) {
496        PassOutByReference = true;
497        OutTy = Args->getType();
498        Args++;
499      } else {
500        OutTy = OutBaseTy->getPointerTo();
501        // We don't increment Args, since we are using the actual return type.
502      }
503      AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
504      OutStep = getStepValue(&TD, OutTy, Arg_outstep);
505      OutStep->setName("outstep");
506      Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
507          Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
508    }
509
510    llvm::Type *InBaseTy = NULL;
511    llvm::Type *InTy = NULL;
512    llvm::AllocaInst *AIn = NULL;
513    if (hasIn(Signature)) {
514      InBaseTy = Args->getType();
515      InTy =InBaseTy->getPointerTo();
516      AIn = Builder.CreateAlloca(InTy, 0, "AIn");
517      InStep = getStepValue(&TD, InTy, Arg_instep);
518      InStep->setName("instep");
519      Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
520          Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
521      Args++;
522    }
523
524    // No usrData parameter on kernels.
525    bccAssert(!hasUsrData(Signature));
526
527    if (hasX(Signature)) {
528      Args++;
529    }
530
531    llvm::Value *Y = NULL;
532    if (hasY(Signature)) {
533      Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
534      Args++;
535    }
536
537    bccAssert(Args == F->arg_end());
538
539    llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
540    llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
541
542    // if (x1 < x2) goto Loop; else goto Exit;
543    llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
544    Builder.CreateCondBr(Cond, Loop, Exit);
545
546    // Loop:
547    Builder.SetInsertPoint(Loop);
548
549    // Populate the actual call to kernel().
550    llvm::SmallVector<llvm::Value*, 8> RootArgs;
551
552    llvm::Value *InPtr = NULL;
553    llvm::Value *In = NULL;
554    llvm::Value *OutPtr = NULL;
555
556    if (PassOutByReference) {
557      OutPtr = Builder.CreateLoad(AOut, "OutPtr");
558      RootArgs.push_back(OutPtr);
559    }
560
561    if (AIn) {
562      InPtr = Builder.CreateLoad(AIn, "InPtr");
563      In = Builder.CreateLoad(InPtr, "In");
564      RootArgs.push_back(In);
565    }
566
567    // We always have to load X, since it is used to iterate through the loop.
568    llvm::Value *X = Builder.CreateLoad(AX, "X");
569    if (hasX(Signature)) {
570      RootArgs.push_back(X);
571    }
572
573    if (Y) {
574      RootArgs.push_back(Y);
575    }
576
577    llvm::Value *RetVal = Builder.CreateCall(F, RootArgs);
578
579    if (AOut && !PassOutByReference) {
580      OutPtr = Builder.CreateLoad(AOut, "OutPtr");
581      Builder.CreateStore(RetVal, OutPtr);
582    }
583
584    if (InPtr) {
585      // InPtr += instep
586      llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
587          Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
588      Builder.CreateStore(NewIn, AIn);
589    }
590
591    if (OutPtr) {
592      // OutPtr += outstep
593      llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
594          Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
595      Builder.CreateStore(NewOut, AOut);
596    }
597
598    // X++;
599    llvm::Value *XPlusOne =
600        Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
601    Builder.CreateStore(XPlusOne, AX);
602
603    // If (X < x2) goto Loop; else goto Exit;
604    Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
605    Builder.CreateCondBr(Cond, Loop, Exit);
606
607    // Exit:
608    Builder.SetInsertPoint(Exit);
609    Builder.CreateRetVoid();
610
611    return true;
612  }
613
614  virtual bool runOnModule(llvm::Module &M) {
615    bool Changed = false;
616    this->M = &M;
617    C = &M.getContext();
618
619    for (RSInfo::ExportForeachFuncListTy::const_iterator
620             func_iter = mFuncs.begin(), func_end = mFuncs.end();
621         func_iter != func_end; func_iter++) {
622      const char *name = func_iter->first;
623      uint32_t signature = func_iter->second;
624      llvm::Function *kernel = M.getFunction(name);
625      if (kernel && isKernel(signature)) {
626        Changed |= ExpandKernel(kernel, signature);
627      }
628      else if (kernel && kernel->getReturnType()->isVoidTy()) {
629        Changed |= ExpandFunction(kernel, signature);
630      }
631    }
632
633    return Changed;
634  }
635
636  virtual const char *getPassName() const {
637    return "ForEach-able Function Expansion";
638  }
639
640}; // end RSForEachExpandPass
641
642} // end anonymous namespace
643
// Storage for the static pass ID that the constructor hands to
// llvm::ModulePass.
char RSForEachExpandPass::ID = 0;
645
646namespace bcc {
647
648llvm::ModulePass *
649createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
650                          bool pEnableStepOpt){
651  return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt);
652}
653
654} // end namespace bcc
655