RSKernelExpand.cpp revision 6e9e89d1ed049ec931bb1000948c12698e6c5484
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/* 2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project 3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License"); 5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License. 6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at 7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * http://www.apache.org/licenses/LICENSE-2.0 9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software 11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS, 12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and 14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License. 15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h" 18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h" 197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib> 217a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 22c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/DerivedTypes.h> 23c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Function.h> 24c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Instructions.h> 25c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Module.h> 26c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h> 27c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Support/IRBuilder.h> 28c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Type.h> 29c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang 30c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h" 31e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSInfo.h" 32ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h" 33db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 347a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc; 357a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 36db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace { 377a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 387a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao/* RSForEachExpandPass - This pass operates on functions that are able to be 397a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the 407a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * ForEach-able function to be invoked over the appropriate data cells of the 417a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * input/output allocations (adjusting other relevant parameters as we go). We 427a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * support doing this for any ForEach-able compute kernels. The new function 437a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * name is the original function name followed by ".expand". Note that we 447a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * still generate code for the original function. 457a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */ 467a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaoclass RSForEachExpandPass : public llvm::ModulePass { 477a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaoprivate: 48db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static char ID; 49db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 50db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Module *M; 51db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::LLVMContext *C; 52db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao const RSInfo::ExportForeachFuncListTy &mFuncs; 54cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines 55cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines uint32_t getRootSignature(llvm::Function *F) { 56db169187dea4602e4ad32058762d23d474753fd0Stephen Hines const llvm::NamedMDNode *ExportForEachMetadata = 57db169187dea4602e4ad32058762d23d474753fd0Stephen Hines M->getNamedMetadata("#rs_export_foreach"); 58db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 59db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (!ExportForEachMetadata) { 60db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 8> RootArgTys; 61db169187dea4602e4ad32058762d23d474753fd0Stephen Hines for (llvm::Function::arg_iterator B = F->arg_begin(), 62db169187dea4602e4ad32058762d23d474753fd0Stephen Hines E = F->arg_end(); 63db169187dea4602e4ad32058762d23d474753fd0Stephen Hines B != E; 64db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ++B) { 65db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgTys.push_back(B->getType()); 66db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 67db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 68db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // For pre-ICS bitcode, we may not have signature information. In that 69db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // case, we use the size of the RootArgTys to select the number of 70db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // arguments. 71db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return (1 << RootArgTys.size()) - 1; 72db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 73db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 746e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines bccAssert(ExportForEachMetadata->getNumOperands() > 0); 75db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 76cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // We only handle the case for legacy root() functions here, so this is 77cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // hard-coded to look at only the first such function. 78db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 79db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigNode != NULL && SigNode->getNumOperands() == 1) { 80db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *SigVal = SigNode->getOperand(0); 81db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigVal->getValueID() == llvm::Value::MDStringVal) { 82db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::StringRef SigString = 83db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static_cast<llvm::MDString*>(SigVal)->getString(); 84db169187dea4602e4ad32058762d23d474753fd0Stephen Hines uint32_t Signature = 0; 85db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigString.getAsInteger(10, Signature)) { 86db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 87db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 88db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 89db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature; 90db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 91db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 92db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 93db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 94db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 95db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 96db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasIn(uint32_t Signature) { 97db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature & 1; 98db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 99db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 100db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasOut(uint32_t Signature) { 101db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature & 2; 102db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 103db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 104db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasUsrData(uint32_t Signature) { 105db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature & 4; 106db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 107db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 108db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasX(uint32_t Signature) { 109db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature & 8; 110db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 111db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 112db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasY(uint32_t Signature) { 113db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature & 16; 114db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 115db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1167a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaopublic: 1177a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs) 1187a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs) { 119db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 120db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 121db169187dea4602e4ad32058762d23d474753fd0Stephen Hines /* Performs the actual optimization on a selected function. On success, the 122db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Module will contain a new function of the name "<NAME>.expand" that 123db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * invokes <NAME>() in a loop with the appropriate parameters. 124db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 125cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines bool ExpandFunction(llvm::Function *F, uint32_t Signature) { 126cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str()); 127db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 128db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (!Signature) { 129cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines Signature = getRootSignature(F); 130cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines if (!Signature) { 131cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // We couldn't determine how to expand this function based on its 132cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // function signature. 133cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines return false; 134cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines } 135db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 136db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 137db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C); 138db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 139db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *SizeTy = Int32Ty; 140db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 141db169187dea4602e4ad32058762d23d474753fd0Stephen Hines /* Defined in frameworks/base/libs/rs/rs_hal.h: 142db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 143db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * struct RsForEachStubParamStruct { 144db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * const void *in; 145db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * void *out; 146db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * const void *usr; 147db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * size_t usr_len; 148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t x; 149db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t y; 150db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t z; 151db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t lod; 152db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * enum RsAllocationCubemapFace face; 153db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t ar[16]; 154db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * }; 155db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 156db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 9> StructTys; 157db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(VoidPtrTy); // const void *in 158db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(VoidPtrTy); // void *out 159db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(VoidPtrTy); // const void *usr 160db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(SizeTy); // size_t usr_len 161db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // uint32_t x 162db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // uint32_t y 163db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // uint32_t z 164db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // uint32_t lod 165db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace 166db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16] 167db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 168db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *ForEachStubPtrTy = llvm::StructType::create( 169db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys, "RsForEachStubParamStruct")->getPointerTo(); 170db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 171db169187dea4602e4ad32058762d23d474753fd0Stephen Hines /* Create the function signature for our expanded function. 172db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 173db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t instep, uint32_t outstep) 174db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 175db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 8> ParamTys; 176db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p 177db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ParamTys.push_back(Int32Ty); // uint32_t x1 178db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ParamTys.push_back(Int32Ty); // uint32_t x2 179db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ParamTys.push_back(Int32Ty); // uint32_t instep 180db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ParamTys.push_back(Int32Ty); // uint32_t outstep 181db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 182db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::FunctionType *FT = 183db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false); 184db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Function *ExpandedFunc = 185db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Function::Create(FT, 186db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::GlobalValue::ExternalLinkage, 187db169187dea4602e4ad32058762d23d474753fd0Stephen Hines F->getName() + ".expand", M); 188db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 189db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Create and name the actual arguments to this expanded function. 190db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Argument*, 8> ArgVec; 191db169187dea4602e4ad32058762d23d474753fd0Stephen Hines for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(), 192db169187dea4602e4ad32058762d23d474753fd0Stephen Hines E = ExpandedFunc->arg_end(); 193db169187dea4602e4ad32058762d23d474753fd0Stephen Hines B != E; 194db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ++B) { 195db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ArgVec.push_back(B); 196db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 197db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 198db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (ArgVec.size() != 5) { 19989e8490c80505468f2b816ca9d12fefa53f05959Shih-wei Liao ALOGE("Incorrect number of arguments to function: %zu", 20089e8490c80505468f2b816ca9d12fefa53f05959Shih-wei Liao ArgVec.size()); 201db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return false; 202db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 203db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_p = ArgVec[0]; 204db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_x1 = ArgVec[1]; 205db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_x2 = ArgVec[2]; 206db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_instep = ArgVec[3]; 207db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_outstep = ArgVec[4]; 208db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 209db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Arg_p->setName("p"); 210db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Arg_x1->setName("x1"); 211db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Arg_x2->setName("x2"); 212db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Arg_instep->setName("instep"); 213db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Arg_outstep->setName("outstep"); 214db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 215db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Construct the actual function body. 216db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::BasicBlock *Begin = 217db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc); 218db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::IRBuilder<> Builder(Begin); 219db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 220db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // uint32_t X = x1; 221db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX"); 222db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStore(Arg_x1, AX); 223db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 224cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Collect and construct the arguments for the kernel(). 225db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Note that we load any loop-invariant arguments before entering the Loop. 226db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Function::arg_iterator Args = F->arg_begin(); 227db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 228db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *InTy = NULL; 229db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::AllocaInst *AIn = NULL; 230db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasIn(Signature)) { 231db169187dea4602e4ad32058762d23d474753fd0Stephen Hines InTy = Args->getType(); 232db169187dea4602e4ad32058762d23d474753fd0Stephen Hines AIn = Builder.CreateAlloca(InTy, 0, "AIn"); 233db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad( 234db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn); 235db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 236db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 237db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 238db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *OutTy = NULL; 239db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::AllocaInst *AOut = NULL; 240db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasOut(Signature)) { 241db169187dea4602e4ad32058762d23d474753fd0Stephen Hines OutTy = Args->getType(); 242db169187dea4602e4ad32058762d23d474753fd0Stephen Hines AOut = Builder.CreateAlloca(OutTy, 0, "AOut"); 243db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad( 244db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut); 245db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 246db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 247db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 248db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *UsrData = NULL; 249db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasUsrData(Signature)) { 250db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *UsrDataTy = Args->getType(); 251db169187dea4602e4ad32058762d23d474753fd0Stephen Hines UsrData = Builder.CreatePointerCast(Builder.CreateLoad( 252db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy); 253db169187dea4602e4ad32058762d23d474753fd0Stephen Hines UsrData->setName("UsrData"); 254db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 255db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 256db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 257db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasX(Signature)) { 258db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 259db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 260db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 261db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Y = NULL; 262db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasY(Signature)) { 263db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y"); 264db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 265db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 266db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 2676e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines bccAssert(Args == F->arg_end()); 268db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 269db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc); 270db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc); 271db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 272db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // if (x1 < x2) goto Loop; else goto Exit; 273db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2); 274db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateCondBr(Cond, Loop, Exit); 275db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 276db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Loop: 277db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.SetInsertPoint(Loop); 278db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 279cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Populate the actual call to kernel(). 280db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 281db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 282db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *In = NULL; 283db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Out = NULL; 284db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 285db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (AIn) { 286db169187dea4602e4ad32058762d23d474753fd0Stephen Hines In = Builder.CreateLoad(AIn, "In"); 287db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(In); 288db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 289db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 290db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (AOut) { 291db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Out = Builder.CreateLoad(AOut, "Out"); 292db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(Out); 293db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 294db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 295db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (UsrData) { 296db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(UsrData); 297db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 298db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 299db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // We always have to load X, since it is used to iterate through the loop. 300db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *X = Builder.CreateLoad(AX, "X"); 301db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasX(Signature)) { 302db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(X); 303db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 304db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 305db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (Y) { 306db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(Y); 307db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 308db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 309db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateCall(F, RootArgs); 310db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 311db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (In) { 312db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // In += instep 313db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd( 314db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreatePtrToInt(In, Int32Ty), Arg_instep), InTy); 315db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStore(NewIn, AIn); 316db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 317db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 318db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (Out) { 319db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Out += outstep 320db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd( 321db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreatePtrToInt(Out, Int32Ty), Arg_outstep), OutTy); 322db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStore(NewOut, AOut); 323db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 324db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 325db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // X++; 326db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *XPlusOne = 327db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1)); 328db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStore(XPlusOne, AX); 329db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 330db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // If (X < x2) goto Loop; else goto Exit; 331db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2); 332db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateCondBr(Cond, Loop, Exit); 333db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 334db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Exit: 335db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.SetInsertPoint(Exit); 336db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateRetVoid(); 337db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 338db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return true; 339db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 340db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 341db169187dea4602e4ad32058762d23d474753fd0Stephen Hines virtual bool runOnModule(llvm::Module &M) { 342cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines bool Changed = false; 343db169187dea4602e4ad32058762d23d474753fd0Stephen Hines this->M = &M; 344db169187dea4602e4ad32058762d23d474753fd0Stephen Hines C = &M.getContext(); 345db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 3467a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao for (RSInfo::ExportForeachFuncListTy::const_iterator 3477a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao func_iter = mFuncs.begin(), func_end = mFuncs.end(); 3487a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao func_iter != func_end; func_iter++) { 3497a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao const char *name = func_iter->first; 3507a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao uint32_t signature = func_iter->second; 3517a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao llvm::Function *kernel = M.getFunction(name); 352cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines if (kernel && kernel->getReturnType()->isVoidTy()) { 3537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao Changed |= ExpandFunction(kernel, signature); 354cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines } 355db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 356db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 357cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines return Changed; 358db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 359db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 360db169187dea4602e4ad32058762d23d474753fd0Stephen Hines virtual const char *getPassName() const { 361db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return "ForEach-able Function Expansion"; 362db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 363db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 3647a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}; // end RSForEachExpandPass 365db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 3667a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace 3677a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 3687a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaochar RSForEachExpandPass::ID = 0; 369db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 370db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc { 371db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 3727a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass * 3737a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei LiaocreateRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs){ 3747a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao return new RSForEachExpandPass(pForeachFuncs); 3757a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} 376db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 3777a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc 378