// RSKernelExpand.cpp revision 2b04086acbef6520ae2c54a868b1271abf053122
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/* 2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project 3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License"); 5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License. 6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at 7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * http://www.apache.org/licenses/LICENSE-2.0 9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software 11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS, 12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and 14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License. 
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h" 18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h" 197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib> 217a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 22c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/DerivedTypes.h> 23c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Function.h> 24c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Instructions.h> 25c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Module.h> 26c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h> 27c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Support/IRBuilder.h> 282b04086acbef6520ae2c54a868b1271abf053122Stephen Hines#include <llvm/Target/TargetData.h> 29c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Type.h> 30c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang 31c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h" 32e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSInfo.h" 33ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h" 34db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 357a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc; 367a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 37db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace { 387a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 397a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao/* RSForEachExpandPass - This pass operates on functions that are able to be 407a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * called via rsForEach() or "foreach_<NAME>". 
We create an inner loop for the 417a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * ForEach-able function to be invoked over the appropriate data cells of the 427a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * input/output allocations (adjusting other relevant parameters as we go). We 437a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * support doing this for any ForEach-able compute kernels. The new function 447a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * name is the original function name followed by ".expand". Note that we 457a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * still generate code for the original function. 467a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */ 477a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaoclass RSForEachExpandPass : public llvm::ModulePass { 487a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaoprivate: 49db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static char ID; 50db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 51db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Module *M; 52db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::LLVMContext *C; 53db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 547a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao const RSInfo::ExportForeachFuncListTy &mFuncs; 55cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines 562b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Turns on optimization of allocation stride values. 
572b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bool mEnableStepOpt; 582b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 59cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines uint32_t getRootSignature(llvm::Function *F) { 60db169187dea4602e4ad32058762d23d474753fd0Stephen Hines const llvm::NamedMDNode *ExportForEachMetadata = 61db169187dea4602e4ad32058762d23d474753fd0Stephen Hines M->getNamedMetadata("#rs_export_foreach"); 62db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 63db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (!ExportForEachMetadata) { 64db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 8> RootArgTys; 65db169187dea4602e4ad32058762d23d474753fd0Stephen Hines for (llvm::Function::arg_iterator B = F->arg_begin(), 66db169187dea4602e4ad32058762d23d474753fd0Stephen Hines E = F->arg_end(); 67db169187dea4602e4ad32058762d23d474753fd0Stephen Hines B != E; 68db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ++B) { 69db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgTys.push_back(B->getType()); 70db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 71db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 72db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // For pre-ICS bitcode, we may not have signature information. In that 73db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // case, we use the size of the RootArgTys to select the number of 74db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // arguments. 
75db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return (1 << RootArgTys.size()) - 1; 76db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 77db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 786e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines bccAssert(ExportForEachMetadata->getNumOperands() > 0); 79db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 80cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // We only handle the case for legacy root() functions here, so this is 81cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // hard-coded to look at only the first such function. 82db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 83db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigNode != NULL && SigNode->getNumOperands() == 1) { 84db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *SigVal = SigNode->getOperand(0); 85db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigVal->getValueID() == llvm::Value::MDStringVal) { 86db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::StringRef SigString = 87db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static_cast<llvm::MDString*>(SigVal)->getString(); 88db169187dea4602e4ad32058762d23d474753fd0Stephen Hines uint32_t Signature = 0; 89db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigString.getAsInteger(10, Signature)) { 90db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 91db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 92db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 93db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature; 94db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 95db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 96db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 97db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 
98db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 99db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1002b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Get the actual value we should use to step through an allocation. 1012b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // TD - Target Data size/layout information. 1022b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // T - Type of allocation (should be a pointer). 1032b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // OrigStep - Original step increment (root.expand() input from driver). 1042b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *getStepValue(llvm::TargetData *TD, llvm::Type *T, 1052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *OrigStep) { 1062b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bccAssert(TD); 1072b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bccAssert(T); 1082b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bccAssert(OrigStep); 1092b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T); 1102b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C); 1112b04086acbef6520ae2c54a868b1271abf053122Stephen Hines if (mEnableStepOpt && T != VoidPtrTy && PT) { 1122b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Type *ET = PT->getElementType(); 1132b04086acbef6520ae2c54a868b1271abf053122Stephen Hines uint64_t ETSize = TD->getTypeStoreSize(ET); 1142b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 1152b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return llvm::ConstantInt::get(Int32Ty, ETSize); 1162b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } else { 1172b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return OrigStep; 1182b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 1192b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 
} 1202b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 121db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasIn(uint32_t Signature) { 122db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature & 1; 123db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 124db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 125db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasOut(uint32_t Signature) { 126db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature & 2; 127db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 128db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 129db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasUsrData(uint32_t Signature) { 130db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature & 4; 131db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 132db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 133db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasX(uint32_t Signature) { 134db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature & 8; 135db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 136db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 137db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasY(uint32_t Signature) { 138db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature & 16; 139db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 140db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1417a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaopublic: 1422b04086acbef6520ae2c54a868b1271abf053122Stephen Hines RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs, 1432b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bool pEnableStepOpt) 1442b04086acbef6520ae2c54a868b1271abf053122Stephen Hines : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs), 1452b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 
mEnableStepOpt(pEnableStepOpt) { 146db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 147db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines /* Performs the actual optimization on a selected function. On success, the 149db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Module will contain a new function of the name "<NAME>.expand" that 150db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * invokes <NAME>() in a loop with the appropriate parameters. 151db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 152cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines bool ExpandFunction(llvm::Function *F, uint32_t Signature) { 153cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str()); 154db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 155db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (!Signature) { 156cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines Signature = getRootSignature(F); 157cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines if (!Signature) { 158cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // We couldn't determine how to expand this function based on its 159cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // function signature. 
160cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines return false; 161cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines } 162db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 163db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1642b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::TargetData TD(M); 1652b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 166db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C); 167db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 168db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *SizeTy = Int32Ty; 169db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 170db169187dea4602e4ad32058762d23d474753fd0Stephen Hines /* Defined in frameworks/base/libs/rs/rs_hal.h: 171db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 172db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * struct RsForEachStubParamStruct { 173db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * const void *in; 174db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * void *out; 175db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * const void *usr; 176db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * size_t usr_len; 177db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t x; 178db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t y; 179db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t z; 180db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t lod; 181db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * enum RsAllocationCubemapFace face; 182db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t ar[16]; 183db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * }; 184db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 185db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 9> StructTys; 
186db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(VoidPtrTy); // const void *in 187db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(VoidPtrTy); // void *out 188db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(VoidPtrTy); // const void *usr 189db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(SizeTy); // size_t usr_len 190db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // uint32_t x 191db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // uint32_t y 192db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // uint32_t z 193db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // uint32_t lod 194db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace 195db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16] 196db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 197db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *ForEachStubPtrTy = llvm::StructType::create( 198db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys, "RsForEachStubParamStruct")->getPointerTo(); 199db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 200db169187dea4602e4ad32058762d23d474753fd0Stephen Hines /* Create the function signature for our expanded function. 
201db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 202db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t instep, uint32_t outstep) 203db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 204db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 8> ParamTys; 205db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p 206db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ParamTys.push_back(Int32Ty); // uint32_t x1 207db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ParamTys.push_back(Int32Ty); // uint32_t x2 208db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ParamTys.push_back(Int32Ty); // uint32_t instep 209db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ParamTys.push_back(Int32Ty); // uint32_t outstep 210db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 211db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::FunctionType *FT = 212db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false); 213db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Function *ExpandedFunc = 214db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Function::Create(FT, 215db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::GlobalValue::ExternalLinkage, 216db169187dea4602e4ad32058762d23d474753fd0Stephen Hines F->getName() + ".expand", M); 217db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 218db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Create and name the actual arguments to this expanded function. 
219db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Argument*, 8> ArgVec; 220db169187dea4602e4ad32058762d23d474753fd0Stephen Hines for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(), 221db169187dea4602e4ad32058762d23d474753fd0Stephen Hines E = ExpandedFunc->arg_end(); 222db169187dea4602e4ad32058762d23d474753fd0Stephen Hines B != E; 223db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ++B) { 224db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ArgVec.push_back(B); 225db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 226db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 227db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (ArgVec.size() != 5) { 22889e8490c80505468f2b816ca9d12fefa53f05959Shih-wei Liao ALOGE("Incorrect number of arguments to function: %zu", 22989e8490c80505468f2b816ca9d12fefa53f05959Shih-wei Liao ArgVec.size()); 230db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return false; 231db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 232db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_p = ArgVec[0]; 233db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_x1 = ArgVec[1]; 234db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_x2 = ArgVec[2]; 235db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_instep = ArgVec[3]; 236db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_outstep = ArgVec[4]; 237db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 238db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Arg_p->setName("p"); 239db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Arg_x1->setName("x1"); 240db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Arg_x2->setName("x2"); 2412b04086acbef6520ae2c54a868b1271abf053122Stephen Hines Arg_instep->setName("arg_instep"); 2422b04086acbef6520ae2c54a868b1271abf053122Stephen Hines Arg_outstep->setName("arg_outstep"); 
2432b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 2442b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *InStep = NULL; 2452b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *OutStep = NULL; 246db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 247db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Construct the actual function body. 248db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::BasicBlock *Begin = 249db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc); 250db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::IRBuilder<> Builder(Begin); 251db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 252db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // uint32_t X = x1; 253db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX"); 254db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStore(Arg_x1, AX); 255db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 256cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Collect and construct the arguments for the kernel(). 257db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Note that we load any loop-invariant arguments before entering the Loop. 
258db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Function::arg_iterator Args = F->arg_begin(); 259db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 260db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *InTy = NULL; 261db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::AllocaInst *AIn = NULL; 262db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasIn(Signature)) { 263db169187dea4602e4ad32058762d23d474753fd0Stephen Hines InTy = Args->getType(); 264db169187dea4602e4ad32058762d23d474753fd0Stephen Hines AIn = Builder.CreateAlloca(InTy, 0, "AIn"); 2652b04086acbef6520ae2c54a868b1271abf053122Stephen Hines InStep = getStepValue(&TD, InTy, Arg_instep); 2662b04086acbef6520ae2c54a868b1271abf053122Stephen Hines InStep->setName("instep"); 267db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad( 268db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn); 269db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 270db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 271db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 272db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *OutTy = NULL; 273db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::AllocaInst *AOut = NULL; 274db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasOut(Signature)) { 275db169187dea4602e4ad32058762d23d474753fd0Stephen Hines OutTy = Args->getType(); 276db169187dea4602e4ad32058762d23d474753fd0Stephen Hines AOut = Builder.CreateAlloca(OutTy, 0, "AOut"); 2772b04086acbef6520ae2c54a868b1271abf053122Stephen Hines OutStep = getStepValue(&TD, OutTy, Arg_outstep); 2782b04086acbef6520ae2c54a868b1271abf053122Stephen Hines OutStep->setName("outstep"); 279db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad( 280db169187dea4602e4ad32058762d23d474753fd0Stephen 
Hines Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut); 281db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 282db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 283db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 284db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *UsrData = NULL; 285db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasUsrData(Signature)) { 286db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *UsrDataTy = Args->getType(); 287db169187dea4602e4ad32058762d23d474753fd0Stephen Hines UsrData = Builder.CreatePointerCast(Builder.CreateLoad( 288db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy); 289db169187dea4602e4ad32058762d23d474753fd0Stephen Hines UsrData->setName("UsrData"); 290db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 291db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 292db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 293db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasX(Signature)) { 294db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 295db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 296db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 297db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Y = NULL; 298db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasY(Signature)) { 299db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y"); 300db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 301db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 302db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 3036e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines bccAssert(Args == F->arg_end()); 304db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 305db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc); 
306db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc); 307db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 308db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // if (x1 < x2) goto Loop; else goto Exit; 309db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2); 310db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateCondBr(Cond, Loop, Exit); 311db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 312db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Loop: 313db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.SetInsertPoint(Loop); 314db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 315cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Populate the actual call to kernel(). 316db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 317db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 318db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *In = NULL; 319db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Out = NULL; 320db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 321db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (AIn) { 322db169187dea4602e4ad32058762d23d474753fd0Stephen Hines In = Builder.CreateLoad(AIn, "In"); 323db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(In); 324db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 325db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 326db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (AOut) { 327db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Out = Builder.CreateLoad(AOut, "Out"); 328db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(Out); 329db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 330db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 
331db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (UsrData) { 332db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(UsrData); 333db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 334db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 335db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // We always have to load X, since it is used to iterate through the loop. 336db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *X = Builder.CreateLoad(AX, "X"); 337db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasX(Signature)) { 338db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(X); 339db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 340db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 341db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (Y) { 342db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(Y); 343db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 344db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 345db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateCall(F, RootArgs); 346db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 347db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (In) { 348db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // In += instep 349db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd( 3502b04086acbef6520ae2c54a868b1271abf053122Stephen Hines Builder.CreatePtrToInt(In, Int32Ty), InStep), InTy); 351db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStore(NewIn, AIn); 352db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 353db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 354db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (Out) { 355db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Out += outstep 356db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *NewOut = 
Builder.CreateIntToPtr(Builder.CreateNUWAdd( 3572b04086acbef6520ae2c54a868b1271abf053122Stephen Hines Builder.CreatePtrToInt(Out, Int32Ty), OutStep), OutTy); 358db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStore(NewOut, AOut); 359db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 360db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 361db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // X++; 362db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *XPlusOne = 363db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1)); 364db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStore(XPlusOne, AX); 365db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 366db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // If (X < x2) goto Loop; else goto Exit; 367db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2); 368db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateCondBr(Cond, Loop, Exit); 369db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 370db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Exit: 371db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.SetInsertPoint(Exit); 372db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateRetVoid(); 373db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 374db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return true; 375db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 376db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 377db169187dea4602e4ad32058762d23d474753fd0Stephen Hines virtual bool runOnModule(llvm::Module &M) { 378cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines bool Changed = false; 379db169187dea4602e4ad32058762d23d474753fd0Stephen Hines this->M = &M; 380db169187dea4602e4ad32058762d23d474753fd0Stephen Hines C = &M.getContext(); 381db169187dea4602e4ad32058762d23d474753fd0Stephen 
Hines 3827a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao for (RSInfo::ExportForeachFuncListTy::const_iterator 3837a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao func_iter = mFuncs.begin(), func_end = mFuncs.end(); 3847a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao func_iter != func_end; func_iter++) { 3857a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao const char *name = func_iter->first; 3867a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao uint32_t signature = func_iter->second; 3877a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao llvm::Function *kernel = M.getFunction(name); 388cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines if (kernel && kernel->getReturnType()->isVoidTy()) { 3897a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao Changed |= ExpandFunction(kernel, signature); 390cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines } 391db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 392db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 393cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines return Changed; 394db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 395db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 396db169187dea4602e4ad32058762d23d474753fd0Stephen Hines virtual const char *getPassName() const { 397db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return "ForEach-able Function Expansion"; 398db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 399db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 4007a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}; // end RSForEachExpandPass 401db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 4027a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace 4037a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 4047a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaochar RSForEachExpandPass::ID = 0; 405db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 406db169187dea4602e4ad32058762d23d474753fd0Stephen 
Hinesnamespace bcc { 407db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 4087a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass * 4092b04086acbef6520ae2c54a868b1271abf053122Stephen HinescreateRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs, 4102b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bool pEnableStepOpt){ 4112b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt); 4127a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} 413db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 4147a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc 415