RSKernelExpand.cpp revision ae937ec9658c219a38d9d631d2569f19e21b50c0
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/* 2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project 3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License"); 5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License. 6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at 7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * http://www.apache.org/licenses/LICENSE-2.0 9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software 11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS, 12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and 14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License. 
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h" 18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h" 197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib> 217a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 22b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h> 23b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h> 24b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h> 25b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h> 26b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h> 27c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h> 287ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h> 29b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h> 30b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h> 31806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h> 32c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang 33c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h" 34e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSInfo.h" 35ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h" 36db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 377a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc; 387a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 39db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace { 407a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 
417a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao/* RSForEachExpandPass - This pass operates on functions that are able to be 427a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the 437a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * ForEach-able function to be invoked over the appropriate data cells of the 447a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * input/output allocations (adjusting other relevant parameters as we go). We 457a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * support doing this for any ForEach-able compute kernels. The new function 467a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * name is the original function name followed by ".expand". Note that we 477a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * still generate code for the original function. 487a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */ 497a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaoclass RSForEachExpandPass : public llvm::ModulePass { 507a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaoprivate: 51db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static char ID; 52db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 53db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Module *M; 54db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::LLVMContext *C; 55db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 567a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao const RSInfo::ExportForeachFuncListTy &mFuncs; 57cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines 582b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Turns on optimization of allocation stride values. 
592b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bool mEnableStepOpt; 602b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 61cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines uint32_t getRootSignature(llvm::Function *F) { 62db169187dea4602e4ad32058762d23d474753fd0Stephen Hines const llvm::NamedMDNode *ExportForEachMetadata = 63db169187dea4602e4ad32058762d23d474753fd0Stephen Hines M->getNamedMetadata("#rs_export_foreach"); 64db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 65db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (!ExportForEachMetadata) { 66db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 8> RootArgTys; 67db169187dea4602e4ad32058762d23d474753fd0Stephen Hines for (llvm::Function::arg_iterator B = F->arg_begin(), 68db169187dea4602e4ad32058762d23d474753fd0Stephen Hines E = F->arg_end(); 69db169187dea4602e4ad32058762d23d474753fd0Stephen Hines B != E; 70db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ++B) { 71db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgTys.push_back(B->getType()); 72db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 73db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 74db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // For pre-ICS bitcode, we may not have signature information. In that 75db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // case, we use the size of the RootArgTys to select the number of 76db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // arguments. 
77db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return (1 << RootArgTys.size()) - 1; 78db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 79db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 807ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines if (ExportForEachMetadata->getNumOperands() == 0) { 817ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return 0; 827ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 837ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 846e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines bccAssert(ExportForEachMetadata->getNumOperands() > 0); 85db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 86cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // We only handle the case for legacy root() functions here, so this is 87cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // hard-coded to look at only the first such function. 88db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 89db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigNode != NULL && SigNode->getNumOperands() == 1) { 90db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *SigVal = SigNode->getOperand(0); 91db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigVal->getValueID() == llvm::Value::MDStringVal) { 92db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::StringRef SigString = 93db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static_cast<llvm::MDString*>(SigVal)->getString(); 94db169187dea4602e4ad32058762d23d474753fd0Stephen Hines uint32_t Signature = 0; 95db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigString.getAsInteger(10, Signature)) { 96db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 97db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 98db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 
99db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature; 100db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 101db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 102db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 103db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 104db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 105db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1062b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Get the actual value we should use to step through an allocation. 1077b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 1087b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // Normally the value we use to step through an allocation is given to us by 1097b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // the driver. However, for certain primitive data types, we can derive an 1107b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // integer constant for the step value. We use this integer constant whenever 1117b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // possible to allow further compiler optimizations to take place. 1127b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 113b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines // DL - Target Data size/layout information. 1142b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // T - Type of allocation (should be a pointer). 1152b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // OrigStep - Original step increment (root.expand() input from driver). 
116b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *T, 1172b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *OrigStep) { 118b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines bccAssert(DL); 1192b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bccAssert(T); 1202b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bccAssert(OrigStep); 1212b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T); 1222b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C); 1232b04086acbef6520ae2c54a868b1271abf053122Stephen Hines if (mEnableStepOpt && T != VoidPtrTy && PT) { 1242b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Type *ET = PT->getElementType(); 125b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines uint64_t ETSize = DL->getTypeAllocSize(ET); 1262b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 1272b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return llvm::ConstantInt::get(Int32Ty, ETSize); 1282b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } else { 1292b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return OrigStep; 1302b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 1312b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 1322b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 133db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasIn(uint32_t Signature) { 1347ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return Signature & 0x01; 135db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 136db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 137db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasOut(uint32_t Signature) { 1387ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return Signature & 0x02; 
139db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 140db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 141db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasUsrData(uint32_t Signature) { 1427ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return Signature & 0x04; 143db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 144db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 145db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasX(uint32_t Signature) { 1467ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return Signature & 0x08; 147db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 149db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static bool hasY(uint32_t Signature) { 1507ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return Signature & 0x10; 1517ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 1527ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 1537ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines static bool isKernel(uint32_t Signature) { 1547ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return Signature & 0x20; 155db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 156db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1578ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser /// @brief Returns the type of the ForEach stub parameter structure. 1588ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser /// 1598ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser /// Renderscript uses a single structure in which all parameters are passed 1608ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser /// to keep the signature of the expanded function independent of the 1618ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser /// parameters passed to it. 
1628ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser llvm::Type *getForeachStubTy() { 163db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C); 164db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 165db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *SizeTy = Int32Ty; 166db169187dea4602e4ad32058762d23d474753fd0Stephen Hines /* Defined in frameworks/base/libs/rs/rs_hal.h: 167db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 168db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * struct RsForEachStubParamStruct { 169db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * const void *in; 170db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * void *out; 171db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * const void *usr; 172db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * size_t usr_len; 173db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t x; 174db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t y; 175db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t z; 176db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t lod; 177db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * enum RsAllocationCubemapFace face; 178db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t ar[16]; 179db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * }; 180db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 181db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 9> StructTys; 182db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(VoidPtrTy); // const void *in 183db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(VoidPtrTy); // void *out 184db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(VoidPtrTy); // const void *usr 185db169187dea4602e4ad32058762d23d474753fd0Stephen 
Hines StructTys.push_back(SizeTy); // size_t usr_len 186db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // uint32_t x 187db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // uint32_t y 188db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // uint32_t z 189db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // uint32_t lod 190db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace 191db169187dea4602e4ad32058762d23d474753fd0Stephen Hines StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16] 192db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1938ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser return llvm::StructType::create(StructTys, "RsForEachStubParamStruct"); 1948ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 1958ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 196357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// @brief Create skeleton of the expanded function. 
197357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 198357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// This creates a function with the following signature: 199357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 200357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 201357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// uint32_t instep, uint32_t outstep) 202357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 203357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) { 204357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser llvm::Type *ForEachStubPtrTy = getForeachStubTy()->getPointerTo(); 205357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C); 206357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser 207357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser llvm::SmallVector<llvm::Type*, 8> ParamTys; 208357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p 209357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser ParamTys.push_back(Int32Ty); // uint32_t x1 210357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser ParamTys.push_back(Int32Ty); // uint32_t x2 211357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser ParamTys.push_back(Int32Ty); // uint32_t instep 212357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser ParamTys.push_back(Int32Ty); // uint32_t outstep 213357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser 214357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser llvm::FunctionType *FT = 215357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false); 216802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser llvm::Function *F = 217802f65931852d925bbe2e478bafe422b4002e7c4Tobias 
Grosser llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, 218802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser OldName + ".expand", M); 219802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser 220802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser llvm::Function::arg_iterator AI = F->arg_begin(); 221802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser 222802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser AI->setName("p"); 223802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser AI++; 224802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser AI->setName("x1"); 225802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser AI++; 226802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser AI->setName("x2"); 227802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser AI++; 228802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser AI->setName("arg_instep"); 229802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser AI++; 230802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser AI->setName("arg_outstep"); 231802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser AI++; 232802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser 233802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser assert(AI == F->arg_end()); 234802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser 235806075b3a54af826fea78490fb213d8a0784138eTobias Grosser llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*C, "Begin", F); 236806075b3a54af826fea78490fb213d8a0784138eTobias Grosser llvm::IRBuilder<> Builder(Begin); 237806075b3a54af826fea78490fb213d8a0784138eTobias Grosser Builder.CreateRetVoid(); 238806075b3a54af826fea78490fb213d8a0784138eTobias Grosser 239802f65931852d925bbe2e478bafe422b4002e7c4Tobias Grosser return F; 240357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser } 241357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser 242e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @brief Create an empty loop 243e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 
244e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// Create a loop of the form: 245e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 246e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// for (i = LowerBound; i < UpperBound; i++) 247e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// ; 248e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 249e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// After the loop has been created, the builder is set such that 250e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// instructions can be added to the loop body. 251e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 252e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param Builder The builder to use to build this loop. The current 253e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// position of the builder is the position the loop 254e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// will be inserted. 255e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LowerBound The first value of the loop iterator 256e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param UpperBound The maximal value of the loop iterator 257e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LoopIV A reference that will be set to the loop iterator. 258e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @return The BasicBlock that will be executed after the loop. 
259e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder, 260e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *LowerBound, 261e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *UpperBound, 262e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::PHINode **LoopIV) { 263e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser assert(LowerBound->getType() == UpperBound->getType()); 264e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 265e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB; 266e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *Cond, *IVNext; 267e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::PHINode *IV; 268e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 269e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser CondBB = Builder.GetInsertBlock(); 270e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this); 271e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser HeaderBB = llvm::BasicBlock::Create(*C, "Loop", CondBB->getParent()); 272e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 273e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // if (LowerBound < Upperbound) 274e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto LoopHeader 275e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // else 276e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto AfterBB 277e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser CondBB->getTerminator()->eraseFromParent(); 278e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(CondBB); 279e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(LowerBound, UpperBound); 280e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 
281e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 282e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ] 283e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // iv.next = iv + 1 284e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // if (iv.next < Upperbound) 285e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto LoopHeader 286e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // else 287e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto AfterBB 288e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(HeaderBB); 289e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV = Builder.CreatePHI(LowerBound->getType(), 2, "X"); 290e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV->addIncoming(LowerBound, CondBB); 291e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1)); 292e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV->addIncoming(IVNext, HeaderBB); 293e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(IVNext, UpperBound); 294e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 295e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser AfterBB->setName("Exit"); 296e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 297e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser *LoopIV = IV; 298e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser return AfterBB; 299e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser } 300e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 3018ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic: 3028ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs, 3038ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser bool 
pEnableStepOpt) 3048ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs), 3058ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser mEnableStepOpt(pEnableStepOpt) { 3068ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 3078ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 3088ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser /* Performs the actual optimization on a selected function. On success, the 3098ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * Module will contain a new function of the name "<NAME>.expand" that 3108ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * invokes <NAME>() in a loop with the appropriate parameters. 3118ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser */ 3128ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser bool ExpandFunction(llvm::Function *F, uint32_t Signature) { 3138ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str()); 3148ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 3158ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser if (!Signature) { 3168ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser Signature = getRootSignature(F); 3178ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser if (!Signature) { 3188ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser // We couldn't determine how to expand this function based on its 3198ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser // function signature. 
3208ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser return false; 3218ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 3228ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 3238ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 3248ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser llvm::DataLayout DL(M); 3258ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 326357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName()); 327db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 328db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Create and name the actual arguments to this expanded function. 329db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Argument*, 8> ArgVec; 330db169187dea4602e4ad32058762d23d474753fd0Stephen Hines for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(), 331db169187dea4602e4ad32058762d23d474753fd0Stephen Hines E = ExpandedFunc->arg_end(); 332db169187dea4602e4ad32058762d23d474753fd0Stephen Hines B != E; 333db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ++B) { 334db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ArgVec.push_back(B); 335db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 336db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 337db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (ArgVec.size() != 5) { 33889e8490c80505468f2b816ca9d12fefa53f05959Shih-wei Liao ALOGE("Incorrect number of arguments to function: %zu", 33989e8490c80505468f2b816ca9d12fefa53f05959Shih-wei Liao ArgVec.size()); 340db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return false; 341db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 342db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_p = ArgVec[0]; 343db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_x1 = ArgVec[1]; 344db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_x2 = 
ArgVec[2]; 345db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_instep = ArgVec[3]; 346db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Arg_outstep = ArgVec[4]; 347db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 3482b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *InStep = NULL; 3492b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *OutStep = NULL; 350db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 351db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Construct the actual function body. 352e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::IRBuilder<> Builder(ExpandedFunc->getEntryBlock().begin()); 353db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 354cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Collect and construct the arguments for the kernel(). 355db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Note that we load any loop-invariant arguments before entering the Loop. 
356db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Function::arg_iterator Args = F->arg_begin(); 357db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 358db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *InTy = NULL; 359ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser llvm::Value *InBasePtr = NULL; 360db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasIn(Signature)) { 361db169187dea4602e4ad32058762d23d474753fd0Stephen Hines InTy = Args->getType(); 362b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines InStep = getStepValue(&DL, InTy, Arg_instep); 3632b04086acbef6520ae2c54a868b1271abf053122Stephen Hines InStep->setName("instep"); 364ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0)); 365db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 366db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 367db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 368db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *OutTy = NULL; 36902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser llvm::Value *OutBasePtr = NULL; 370db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasOut(Signature)) { 371db169187dea4602e4ad32058762d23d474753fd0Stephen Hines OutTy = Args->getType(); 372b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines OutStep = getStepValue(&DL, OutTy, Arg_outstep); 3732b04086acbef6520ae2c54a868b1271abf053122Stephen Hines OutStep->setName("outstep"); 37402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1)); 375db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 376db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 377db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 378db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *UsrData = NULL; 379db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasUsrData(Signature)) { 
380db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Type *UsrDataTy = Args->getType(); 381db169187dea4602e4ad32058762d23d474753fd0Stephen Hines UsrData = Builder.CreatePointerCast(Builder.CreateLoad( 382db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy); 383db169187dea4602e4ad32058762d23d474753fd0Stephen Hines UsrData->setName("UsrData"); 384db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 385db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 386db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 387db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasX(Signature)) { 388db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 389db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 390db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 391db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::Value *Y = NULL; 392db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasY(Signature)) { 393db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y"); 394db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Args++; 395db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 396db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 3976e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines bccAssert(Args == F->arg_end()); 398db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 399e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::PHINode *IV; 400e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser createLoop(Builder, Arg_x1, Arg_x2, &IV); 401db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 402cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Populate the actual call to kernel(). 
403db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 404db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 4057ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines llvm::Value *InPtr = NULL; 4067ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines llvm::Value *OutPtr = NULL; 407db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 408ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser // Calculate the current input and output pointers 40902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // 410ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser // We always calculate the input/output pointers with a GEP operating on i8 41102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // values and only cast at the very end to OutTy. This is because the step 41202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // between two values is given in bytes. 41302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // 41402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // TODO: We could further optimize the output by using a GEP operation of 41502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // type 'OutTy' in cases where the element type of the allocation allows. 
41602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser if (OutBasePtr) { 41702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 41802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutOffset = Builder.CreateMul(OutOffset, OutStep); 41902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset); 42002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutPtr = Builder.CreatePointerCast(OutPtr, OutTy); 42102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser } 422ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser if (InBasePtr) { 423ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1); 424ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InOffset = Builder.CreateMul(InOffset, InStep); 425ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InPtr = Builder.CreateGEP(InBasePtr, InOffset); 426ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InPtr = Builder.CreatePointerCast(InPtr, InTy); 427ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser } 42802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser 429ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser if (InPtr) { 4307ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines RootArgs.push_back(InPtr); 431db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 432db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 43302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser if (OutPtr) { 4347ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines RootArgs.push_back(OutPtr); 435db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 436db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 437db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (UsrData) { 438db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(UsrData); 439db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 
440db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 441e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *X = IV; 442db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (hasX(Signature)) { 443db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(X); 444db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 445db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 446db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (Y) { 447db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(Y); 448db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 449db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 450db169187dea4602e4ad32058762d23d474753fd0Stephen Hines Builder.CreateCall(F, RootArgs); 451db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 4527ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return true; 4537ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 4547ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines

  /* Expand a pass-by-value kernel.
   *
   * Fills in the body of the function returned by
   * createEmptyExpandedFunction(F->getName()) with a loop, built by
   * createLoop() from Arg_x1 and Arg_x2, that calls F once per iteration.
   *
   * The call arguments are assembled in this order:
   *   - the current output pointer, only when F returns void and therefore
   *     receives its output by reference,
   *   - the input value, loaded from the current input pointer,
   *   - the loop induction variable as X,
   *   - Y, loaded once before the loop from field 5 of the first argument.
   *
   * When F returns a value and the signature has an output, the returned
   * value is stored through the current output pointer.
   *
   * Returns false (after logging an error) when the expanded function does
   * not have exactly five arguments, and true otherwise.
   */
  bool ExpandKernel(llvm::Function *F, uint32_t Signature) {
    bccAssert(isKernel(Signature));
    ALOGV("Expanding kernel Function %s", F->getName().str().c_str());

    // TODO: Refactor this to share functionality with ExpandFunction.
    llvm::DataLayout DL(M);

    llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());

    // Create and name the actual arguments to this expanded function.
    llvm::SmallVector<llvm::Argument*, 8> ArgVec;
    for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
                                      E = ExpandedFunc->arg_end();
         B != E;
         ++B) {
      ArgVec.push_back(B);
    }

    if (ArgVec.size() != 5) {
      ALOGE("Incorrect number of arguments to function: %zu",
            ArgVec.size());
      return false;
    }
    llvm::Value *Arg_p = ArgVec[0];
    llvm::Value *Arg_x1 = ArgVec[1];
    llvm::Value *Arg_x2 = ArgVec[2];
    llvm::Value *Arg_instep = ArgVec[3];
    llvm::Value *Arg_outstep = ArgVec[4];

    llvm::Value *InStep = NULL;
    llvm::Value *OutStep = NULL;

    // Construct the actual function body.
    llvm::IRBuilder<> Builder(ExpandedFunc->getEntryBlock().begin());

    // Collect and construct the arguments for the kernel().
    // Note that we load any loop-invariant arguments before entering the Loop.
    llvm::Function::arg_iterator Args = F->arg_begin();

    llvm::Type *OutTy = NULL;
    bool PassOutByReference = false;
    llvm::Value *OutBasePtr = NULL;
    if (hasOut(Signature)) {
      llvm::Type *OutBaseTy = F->getReturnType();
      if (OutBaseTy->isVoidTy()) {
        // A void kernel with an output takes the output pointer as its
        // first parameter.
        PassOutByReference = true;
        OutTy = Args->getType();
        Args++;
      } else {
        OutTy = OutBaseTy->getPointerTo();
        // We don't increment Args, since we are using the actual return type.
      }
      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
      OutStep->setName("outstep");
      OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
    }

    llvm::Type *InTy = NULL;
    llvm::Value *InBasePtr = NULL;
    if (hasIn(Signature)) {
      // The kernel receives its input by value, so the pointer type used to
      // load that value is derived from the parameter type.
      InTy = Args->getType()->getPointerTo();
      InStep = getStepValue(&DL, InTy, Arg_instep);
      InStep->setName("instep");
      InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
      Args++;
    }

    // No usrData parameter on kernels.
    bccAssert(!hasUsrData(Signature));

    if (hasX(Signature)) {
      Args++;
    }

    llvm::Value *Y = NULL;
    if (hasY(Signature)) {
      Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
      Args++;
    }

    bccAssert(Args == F->arg_end());

    llvm::PHINode *IV;
    createLoop(Builder, Arg_x1, Arg_x2, &IV);

    // Populate the actual call to kernel().
    llvm::SmallVector<llvm::Value*, 8> RootArgs;

    llvm::Value *InPtr = NULL;
    llvm::Value *OutPtr = NULL;

    // Calculate the current input and output pointers
    //
    // We always calculate the input/output pointers with a GEP operating on
    // the base pointers loaded from Arg_p and only cast at the very end to
    // InTy/OutTy. This is because the step between two values is given in
    // bytes.
    //
    // The input pointer used to be advanced by converting it to a 32-bit
    // integer, adding the step and converting back, which truncates the
    // address on targets whose pointers are wider than 32 bits. Deriving it
    // from the base pointer with a GEP, exactly like the output pointer and
    // like ExpandFunction, keeps the full pointer width.
    //
    // TODO: We could further optimize the output by using a GEP operation of
    // type 'OutTy' in cases where the element type of the allocation allows.
    if (OutBasePtr) {
      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
      OutOffset = Builder.CreateMul(OutOffset, OutStep);
      OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
    }
    if (InBasePtr) {
      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
      InOffset = Builder.CreateMul(InOffset, InStep);
      InPtr = Builder.CreateGEP(InBasePtr, InOffset);
      InPtr = Builder.CreatePointerCast(InPtr, InTy);
    }

    if (PassOutByReference) {
      RootArgs.push_back(OutPtr);
    }

    if (InPtr) {
      // Kernels take the input element by value, so load it here.
      llvm::Value *In = Builder.CreateLoad(InPtr, "In");
      RootArgs.push_back(In);
    }

    llvm::Value *X = IV;
    if (hasX(Signature)) {
      RootArgs.push_back(X);
    }

    if (Y) {
      RootArgs.push_back(Y);
    }

    llvm::Value *RetVal = Builder.CreateCall(F, RootArgs);

    // A kernel that returns its result has it written to the output here.
    if (OutPtr && !PassOutByReference) {
      Builder.CreateStore(RetVal, OutPtr);
    }

    return true;
  }

  /* Expand every exported forEach function that is present in the module.
   *
   * Records the module and its context in the pass, then walks mFuncs
   * (name/signature pairs). Functions whose signature is a kernel go through
   * ExpandKernel(); any other function that returns void goes through
   * ExpandFunction(). Names that are not found in the module, and non-kernel
   * functions with a non-void return type, are left untouched.
   *
   * Returns true if any expansion reported success.
   */
  virtual bool runOnModule(llvm::Module &M) {
    bool Changed = false;
    this->M = &M;
    C = &M.getContext();

    for (RSInfo::ExportForeachFuncListTy::const_iterator
             func_iter = mFuncs.begin(), func_end = mFuncs.end();
         func_iter != func_end; ++func_iter) {
      const char *name = func_iter->first;
      uint32_t signature = func_iter->second;
      llvm::Function *kernel = M.getFunction(name);
      if (!kernel) {
        continue;
      }

      if (isKernel(signature)) {
        Changed |= ExpandKernel(kernel, signature);
      } else if (kernel->getReturnType()->isVoidTy()) {
        Changed |= ExpandFunction(kernel, signature);
      }
    }

    return Changed;
  }

  /* Human-readable name of this pass. */
  virtual const char *getPassName() const {
    return "ForEach-able Function Expansion";
  }

}; // end RSForEachExpandPass

} // end anonymous namespace

char RSForEachExpandPass::ID = 0;

namespace bcc {

Hines 6387a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass * 6392b04086acbef6520ae2c54a868b1271abf053122Stephen HinescreateRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs, 6402b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bool pEnableStepOpt){ 6412b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt); 6427a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} 643db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 6447a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc 645