RSKernelExpand.cpp revision 1bd9f627fa0affb457507e86b0b6684c695fe726
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/* 2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project 3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License"); 5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License. 6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at 7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * http://www.apache.org/licenses/LICENSE-2.0 9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software 11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS, 12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and 14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License. 15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h" 18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h" 197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib> 2133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional> 227a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 23b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h> 24b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h> 25b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h> 26b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h> 2718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h> 28b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h> 29c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h> 307ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h> 31b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h> 32cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h> 33b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h> 34806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h> 35c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang 36c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h" 37ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h" 38db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 39d88177580db4ddedf680854c51db333c97eabc59Stephen Hines#include "bcinfo/MetadataExtractor.h" 40d88177580db4ddedf680854c51db333c97eabc59Stephen Hines 415010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes#define NUM_EXPANDED_FUNCTION_PARAMS 4 42bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 437a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc; 447a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 45db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace { 467a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 471bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hinesstatic const bool gEnableRsTbaa = false; 489c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines 497a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao/* RSForEachExpandPass - This pass operates on functions that are able to be 507a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the 517a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * ForEach-able function to be invoked over the appropriate data cells of the 527a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * input/output allocations (adjusting other relevant parameters as we go). We 537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * support doing this for any ForEach-able compute kernels. The new function 547a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * name is the original function name followed by ".expand". Note that we 557a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * still generate code for the original function. 567a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */ 577a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaoclass RSForEachExpandPass : public llvm::ModulePass { 5833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grosspublic: 59db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static char ID; 60db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 6133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossprivate: 62e44a3525b9703739534c3b62d7d1af4c95649a38David Gross static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h 63e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 64e44a3525b9703739534c3b62d7d1af4c95649a38David Gross enum RsLaunchDimensionsField { 65e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldX, 66e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldY, 67e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldZ, 68e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldLod, 69e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldFace, 70e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldArray, 71e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 72e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldCount 73e44a3525b9703739534c3b62d7d1af4c95649a38David Gross }; 74e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 75e44a3525b9703739534c3b62d7d1af4c95649a38David Gross enum RsExpandKernelDriverInfoPfxField { 76e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInPtr, 77e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInStride, 78e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInLen, 79e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutPtr, 80e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutStride, 81e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutLen, 82e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldDim, 83e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldCurrent, 84e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldUsr, 85e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldUsLenr, 86e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 87e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldCount 88e44a3525b9703739534c3b62d7d1af4c95649a38David Gross }; 8933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 90bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Module *Module; 91bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::LLVMContext *Context; 92bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 93bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 94e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * Pointer to LLVM type information for the the function signature 95e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * for expanded kernels. This must be re-calculated for each 96bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * module the pass is run on. 97bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 98bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::FunctionType *ExpandedFunctionType; 99db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 10025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t mExportForEachCount; 10125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char **mExportForEachNameList; 10225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const uint32_t *mExportForEachSignatureList; 103cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines 1042b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Turns on optimization of allocation stride values. 1052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bool mEnableStepOpt; 1062b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 107bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes uint32_t getRootSignature(llvm::Function *Function) { 108db169187dea4602e4ad32058762d23d474753fd0Stephen Hines const llvm::NamedMDNode *ExportForEachMetadata = 109bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Module->getNamedMetadata("#rs_export_foreach"); 110db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 111db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (!ExportForEachMetadata) { 112db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 8> RootArgTys; 113bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes for (llvm::Function::arg_iterator B = Function->arg_begin(), 114bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes E = Function->arg_end(); 115db169187dea4602e4ad32058762d23d474753fd0Stephen Hines B != E; 116db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ++B) { 117db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgTys.push_back(B->getType()); 118db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 119db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 120db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // For pre-ICS bitcode, we may not have signature information. In that 121db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // case, we use the size of the RootArgTys to select the number of 122db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // arguments. 123db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return (1 << RootArgTys.size()) - 1; 124db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 125db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1267ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines if (ExportForEachMetadata->getNumOperands() == 0) { 1277ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return 0; 1287ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 1297ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 1306e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines bccAssert(ExportForEachMetadata->getNumOperands() > 0); 131db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 132cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // We only handle the case for legacy root() functions here, so this is 133cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // hard-coded to look at only the first such function. 134db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 135900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes if (SigNode != nullptr && SigNode->getNumOperands() == 1) { 1361bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines llvm::Metadata *SigMD = SigNode->getOperand(0); 1371bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) { 1381bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines llvm::StringRef SigString = SigS->getString(); 139db169187dea4602e4ad32058762d23d474753fd0Stephen Hines uint32_t Signature = 0; 140db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigString.getAsInteger(10, Signature)) { 141db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 142db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 143db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 144db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature; 145db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 146db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 147db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 149db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 150db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 151429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray bool isStepOptSupported(llvm::Type *AllocType) { 152429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 153429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 154429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 155429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 156429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (mEnableStepOpt) { 157429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 158429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 159429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 160429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType == VoidPtrTy) { 161429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 162429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 163429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 164429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (!PT) { 165429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 166429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 167429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 168429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // remaining conditions are 64-bit only 169429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (VoidPtrTy->getPrimitiveSizeInBits() == 32) { 170429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return true; 171429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 172429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 173429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // coerce suggests an upconverted struct type, which we can't support 174429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) { 175429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 176429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 177429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 178429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported 179429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2); 180429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128); 181429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType == V2xi64Ty || AllocType == Int128Ty) { 182429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 183429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 184429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 185429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return true; 186429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 187429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 1882b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Get the actual value we should use to step through an allocation. 1897b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 1907b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // Normally the value we use to step through an allocation is given to us by 1917b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // the driver. However, for certain primitive data types, we can derive an 1927b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // integer constant for the step value. We use this integer constant whenever 1937b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // possible to allow further compiler optimizations to take place. 1947b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 195b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines // DL - Target Data size/layout information. 1962b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // T - Type of allocation (should be a pointer). 1972b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // OrigStep - Original step increment (root.expand() input from driver). 198bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType, 1992b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *OrigStep) { 200b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines bccAssert(DL); 201bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(AllocType); 2022b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bccAssert(OrigStep); 203bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 204429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (isStepOptSupported(AllocType)) { 2052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Type *ET = PT->getElementType(); 206b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines uint64_t ETSize = DL->getTypeAllocSize(ET); 207bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 2082b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return llvm::ConstantInt::get(Int32Ty, ETSize); 2092b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } else { 2102b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return OrigStep; 2112b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 2122b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 2132b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 214097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes /// Builds the types required by the pass for the given context. 215bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes void buildTypes(void) { 216e44a3525b9703739534c3b62d7d1af4c95649a38David Gross // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs. 217bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 218e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8Ty = llvm::Type::getInt8Ty(*Context); 219e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8PtrTy = Int8Ty->getPointerTo(); 220e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT); 221e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 222e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32ArrayInputLimitTy = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT); 223e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 224e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32Array4Ty = llvm::ArrayType::get(Int32Ty, 4); 225097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 226097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h: 227db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 228e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * struct RsLaunchDimensions { 229e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t x; 230db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t y; 231db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t z; 232e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t lod; 233e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t face; 234e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t array[4]; 235e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * }; 236e44a3525b9703739534c3b62d7d1af4c95649a38David Gross */ 237e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes; 238e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t x 239e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t y 240e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t z 241e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t lod 242e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t face 243e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4] 244e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType *RsLaunchDimensionsTy = 245e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions"); 246e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 2471d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h: 248e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 249e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * struct RsExpandKernelDriverInfoPfx { 250e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]; 251e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t inStride[RS_KERNEL_INPUT_LIMIT]; 252e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t inLen; 253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 254e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]; 255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t outStride[RS_KERNEL_INPUT_LIMIT]; 256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t outLen; 257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 258e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // Dimension of the launch 259e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * RsLaunchDimensions dim; 260e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 261e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // The walking iterator of the launch 262e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * RsLaunchDimensions current; 263e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 264e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * const void *usr; 265e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t usrLen; 266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 267e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // Items below this line are not used by the compiler and can be change in the driver. 268e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // So the compiler must assume there are an unknown number of fields of unknown type 269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // beginning here. 270db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * }; 2711d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross * 2721d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp). 273db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes; 275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT] 276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t inStride[RS_KERNEL_INPUT_LIMIT] 277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t inLen 278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT] 279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t outStride[RS_KERNEL_INPUT_LIMIT] 280e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t outLen 281e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions dim 282e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions current 283e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy); // const void *usr 284e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t usrLen 285e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType *RsExpandKernelDriverInfoPfxTy = 286e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx"); 287bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 288bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes // Create the function type for expanded kernels. 289bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 290e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo(); 291bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 292bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::SmallVector<llvm::Type*, 8> ParamTypes; 293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross ParamTypes.push_back(RsExpandKernelDriverInfoPfxPtrTy); // const RsExpandKernelDriverInfoPfx *p 294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross ParamTypes.push_back(Int32Ty); // uint32_t x1 295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross ParamTypes.push_back(Int32Ty); // uint32_t x2 296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross ParamTypes.push_back(Int32Ty); // uint32_t outstep 297bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 298e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes ExpandedFunctionType = 299e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes llvm::FunctionType::get(llvm::Type::getVoidTy(*Context), ParamTypes, 300e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes false); 3018ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 3028ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 303357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// @brief Create skeleton of the expanded function. 304357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 305357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// This creates a function with the following signature: 306357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 307357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 3085010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes /// uint32_t outstep) 309357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 310357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) { 311bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 312bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::Create(ExpandedFunctionType, 313bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::GlobalValue::ExternalLinkage, 314bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes OldName + ".expand", Module); 315bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 316bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS); 317bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 318bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin(); 319bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 320bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("p"); 321bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("x1"); 322bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("x2"); 323bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("arg_outstep"); 324bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 325bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 326bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction); 327806075b3a54af826fea78490fb213d8a0784138eTobias Grosser llvm::IRBuilder<> Builder(Begin); 328806075b3a54af826fea78490fb213d8a0784138eTobias Grosser Builder.CreateRetVoid(); 329806075b3a54af826fea78490fb213d8a0784138eTobias Grosser 330bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes return ExpandedFunction; 331357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser } 332357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser 333e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @brief Create an empty loop 334e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 335e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// Create a loop of the form: 336e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 337e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// for (i = LowerBound; i < UpperBound; i++) 338e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// ; 339e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 340e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// After the loop has been created, the builder is set such that 341e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// instructions can be added to the loop body. 342e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 343e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param Builder The builder to use to build this loop. The current 344e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// position of the builder is the position the loop 345e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// will be inserted. 346e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LowerBound The first value of the loop iterator 347e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param UpperBound The maximal value of the loop iterator 348e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LoopIV A reference that will be set to the loop iterator. 349e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @return The BasicBlock that will be executed after the loop. 350e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder, 351e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *LowerBound, 352e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *UpperBound, 353e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::PHINode **LoopIV) { 354e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser assert(LowerBound->getType() == UpperBound->getType()); 355e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 356e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB; 357e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *Cond, *IVNext; 358e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::PHINode *IV; 359e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 360e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser CondBB = Builder.GetInsertBlock(); 3611bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines // DT = &getAnalysis<DominatorTree>(); 3621bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines // LI = &getAnalysis<LoopInfo>(); 3631bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), nullptr, nullptr); 364bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent()); 365e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 366e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // if (LowerBound < Upperbound) 367e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto LoopHeader 368e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // else 369e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto AfterBB 370e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser CondBB->getTerminator()->eraseFromParent(); 371e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(CondBB); 372e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(LowerBound, UpperBound); 373e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 374e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 375e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ] 376e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // iv.next = iv + 1 377e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // if (iv.next < Upperbound) 378e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto LoopHeader 379e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // else 380e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto AfterBB 381e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(HeaderBB); 382e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV = Builder.CreatePHI(LowerBound->getType(), 2, "X"); 383e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV->addIncoming(LowerBound, CondBB); 384e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1)); 385e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV->addIncoming(IVNext, HeaderBB); 386e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(IVNext, UpperBound); 387e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 388e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser AfterBB->setName("Exit"); 389e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 390e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser *LoopIV = IV; 391e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser return AfterBB; 392e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser } 393e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 3948ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic: 39533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross RSForEachExpandPass(bool pEnableStepOpt = true) 396900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes : ModulePass(ID), Module(nullptr), Context(nullptr), 397bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes mEnableStepOpt(pEnableStepOpt) { 398bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 3998ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 4008ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 401c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { 402c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines // This pass does not use any other analysis passes, but it does 403c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines // add/wrap the existing functions in the module (thus altering the CFG). 404c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines } 405c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines 40633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Build contribution to outgoing argument list for calling a 40733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // ForEach-able function, based on the special parameters of that 40833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // function. 40933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // 41033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Signature - metadata bits for the signature of the ForEach-able function 41133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // X, Arg_p - values derived directly from expanded function, 41233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // suitable for computing arguments for the ForEach-able function 41333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // CalleeArgs - contribution is accumulated here 41433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Bump - invoked once for each contributed outgoing argument 41533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross void ExpandSpecialArguments(uint32_t Signature, 41633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::Value *X, 41733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::Value *Arg_p, 41833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::IRBuilder<> &Builder, 41933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::SmallVector<llvm::Value*, 8> &CalleeArgs, 42033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross std::function<void ()> Bump) { 42133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 42233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) { 42333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross CalleeArgs.push_back(Arg_p); 42433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross Bump(); 42533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 42633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 42733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) { 42833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross CalleeArgs.push_back(X); 42933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross Bump(); 43033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 43133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 432e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) || 433e44a3525b9703739534c3b62d7d1af4c95649a38David Gross bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 43433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 435e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Value *Current = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldCurrent); 436e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 437e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) { 438e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Value *Y = Builder.CreateLoad( 439e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Builder.CreateStructGEP(Current, RsLaunchDimensionsFieldY), "Y"); 440e44a3525b9703739534c3b62d7d1af4c95649a38David Gross CalleeArgs.push_back(Y); 441e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Bump(); 442e44a3525b9703739534c3b62d7d1af4c95649a38David Gross } 443e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 444e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 445e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Value *Z = Builder.CreateLoad( 446e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Builder.CreateStructGEP(Current, RsLaunchDimensionsFieldZ), "Z"); 447e44a3525b9703739534c3b62d7d1af4c95649a38David Gross CalleeArgs.push_back(Z); 448e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Bump(); 449e44a3525b9703739534c3b62d7d1af4c95649a38David Gross } 45033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 45133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 45233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 4538ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser /* Performs the actual optimization on a selected function. On success, the 4548ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * Module will contain a new function of the name "<NAME>.expand" that 4558ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * invokes <NAME>() in a loop with the appropriate parameters. 4568ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser */ 457bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool ExpandFunction(llvm::Function *Function, uint32_t Signature) { 458bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ALOGV("Expanding ForEach-able Function %s", 459bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Function->getName().str().c_str()); 4608ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 4618ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser if (!Signature) { 462bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Signature = getRootSignature(Function); 4638ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser if (!Signature) { 4648ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser // We couldn't determine how to expand this function based on its 4658ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser // function signature. 4668ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser return false; 4678ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 4688ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 4698ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 470bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::DataLayout DL(Module); 4718ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 472bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 473bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes createEmptyExpandedFunction(Function->getName()); 474db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 475bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 476bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * Extract the expanded function's parameters. It is guaranteed by 477bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * createEmptyExpandedFunction that there will be five parameters. 478bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 47933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 48033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS); 48133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 482bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator ExpandedFunctionArgIter = 483bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction->arg_begin(); 484db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 485bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 486bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 487bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 4885010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter); 489bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 490900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *InStep = nullptr; 491900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutStep = nullptr; 492db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 493db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Construct the actual function body. 494bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 495db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 496cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Collect and construct the arguments for the kernel(). 497db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Note that we load any loop-invariant arguments before entering the Loop. 498bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin(); 499db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 500900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Type *InTy = nullptr; 501900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *InBasePtr = nullptr; 502d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) { 503e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Value *InsBasePtr = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInPtr, "inputs_base"); 504e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 505e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Value *InStepsBase = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInStride, "insteps_base"); 506e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 507e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Value *InStepAddr = Builder.CreateConstInBoundsGEP2_32(InStepsBase, 0, 0); 508e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes llvm::LoadInst *InStepArg = Builder.CreateLoad(InStepAddr, 509e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes "instep_addr"); 510e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 511bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes InTy = (FunctionArgIter++)->getType(); 512e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes InStep = getStepValue(&DL, InTy, InStepArg); 513e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 5142b04086acbef6520ae2c54a868b1271abf053122Stephen Hines InStep->setName("instep"); 515e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 516e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Value *InputAddr = Builder.CreateConstInBoundsGEP2_32(InsBasePtr, 0, 0); 517e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes InBasePtr = Builder.CreateLoad(InputAddr, "input_base"); 518db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 519db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 520900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Type *OutTy = nullptr; 521900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutBasePtr = nullptr; 522d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 523bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes OutTy = (FunctionArgIter++)->getType(); 524b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines OutStep = getStepValue(&DL, OutTy, Arg_outstep); 5252b04086acbef6520ae2c54a868b1271abf053122Stephen Hines OutStep->setName("outstep"); 526097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes OutBasePtr = Builder.CreateLoad( 527e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Builder.CreateConstInBoundsGEP2_32( 528e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldOutPtr), 0, 0)); 529db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 530db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 531900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *UsrData = nullptr; 532d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) { 533bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *UsrDataTy = (FunctionArgIter++)->getType(); 534db169187dea4602e4ad32058762d23d474753fd0Stephen Hines UsrData = Builder.CreatePointerCast(Builder.CreateLoad( 535e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldUsr)), UsrDataTy); 536db169187dea4602e4ad32058762d23d474753fd0Stephen Hines UsrData->setName("UsrData"); 537db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 538db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 53933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::PHINode *IV; 54033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross createLoop(Builder, Arg_x1, Arg_x2, &IV); 541097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 54233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 54333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 54433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross [&FunctionArgIter]() { FunctionArgIter++; }); 545db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 546bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(FunctionArgIter == Function->arg_end()); 547db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 548cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Populate the actual call to kernel(). 549db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 550db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 551900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *InPtr = nullptr; 552900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutPtr = nullptr; 553db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 554ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser // Calculate the current input and output pointers 55502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // 556ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser // We always calculate the input/output pointers with a GEP operating on i8 55702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // values and only cast at the very end to OutTy. This is because the step 55802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // between two values is given in bytes. 55902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // 56002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // TODO: We could further optimize the output by using a GEP operation of 56102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // type 'OutTy' in cases where the element type of the allocation allows. 56202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser if (OutBasePtr) { 56302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 56402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutOffset = Builder.CreateMul(OutOffset, OutStep); 56502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset); 56602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutPtr = Builder.CreatePointerCast(OutPtr, OutTy); 56702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser } 568bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 569ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser if (InBasePtr) { 570ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1); 571ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InOffset = Builder.CreateMul(InOffset, InStep); 572ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InPtr = Builder.CreateGEP(InBasePtr, InOffset); 573ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InPtr = Builder.CreatePointerCast(InPtr, InTy); 574ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser } 57502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser 576ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser if (InPtr) { 5777ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines RootArgs.push_back(InPtr); 578db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 579db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 58002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser if (OutPtr) { 5817ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines RootArgs.push_back(OutPtr); 582db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 583db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 584db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (UsrData) { 585db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(UsrData); 586db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 587db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 58833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross RootArgs.append(CalleeArgs.begin(), CalleeArgs.end()); 589db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 590bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Builder.CreateCall(Function, RootArgs); 591db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 5927ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return true; 5937ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 5947ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 5957ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines /* Expand a pass-by-value kernel. 5967ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines */ 597bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool ExpandKernel(llvm::Function *Function, uint32_t Signature) { 598d88177580db4ddedf680854c51db333c97eabc59Stephen Hines bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)); 599bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ALOGV("Expanding kernel Function %s", Function->getName().str().c_str()); 6007ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 6017ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines // TODO: Refactor this to share functionality with ExpandFunction. 602bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::DataLayout DL(Module); 6037ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 604bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 605bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes createEmptyExpandedFunction(Function->getName()); 6067ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 607bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 608bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * Extract the expanded function's parameters. It is guaranteed by 609bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * createEmptyExpandedFunction that there will be five parameters. 610bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 611881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 612881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS); 613881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 614bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator ExpandedFunctionArgIter = 615bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction->arg_begin(); 616bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 617bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 618bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 619bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 6205010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter); 6217ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 6227ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines // Construct the actual function body. 623bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 6247ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 62518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Create TBAA meta-data. 62618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer; 627bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDBuilder MDHelper(*Context); 62814588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien 62918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA"); 630e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", 631e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAARenderScript); 632e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, 633e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation, 0); 634e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", 635e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAARenderScript); 63614588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0); 63718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 63850f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray llvm::MDNode *AliasingDomain, *AliasingScope; 63950f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray AliasingDomain = MDHelper.createAnonymousAliasScopeDomain("RS argument scope domain"); 64050f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray AliasingScope = MDHelper.createAnonymousAliasScope(AliasingDomain, "RS argument scope"); 64150f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 642881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes /* 643881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * Collect and construct the arguments for the kernel(). 644881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * 645881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * Note that we load any loop-invariant arguments before entering the Loop. 646881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes */ 647881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes size_t NumInputs = Function->arg_size(); 6487ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 649881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // No usrData parameter on kernels. 650881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes bccAssert( 651881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)); 652881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 653881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes llvm::Function::arg_iterator ArgIter = Function->arg_begin(); 654881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 655881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Check the return type 656bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Type *OutTy = nullptr; 657bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Value *OutStep = nullptr; 658bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::LoadInst *OutBasePtr = nullptr; 659bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Value *CastedOutBasePtr = nullptr; 660881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 661e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes bool PassOutByPointer = false; 662881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 663d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 664bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *OutBaseTy = Function->getReturnType(); 665881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 66674a4b08235990916911b8fe758d656c1171faf26Stephen Hines if (OutBaseTy->isVoidTy()) { 667e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes PassOutByPointer = true; 668881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes OutTy = ArgIter->getType(); 669881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 670881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes ArgIter++; 671881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes --NumInputs; 67274a4b08235990916911b8fe758d656c1171faf26Stephen Hines } else { 67374a4b08235990916911b8fe758d656c1171faf26Stephen Hines // We don't increment Args, since we are using the actual return type. 674881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes OutTy = OutBaseTy->getPointerTo(); 67574a4b08235990916911b8fe758d656c1171faf26Stephen Hines } 676881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 677b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines OutStep = getStepValue(&DL, OutTy, Arg_outstep); 67874a4b08235990916911b8fe758d656c1171faf26Stephen Hines OutStep->setName("outstep"); 679097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes OutBasePtr = Builder.CreateLoad( 680e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Builder.CreateConstInBoundsGEP2_32( 681e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldOutPtr), 0, 0)); 682097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 6839c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines if (gEnableRsTbaa) { 6849c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines OutBasePtr->setMetadata("tbaa", TBAAPointer); 6859c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 68650f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 68750f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray OutBasePtr->setMetadata("alias.scope", AliasingScope); 68850f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 689bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out"); 69074a4b08235990916911b8fe758d656c1171faf26Stephen Hines } 69174a4b08235990916911b8fe758d656c1171faf26Stephen Hines 69233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::PHINode *IV; 69333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross createLoop(Builder, Arg_x1, Arg_x2, &IV); 69433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 69533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 69633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 69733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross [&NumInputs]() { --NumInputs; }); 69833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 699bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::SmallVector<llvm::Type*, 8> InTypes; 700bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::SmallVector<llvm::Value*, 8> InSteps; 701bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::SmallVector<llvm::Value*, 8> InBasePtrs; 702bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::SmallVector<bool, 8> InIsStructPointer; 703881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 704e44a3525b9703739534c3b62d7d1af4c95649a38David Gross bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT); 705e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 706e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes if (NumInputs > 0) { 707e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Value *InsBasePtr = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInPtr, "inputs_base"); 7087ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 709e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Value *InStepsBase = Builder.CreateStructGEP(Arg_p, RsExpandKernelDriverInfoPfxFieldInStride, "insteps_base"); 710881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 711881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes for (size_t InputIndex = 0; InputIndex < NumInputs; 712881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes ++InputIndex, ArgIter++) { 713881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 714e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Value *InStepAddr = Builder.CreateConstInBoundsGEP2_32(InStepsBase, 0, InputIndex); 715881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes llvm::LoadInst *InStepArg = Builder.CreateLoad(InStepAddr, 716881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes "instep_addr"); 717881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 718326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes llvm::Type *InType = ArgIter->getType(); 719326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes 720326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes /* 721326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes * AArch64 calling dictate that structs of sufficient size get passed by 722326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes * pointer instead of passed by value. This, combined with the fact 723326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes * that we don't allow kernels to operate on pointer data means that if 724326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes * we see a kernel with a pointer parameter we know that it is struct 725326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes * input that has been promoted. As such we don't need to convert its 726326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes * type to a pointer. Later we will need to know to avoid a load, so we 727326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes * save this information in InIsStructPointer. 728326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes */ 729326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes if (!InType->isPointerTy()) { 730326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes InType = InType->getPointerTo(); 731326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes InIsStructPointer.push_back(false); 732326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes } else { 733326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes InIsStructPointer.push_back(true); 734326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes } 735326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes 736881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes llvm::Value *InStep = getStepValue(&DL, InType, InStepArg); 737881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 738881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes InStep->setName("instep"); 739881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 740e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Value *InputAddr = Builder.CreateConstInBoundsGEP2_32(InsBasePtr, 0, InputIndex); 741881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr, 742881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes "input_base"); 743bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Value *CastInBasePtr = Builder.CreatePointerCast(InBasePtr, 744bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray InType, "casted_in"); 745881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes if (gEnableRsTbaa) { 746881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes InBasePtr->setMetadata("tbaa", TBAAPointer); 747881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 748881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 74950f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray InBasePtr->setMetadata("alias.scope", AliasingScope); 75050f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 751881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes InTypes.push_back(InType); 752881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes InSteps.push_back(InStep); 753bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray InBasePtrs.push_back(CastInBasePtr); 754881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 755881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 7567ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 7577ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines // Populate the actual call to kernel(). 7587ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 7597ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 7604102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser // Calculate the current input and output pointers 7617b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 7627b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 763881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // We always calculate the input/output pointers with a GEP operating on i8 764881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // values combined with a multiplication and only cast at the very end to 765881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // OutTy. This is to account for dynamic stepping sizes when the value 766881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // isn't apparent at compile time. In the (very common) case when we know 767881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // the step size at compile time, due to haveing complete type information 768881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // this multiplication will optmized out and produces code equivalent to a 769881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // a GEP on a pointer of the correct type. 770881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 771881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Output 772881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 773900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutPtr = nullptr; 774bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray if (CastedOutBasePtr) { 7757b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 776881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 777bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray OutPtr = Builder.CreateGEP(CastedOutBasePtr, OutOffset); 778bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 779e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes if (PassOutByPointer) { 780881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes RootArgs.push_back(OutPtr); 781881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 7824102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser } 7837b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser 784881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Inputs 78574a4b08235990916911b8fe758d656c1171faf26Stephen Hines 786881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes if (NumInputs > 0) { 787881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1); 788881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 789881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes for (size_t Index = 0; Index < NumInputs; ++Index) { 790bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Value *InPtr = Builder.CreateGEP(InBasePtrs[Index], Offset); 791326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes llvm::Value *Input; 792326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes 793326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes if (InIsStructPointer[Index]) { 794326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes Input = InPtr; 795326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes 796326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes } else { 797326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input"); 798326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes 799326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes if (gEnableRsTbaa) { 800326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes InputLoad->setMetadata("tbaa", TBAAAllocation); 801326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes } 802881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 80350f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray InputLoad->setMetadata("alias.scope", AliasingScope); 80450f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 805326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes Input = InputLoad; 806881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 807881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 808881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes RootArgs.push_back(Input); 8099c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 8107ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 8117ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 81233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross RootArgs.append(CalleeArgs.begin(), CalleeArgs.end()); 8137ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 814bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs); 8157ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 816e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes if (OutPtr && !PassOutByPointer) { 81718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr); 8189c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines if (gEnableRsTbaa) { 8199c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines Store->setMetadata("tbaa", TBAAAllocation); 8209c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 82150f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray Store->setMetadata("alias.scope", AliasingScope); 8227ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 8237ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 824db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return true; 825db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 826db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 82718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// @brief Checks if pointers to allocation internals are exposed 82818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// 82918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// This function verifies if through the parameters passed to the kernel 83018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// or through calls to the runtime library the script gains access to 83118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// pointers pointing to data within a RenderScript Allocation. 83218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// If we know we control all loads from and stores to data within 83318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// RenderScript allocations and if we know the run-time internal accesses 83418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// are all annotated with RenderScript TBAA metadata, only then we 83518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// can safely use TBAA to distinguish between generic and from-allocation 83618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// pointers. 837bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool allocPointersExposed(llvm::Module &Module) { 83818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Old style kernel function can expose pointers to elements within 83918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // allocations. 84018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // TODO: Extend analysis to allow simple cases of old-style kernels. 84125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines for (size_t i = 0; i < mExportForEachCount; ++i) { 84225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char *Name = mExportForEachNameList[i]; 84325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t Signature = mExportForEachSignatureList[i]; 844bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (Module.getFunction(Name) && 845d88177580db4ddedf680854c51db333c97eabc59Stephen Hines !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) { 84618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 84718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 84818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 84918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 85018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Check for library functions that expose a pointer to an Allocation or 85118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // that are not yet annotated with RenderScript-specific tbaa information. 85218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser static std::vector<std::string> Funcs; 85318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 85418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // rsGetElementAt(...) 85518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser Funcs.push_back("_Z14rsGetElementAt13rs_allocationj"); 85618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj"); 85718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj"); 85818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // rsSetElementAt() 85918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj"); 86018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj"); 86118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj"); 86218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // rsGetElementAtYuv_uchar_Y() 86318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj"); 86418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // rsGetElementAtYuv_uchar_U() 86518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj"); 86618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // rsGetElementAtYuv_uchar_V() 86718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj"); 86818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 86918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser for (std::vector<std::string>::iterator FI = Funcs.begin(), 87018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser FE = Funcs.end(); 87118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser FI != FE; ++FI) { 872bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *Function = Module.getFunction(*FI); 87318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 874bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (!Function) { 87518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser ALOGE("Missing run-time function '%s'", FI->c_str()); 87618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 87718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 87818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 879bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (Function->getNumUses() > 0) { 88018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 88118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 88218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 88318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 88418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return false; 88518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 88618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 88718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// @brief Connect RenderScript TBAA metadata to C/C++ metadata 88818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// 88918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// The TBAA metadata used to annotate loads/stores from RenderScript 890e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// Allocations is generated in a separate TBAA tree with a 891e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// "RenderScript TBAA" root node. LLVM does assume may-alias for all nodes in 892e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// unrelated alias analysis trees. This function makes the RenderScript TBAA 893e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With 894e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// the connected trees every access to an Allocation is resolved to 895e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// must-alias if compared to a normal C/C++ access. 896bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes void connectRenderScriptTBAAMetadata(llvm::Module &Module) { 897bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDBuilder MDHelper(*Context); 898bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDNode *TBAARenderScript = 899bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes MDHelper.createTBAARoot("RenderScript TBAA"); 90018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 901bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA"); 902bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript", 903bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes TBAARoot); 90418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 90518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser TBAARenderScript->replaceAllUsesWith(TBAAMergedRS); 90618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 90718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 908bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes virtual bool runOnModule(llvm::Module &Module) { 909bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool Changed = false; 910bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes this->Module = &Module; 911bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes this->Context = &Module.getContext(); 912bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 913bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes this->buildTypes(); 914bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 915bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bcinfo::MetadataExtractor me(&Module); 91625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines if (!me.extract()) { 91725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines ALOGE("Could not extract metadata from module!"); 91825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines return false; 91925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines } 92025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachCount = me.getExportForEachSignatureCount(); 92125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachNameList = me.getExportForEachNameList(); 92225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachSignatureList = me.getExportForEachSignatureList(); 923db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 924bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool AllocsExposed = allocPointersExposed(Module); 92518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 92625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines for (size_t i = 0; i < mExportForEachCount; ++i) { 92725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char *name = mExportForEachNameList[i]; 92825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t signature = mExportForEachSignatureList[i]; 929bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *kernel = Module.getFunction(name); 930cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser if (kernel) { 931d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) { 932cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser Changed |= ExpandKernel(kernel, signature); 933acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 934acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } else if (kernel->getReturnType()->isVoidTy()) { 935cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser Changed |= ExpandFunction(kernel, signature); 936acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 937acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } else { 938acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // There are some graphics root functions that are not 939acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // expanded, but that will be called directly. For those 940acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // functions, we can not set the linkage to internal. 941acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } 942cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines } 943db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 944db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 9459c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines if (gEnableRsTbaa && !AllocsExposed) { 946bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes connectRenderScriptTBAAMetadata(Module); 94718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 94818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 949cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines return Changed; 950db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 951db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 952db169187dea4602e4ad32058762d23d474753fd0Stephen Hines virtual const char *getPassName() const { 953db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return "ForEach-able Function Expansion"; 954db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 955db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 9567a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}; // end RSForEachExpandPass 957db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 9587a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace 9597a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 9607a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaochar RSForEachExpandPass::ID = 0; 96133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossstatic llvm::RegisterPass<RSForEachExpandPass> X("foreachexp", "ForEach Expand Pass"); 962db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 963db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc { 964db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 9657a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass * 96625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen HinescreateRSForEachExpandPass(bool pEnableStepOpt){ 96725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines return new RSForEachExpandPass(pEnableStepOpt); 9687a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} 969db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 9707a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc 971