RSKernelExpand.cpp revision 083ef3c3d1991e9fa443ff76065ec6528dd4730c
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/* 2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project 3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License"); 5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License. 6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at 7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * http://www.apache.org/licenses/LICENSE-2.0 9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software 11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS, 12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and 14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License. 
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h" 18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h" 197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib> 2133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional> 227a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 23b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h> 24b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h> 25b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h> 26b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h> 2718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h> 28b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h> 29c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h> 307ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h> 31b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h> 32cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h> 33b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h> 34806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h> 35c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang 36c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h" 37ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h" 38db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 39d88177580db4ddedf680854c51db333c97eabc59Stephen Hines#include "bcinfo/MetadataExtractor.h" 
40d88177580db4ddedf680854c51db333c97eabc59Stephen Hines 415010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes#define NUM_EXPANDED_FUNCTION_PARAMS 4 42bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 437a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc; 447a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 45db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace { 467a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 47354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hinesstatic const bool gEnableRsTbaa = true; 489c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines 497a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao/* RSForEachExpandPass - This pass operates on functions that are able to be 507a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the 517a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * ForEach-able function to be invoked over the appropriate data cells of the 527a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * input/output allocations (adjusting other relevant parameters as we go). We 537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * support doing this for any ForEach-able compute kernels. The new function 547a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * name is the original function name followed by ".expand". Note that we 557a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * still generate code for the original function. 
567a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */ 577a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaoclass RSForEachExpandPass : public llvm::ModulePass { 5833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grosspublic: 59db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static char ID; 60db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 6133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossprivate: 62e44a3525b9703739534c3b62d7d1af4c95649a38David Gross static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h 63e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 64e44a3525b9703739534c3b62d7d1af4c95649a38David Gross enum RsLaunchDimensionsField { 65e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldX, 66e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldY, 67e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldZ, 68e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldLod, 69e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldFace, 70e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldArray, 71e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 72e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldCount 73e44a3525b9703739534c3b62d7d1af4c95649a38David Gross }; 74e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 75e44a3525b9703739534c3b62d7d1af4c95649a38David Gross enum RsExpandKernelDriverInfoPfxField { 76e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInPtr, 77e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInStride, 78e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInLen, 79e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutPtr, 80e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 
RsExpandKernelDriverInfoPfxFieldOutStride, 81e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutLen, 82e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldDim, 83e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldCurrent, 84e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldUsr, 85e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldUsLenr, 86e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 87e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldCount 88e44a3525b9703739534c3b62d7d1af4c95649a38David Gross }; 8933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 90bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Module *Module; 91bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::LLVMContext *Context; 92bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 93bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 94e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * Pointer to LLVM type information for the the function signature 95e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * for expanded kernels. This must be re-calculated for each 96bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * module the pass is run on. 
97bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 98bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::FunctionType *ExpandedFunctionType; 99db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 10025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t mExportForEachCount; 10125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char **mExportForEachNameList; 10225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const uint32_t *mExportForEachSignatureList; 103cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines 1042b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Turns on optimization of allocation stride values. 1052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bool mEnableStepOpt; 1062b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 107bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes uint32_t getRootSignature(llvm::Function *Function) { 108db169187dea4602e4ad32058762d23d474753fd0Stephen Hines const llvm::NamedMDNode *ExportForEachMetadata = 109bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Module->getNamedMetadata("#rs_export_foreach"); 110db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 111db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (!ExportForEachMetadata) { 112db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 8> RootArgTys; 113bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes for (llvm::Function::arg_iterator B = Function->arg_begin(), 114bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes E = Function->arg_end(); 115db169187dea4602e4ad32058762d23d474753fd0Stephen Hines B != E; 116db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ++B) { 117db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgTys.push_back(B->getType()); 118db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 119db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 120db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // For pre-ICS bitcode, we may not have 
signature information. In that 121db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // case, we use the size of the RootArgTys to select the number of 122db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // arguments. 123db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return (1 << RootArgTys.size()) - 1; 124db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 125db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1267ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines if (ExportForEachMetadata->getNumOperands() == 0) { 1277ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return 0; 1287ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 1297ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 1306e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines bccAssert(ExportForEachMetadata->getNumOperands() > 0); 131db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 132cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // We only handle the case for legacy root() functions here, so this is 133cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // hard-coded to look at only the first such function. 
134db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 135900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes if (SigNode != nullptr && SigNode->getNumOperands() == 1) { 1361bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines llvm::Metadata *SigMD = SigNode->getOperand(0); 1371bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) { 1381bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines llvm::StringRef SigString = SigS->getString(); 139db169187dea4602e4ad32058762d23d474753fd0Stephen Hines uint32_t Signature = 0; 140db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigString.getAsInteger(10, Signature)) { 141db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 142db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 143db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 144db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature; 145db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 146db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 147db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 149db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 150db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 151429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray bool isStepOptSupported(llvm::Type *AllocType) { 152429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 153429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 154429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 155429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 156429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (mEnableStepOpt) { 
157429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 158429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 159429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 160429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType == VoidPtrTy) { 161429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 162429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 163429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 164429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (!PT) { 165429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 166429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 167429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 168429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // remaining conditions are 64-bit only 169429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (VoidPtrTy->getPrimitiveSizeInBits() == 32) { 170429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return true; 171429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 172429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 173429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // coerce suggests an upconverted struct type, which we can't support 174429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) { 175429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 176429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 177429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 178429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported 179429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2); 180429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128); 181429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType == V2xi64Ty || AllocType == Int128Ty) { 
182429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 183429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 184429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 185429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return true; 186429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 187429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 1882b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Get the actual value we should use to step through an allocation. 1897b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 1907b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // Normally the value we use to step through an allocation is given to us by 1917b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // the driver. However, for certain primitive data types, we can derive an 1927b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // integer constant for the step value. We use this integer constant whenever 1937b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // possible to allow further compiler optimizations to take place. 1947b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 195b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines // DL - Target Data size/layout information. 1962b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // T - Type of allocation (should be a pointer). 1972b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // OrigStep - Original step increment (root.expand() input from driver). 
198bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType, 1992b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *OrigStep) { 200b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines bccAssert(DL); 201bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(AllocType); 2022b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bccAssert(OrigStep); 203bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 204429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (isStepOptSupported(AllocType)) { 2052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Type *ET = PT->getElementType(); 206b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines uint64_t ETSize = DL->getTypeAllocSize(ET); 207bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 2082b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return llvm::ConstantInt::get(Int32Ty, ETSize); 2092b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } else { 2102b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return OrigStep; 2112b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 2122b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 2132b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 214097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes /// Builds the types required by the pass for the given context. 215bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes void buildTypes(void) { 216e44a3525b9703739534c3b62d7d1af4c95649a38David Gross // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs. 
217bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 218e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8Ty = llvm::Type::getInt8Ty(*Context); 219e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8PtrTy = Int8Ty->getPointerTo(); 220e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT); 221e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 222e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32ArrayInputLimitTy = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT); 223e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 224e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32Array4Ty = llvm::ArrayType::get(Int32Ty, 4); 225097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 226097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h: 227db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 228e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * struct RsLaunchDimensions { 229e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t x; 230db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t y; 231db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t z; 232e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t lod; 233e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t face; 234e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t array[4]; 235e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * }; 236e44a3525b9703739534c3b62d7d1af4c95649a38David Gross */ 237e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes; 238e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 
RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t x 239e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t y 240e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t z 241e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t lod 242e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t face 243e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4] 244e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType *RsLaunchDimensionsTy = 245e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions"); 246e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 2471d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h: 248e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 249e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * struct RsExpandKernelDriverInfoPfx { 250e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]; 251e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t inStride[RS_KERNEL_INPUT_LIMIT]; 252e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t inLen; 253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 254e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]; 255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t outStride[RS_KERNEL_INPUT_LIMIT]; 256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t outLen; 257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 258e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // Dimension of the launch 
259e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * RsLaunchDimensions dim; 260e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 261e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // The walking iterator of the launch 262e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * RsLaunchDimensions current; 263e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 264e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * const void *usr; 265e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t usrLen; 266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 267e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // Items below this line are not used by the compiler and can be change in the driver. 268e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // So the compiler must assume there are an unknown number of fields of unknown type 269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // beginning here. 270db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * }; 2711d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross * 2721d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp). 
273db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes; 275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT] 276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t inStride[RS_KERNEL_INPUT_LIMIT] 277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t inLen 278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT] 279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t outStride[RS_KERNEL_INPUT_LIMIT] 280e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t outLen 281e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions dim 282e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions current 283e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy); // const void *usr 284e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t usrLen 285e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType *RsExpandKernelDriverInfoPfxTy = 286e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx"); 287bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris 
Wailes 288bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes // Create the function type for expanded kernels. 289bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 290e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo(); 291bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 292bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::SmallVector<llvm::Type*, 8> ParamTypes; 293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross ParamTypes.push_back(RsExpandKernelDriverInfoPfxPtrTy); // const RsExpandKernelDriverInfoPfx *p 294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross ParamTypes.push_back(Int32Ty); // uint32_t x1 295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross ParamTypes.push_back(Int32Ty); // uint32_t x2 296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross ParamTypes.push_back(Int32Ty); // uint32_t outstep 297bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 298e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes ExpandedFunctionType = 299e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes llvm::FunctionType::get(llvm::Type::getVoidTy(*Context), ParamTypes, 300e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes false); 3018ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 3028ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 303357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// @brief Create skeleton of the expanded function. 
304357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 305357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// This creates a function with the following signature: 306357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 307357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 3085010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes /// uint32_t outstep) 309357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 310357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) { 311bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 312bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::Create(ExpandedFunctionType, 313bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::GlobalValue::ExternalLinkage, 314bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes OldName + ".expand", Module); 315bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 316bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS); 317bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 318bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin(); 319bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 320bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("p"); 321bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("x1"); 322bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("x2"); 323bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("arg_outstep"); 324bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 325bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 326bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 
ExpandedFunction); 327806075b3a54af826fea78490fb213d8a0784138eTobias Grosser llvm::IRBuilder<> Builder(Begin); 328806075b3a54af826fea78490fb213d8a0784138eTobias Grosser Builder.CreateRetVoid(); 329806075b3a54af826fea78490fb213d8a0784138eTobias Grosser 330bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes return ExpandedFunction; 331357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser } 332357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser 333e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @brief Create an empty loop 334e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 335e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// Create a loop of the form: 336e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 337e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// for (i = LowerBound; i < UpperBound; i++) 338e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// ; 339e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 340e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// After the loop has been created, the builder is set such that 341e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// instructions can be added to the loop body. 342e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 343e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param Builder The builder to use to build this loop. The current 344e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// position of the builder is the position the loop 345e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// will be inserted. 346e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LowerBound The first value of the loop iterator 347e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param UpperBound The maximal value of the loop iterator 348e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LoopIV A reference that will be set to the loop iterator. 
349e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @return The BasicBlock that will be executed after the loop. 350e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder, 351e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *LowerBound, 352e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *UpperBound, 353e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::PHINode **LoopIV) { 354c2ca742d7d0197c52e49467862844463fb42280fDavid Gross bccAssert(LowerBound->getType() == UpperBound->getType()); 355e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 356e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB; 357e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *Cond, *IVNext; 358e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::PHINode *IV; 359e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 360e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser CondBB = Builder.GetInsertBlock(); 3611bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), nullptr, nullptr); 362bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent()); 363e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 364e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // if (LowerBound < Upperbound) 365e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto LoopHeader 366e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // else 367e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto AfterBB 368e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser CondBB->getTerminator()->eraseFromParent(); 369e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(CondBB); 370e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = 
Builder.CreateICmpULT(LowerBound, UpperBound); 371e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 372e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 373e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ] 374e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // iv.next = iv + 1 375e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // if (iv.next < Upperbound) 376e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto LoopHeader 377e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // else 378e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto AfterBB 379e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(HeaderBB); 380e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV = Builder.CreatePHI(LowerBound->getType(), 2, "X"); 381e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV->addIncoming(LowerBound, CondBB); 382e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1)); 383e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV->addIncoming(IVNext, HeaderBB); 384e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(IVNext, UpperBound); 385e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 386e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser AfterBB->setName("Exit"); 387e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 388e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser *LoopIV = IV; 389e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser return AfterBB; 390e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser } 391e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 39228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Finish building the outgoing argument list for 
calling a ForEach-able function. 39328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // 39428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // ArgVector - on input, the non-special arguments 39528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // on output, the non-special arguments combined with the special arguments 39628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // from SpecialArgVector 39728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // SpecialArgVector - special arguments (from ExpandSpecialArguments()) 39828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // SpecialArgContextIdx - return value of ExpandSpecialArguments() 39928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // (position of context argument in SpecialArgVector) 40028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // CalleeFunction - the ForEach-able function being called 40128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Builder - for inserting code into the caller function 40228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen> 40328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross void finishArgList( llvm::SmallVector<llvm::Value *, ArgVectorLen> &ArgVector, 40428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector, 40528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int SpecialArgContextIdx, 40628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const llvm::Function &CalleeFunction, 40728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::IRBuilder<> &CallerBuilder) { 40828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross /* The context argument (if any) is a pointer to an opaque user-visible type that differs from 40928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the 
41028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * two types represent the same thing). Therefore, we must introduce a pointer cast when 41128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * generating a call to the kernel function. 41228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross */ 41328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int ArgContextIdx = 41428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx; 41528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end()); 41628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross if (ArgContextIdx >= 0) { 41728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Type *ContextArgType = nullptr; 41828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int ArgIdx = ArgContextIdx; 41928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross for (const auto &Arg : CalleeFunction.getArgumentList()) { 42028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross if (!ArgIdx--) { 42128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ContextArgType = Arg.getType(); 42228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross break; 42328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 42428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 42528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross bccAssert(ContextArgType); 42628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType); 42728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 42828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 42928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 430083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // GEPHelper() returns a SmallVector of values suitable for passing 431083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // to IRBuilder::CreateGEP(), and SmallGEPIndices is a 
typedef for 432083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // the returned data type. It is sized so that the SmallVector 433083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // returned by GEPHelper() never needs to do a heap allocation for 434083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // any list of GEP indices it encounters in the code. 435083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices; 436083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 437083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Helper for turning a list of constant integer GEP indices into a 438083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // SmallVector of llvm::Value*. The return value is suitable for 439083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP(). 440083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // 441083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Inputs: 442083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // I32Args should be integers which represent the index arguments 443083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // to a GEP instruction. 444083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // 445083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Returns: 446083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Returns a SmallVector of ConstantInts. 
447083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices GEPHelper(std::initializer_list<int32_t> I32Args) { 448083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices Out(I32Args.size()); 449083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context); 450083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala std::transform(I32Args.begin(), I32Args.end(), Out.begin(), 451083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); }); 452083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala return Out; 453083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala } 454083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 4558ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic: 45633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross RSForEachExpandPass(bool pEnableStepOpt = true) 457900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes : ModulePass(ID), Module(nullptr), Context(nullptr), 458bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes mEnableStepOpt(pEnableStepOpt) { 459bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 4608ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 4618ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 462c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { 463c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines // This pass does not use any other analysis passes, but it does 464c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines // add/wrap the existing functions in the module (thus altering the CFG). 
465c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines } 466c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines 46733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Build contribution to outgoing argument list for calling a 46833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // ForEach-able function, based on the special parameters of that 46933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // function. 47033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // 47133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Signature - metadata bits for the signature of the ForEach-able function 47233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // X, Arg_p - values derived directly from expanded function, 47333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // suitable for computing arguments for the ForEach-able function 47433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // CalleeArgs - contribution is accumulated here 47533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Bump - invoked once for each contributed outgoing argument 476083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // LoopHeaderInsertionPoint - an Instruction in the loop header, before which 477083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // this function can insert loop-invariant loads 47828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // 47928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Return value is the (zero-based) position of the context (Arg_p) 48028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // argument in the CalleeArgs vector, or a negative value if the 48128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // context argument is not placed in the CalleeArgs vector. 
48228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int ExpandSpecialArguments(uint32_t Signature, 48328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Value *X, 48428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Value *Arg_p, 48528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::IRBuilder<> &Builder, 48628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::SmallVector<llvm::Value*, 8> &CalleeArgs, 487083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala std::function<void ()> Bump, 488083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Instruction *LoopHeaderInsertionPoint) { 48928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 49028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross bccAssert(CalleeArgs.empty()); 49128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 49228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int Return = -1; 49333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) { 49433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross CalleeArgs.push_back(Arg_p); 49533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross Bump(); 49628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross Return = CalleeArgs.size() - 1; 49733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 49833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 49933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) { 50033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross CalleeArgs.push_back(X); 50133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross Bump(); 50233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 50333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 504e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) || 505e44a3525b9703739534c3b62d7d1af4c95649a38David Gross bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 
506083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala bccAssert(LoopHeaderInsertionPoint); 50733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 508083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Y and Z are loop invariant, so they can be hoisted out of the 509083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // loop. Set the IRBuilder insertion point to the loop header. 510083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala auto OldInsertionPoint = Builder.saveIP(); 511083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.SetInsertPoint(LoopHeaderInsertionPoint); 512e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 513e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) { 514083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent, 515083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala RsLaunchDimensionsFieldY})); 516083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep"); 517083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y")); 518e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Bump(); 519e44a3525b9703739534c3b62d7d1af4c95649a38David Gross } 520e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 521e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 522083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent, 523083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala RsLaunchDimensionsFieldZ})); 524083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep"); 525083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z")); 
526e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Bump(); 527e44a3525b9703739534c3b62d7d1af4c95649a38David Gross } 528083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 529083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.restoreIP(OldInsertionPoint); 53033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 53128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 53228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross return Return; 53333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 53433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 5358ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser /* Performs the actual optimization on a selected function. On success, the 5368ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * Module will contain a new function of the name "<NAME>.expand" that 5378ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * invokes <NAME>() in a loop with the appropriate parameters. 5388ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser */ 539bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool ExpandFunction(llvm::Function *Function, uint32_t Signature) { 540bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ALOGV("Expanding ForEach-able Function %s", 541bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Function->getName().str().c_str()); 5428ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 5438ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser if (!Signature) { 544bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Signature = getRootSignature(Function); 5458ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser if (!Signature) { 5468ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser // We couldn't determine how to expand this function based on its 5478ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser // function signature. 
5488ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser return false; 5498ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 5508ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 5518ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 552bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::DataLayout DL(Module); 5538ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 554bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 555bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes createEmptyExpandedFunction(Function->getName()); 556db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 557bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 558bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * Extract the expanded function's parameters. It is guaranteed by 559bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * createEmptyExpandedFunction that there will be five parameters. 560bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 56133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 56233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS); 56333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 564bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator ExpandedFunctionArgIter = 565bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction->arg_begin(); 566db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 567bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 568bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 569bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 5705010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter); 571bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 
572900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *InStep = nullptr; 573900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutStep = nullptr; 574db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 575db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Construct the actual function body. 576bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 577db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 578cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Collect and construct the arguments for the kernel(). 579db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Note that we load any loop-invariant arguments before entering the Loop. 580bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin(); 581db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 582900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Type *InTy = nullptr; 583083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *InBufPtr = nullptr; 584d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) { 585083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0})); 586083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::LoadInst *InStepArg = Builder.CreateLoad( 587083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr"); 588e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 589bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes InTy = (FunctionArgIter++)->getType(); 590e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes InStep = getStepValue(&DL, InTy, InStepArg); 591e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 5922b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 
InStep->setName("instep"); 593e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 594083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0})); 595083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InBufPtr = Builder.CreateLoad( 596083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf"); 597db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 598db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 599900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Type *OutTy = nullptr; 600900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutBasePtr = nullptr; 601d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 602bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes OutTy = (FunctionArgIter++)->getType(); 603b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines OutStep = getStepValue(&DL, OutTy, Arg_outstep); 6042b04086acbef6520ae2c54a868b1271abf053122Stephen Hines OutStep->setName("outstep"); 605083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0})); 606083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep")); 607db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 608db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 609900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *UsrData = nullptr; 610d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) { 611bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *UsrDataTy = (FunctionArgIter++)->getType(); 612083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, 
Arg_p, RsExpandKernelDriverInfoPfxFieldUsr); 613083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy); 614db169187dea4602e4ad32058762d23d474753fd0Stephen Hines UsrData->setName("UsrData"); 615db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 616db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 617083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock(); 61833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::PHINode *IV; 61933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross createLoop(Builder, Arg_x1, Arg_x2, &IV); 620097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 62133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 62228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 623083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [&FunctionArgIter]() { FunctionArgIter++; }, 624083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala LoopHeader->getTerminator()); 625db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 626bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(FunctionArgIter == Function->arg_end()); 627db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 628cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Populate the actual call to kernel(). 
629db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 630db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 631900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *InPtr = nullptr; 632900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutPtr = nullptr; 633db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 634ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser // Calculate the current input and output pointers 63502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // 636ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser // We always calculate the input/output pointers with a GEP operating on i8 63702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // values and only cast at the very end to OutTy. This is because the step 63802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // between two values is given in bytes. 63902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // 64002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // TODO: We could further optimize the output by using a GEP operation of 64102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // type 'OutTy' in cases where the element type of the allocation allows. 
64202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser if (OutBasePtr) { 64302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 64402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutOffset = Builder.CreateMul(OutOffset, OutStep); 645083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset); 64602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutPtr = Builder.CreatePointerCast(OutPtr, OutTy); 64702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser } 648bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 649083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala if (InBufPtr) { 650ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1); 651ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InOffset = Builder.CreateMul(InOffset, InStep); 652083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset); 653ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InPtr = Builder.CreatePointerCast(InPtr, InTy); 654ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser } 65502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser 656ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser if (InPtr) { 6577ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines RootArgs.push_back(InPtr); 658db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 659db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 66002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser if (OutPtr) { 6617ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines RootArgs.push_back(OutPtr); 662db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 663db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 664db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (UsrData) { 665db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(UsrData); 
666db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 667db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 66828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder); 669db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 670bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Builder.CreateCall(Function, RootArgs); 671db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 6727ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return true; 6737ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 6747ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 6757ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines /* Expand a pass-by-value kernel. 6767ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines */ 677bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool ExpandKernel(llvm::Function *Function, uint32_t Signature) { 678d88177580db4ddedf680854c51db333c97eabc59Stephen Hines bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)); 679bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ALOGV("Expanding kernel Function %s", Function->getName().str().c_str()); 6807ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 6817ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines // TODO: Refactor this to share functionality with ExpandFunction. 682bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::DataLayout DL(Module); 6837ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 684bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 685bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes createEmptyExpandedFunction(Function->getName()); 6867ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 687bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 688bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * Extract the expanded function's parameters. 
It is guaranteed by 689bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * createEmptyExpandedFunction that there will be five parameters. 690bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 691881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 692881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS); 693881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 694bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator ExpandedFunctionArgIter = 695bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction->arg_begin(); 696bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 697bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 698bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 699bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 7005010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter); 7017ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 7027ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines // Construct the actual function body. 703bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 7047ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 70518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Create TBAA meta-data. 
706354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, 707354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines *TBAAAllocation, *TBAAPointer; 708bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDBuilder MDHelper(*Context); 70914588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien 710354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScriptDistinct = 711354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines MDHelper.createTBAARoot("RenderScript Distinct TBAA"); 712354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScript = MDHelper.createTBAANode("RenderScript TBAA", 713354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScriptDistinct); 714e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", 715e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAARenderScript); 716e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, 717e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation, 0); 718e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", 719e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAARenderScript); 72014588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0); 72118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 72250f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray llvm::MDNode *AliasingDomain, *AliasingScope; 72350f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray AliasingDomain = MDHelper.createAnonymousAliasScopeDomain("RS argument scope domain"); 72450f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray AliasingScope = MDHelper.createAnonymousAliasScope(AliasingDomain, "RS argument scope"); 72550f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 
726881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes /* 727881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * Collect and construct the arguments for the kernel(). 728881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * 729881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * Note that we load any loop-invariant arguments before entering the Loop. 730881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes */ 731083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala size_t NumRemainingInputs = Function->arg_size(); 7327ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 733881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // No usrData parameter on kernels. 734881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes bccAssert( 735881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)); 736881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 737881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes llvm::Function::arg_iterator ArgIter = Function->arg_begin(); 738881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 739881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Check the return type 740bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Type *OutTy = nullptr; 741bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Value *OutStep = nullptr; 742bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::LoadInst *OutBasePtr = nullptr; 743bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Value *CastedOutBasePtr = nullptr; 744881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 745e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes bool PassOutByPointer = false; 746881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 747d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 748bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *OutBaseTy = Function->getReturnType(); 
749881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 75074a4b08235990916911b8fe758d656c1171faf26Stephen Hines if (OutBaseTy->isVoidTy()) { 751e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes PassOutByPointer = true; 752881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes OutTy = ArgIter->getType(); 753881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 754881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes ArgIter++; 755083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala --NumRemainingInputs; 75674a4b08235990916911b8fe758d656c1171faf26Stephen Hines } else { 75774a4b08235990916911b8fe758d656c1171faf26Stephen Hines // We don't increment Args, since we are using the actual return type. 758881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes OutTy = OutBaseTy->getPointerTo(); 75974a4b08235990916911b8fe758d656c1171faf26Stephen Hines } 760881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 761b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines OutStep = getStepValue(&DL, OutTy, Arg_outstep); 76274a4b08235990916911b8fe758d656c1171faf26Stephen Hines OutStep->setName("outstep"); 763083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0})); 764083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep")); 765097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 7669c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines if (gEnableRsTbaa) { 7679c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines OutBasePtr->setMetadata("tbaa", TBAAPointer); 7689c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 76950f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 77050f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray OutBasePtr->setMetadata("alias.scope", AliasingScope); 77150f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 772bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray CastedOutBasePtr = 
Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out"); 77374a4b08235990916911b8fe758d656c1171faf26Stephen Hines } 77474a4b08235990916911b8fe758d656c1171faf26Stephen Hines 775bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::SmallVector<llvm::Type*, 8> InTypes; 776bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::SmallVector<llvm::Value*, 8> InSteps; 777083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::SmallVector<llvm::Value*, 8> InBufPtrs; 778d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala llvm::SmallVector<llvm::Value*, 8> InStructTempSlots; 779881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 780083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT); 781881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 782083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Create the loop structure. 783083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock(); 784083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::PHINode *IV; 785083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala createLoop(Builder, Arg_x1, Arg_x2, &IV); 786881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 787083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 788083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala const int CalleeArgsContextIdx = 789083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 790083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [&NumRemainingInputs]() { --NumRemainingInputs; }, 791083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala LoopHeader->getTerminator()); 792083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 793083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // After ExpandSpecialArguments() gets called, NumRemainingInputs 794083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // counts the number of arguments to the kernel that correspond to 
795083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // an array entry from the InPtr field of the DriverInfo 796083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // structure. 797083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala const size_t NumInPtrArguments = NumRemainingInputs; 798083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 799083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala if (NumInPtrArguments > 0) { 800083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Extract information about input slots and step sizes. The work done 801083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // here is loop-invariant, so we can hoist the operations out of the loop. 802083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala auto OldInsertionPoint = Builder.saveIP(); 803083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.SetInsertPoint(LoopHeader->getTerminator()); 804083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 805083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala for (size_t InputIndex = 0; InputIndex < NumInPtrArguments; ++InputIndex, ArgIter++) { 806083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 807083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala static_cast<int32_t>(InputIndex)})); 808083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *InStepAddr = Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"); 809083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::LoadInst *InStepArg = Builder.CreateLoad(InStepAddr, "instep_addr"); 810881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 811d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala llvm::Type *InType = ArgIter->getType(); 812326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes 813326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes /* 814d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * AArch64 calling conventions dictate that structs of sufficient size 815d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt 
Wala * get passed by pointer instead of passed by value. This, combined 816d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * with the fact that we don't allow kernels to operate on pointer 817d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * data means that if we see a kernel with a pointer parameter we know 818083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala * that it is a struct input that has been promoted. As such we don't 819d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * need to convert its type to a pointer. Later we will need to know 820d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * to create a temporary copy on the stack, so we save this information 821d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * in InStructTempSlots. 822326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes */ 823d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) { 824d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala llvm::Type *ElementType = PtrType->getElementType(); 825083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr, 826083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala "input_struct_slot")); 827d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala } else { 828d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala InType = InType->getPointerTo(); 829d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala InStructTempSlots.push_back(nullptr); 830d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala } 831326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes 832d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala llvm::Value *InStep = getStepValue(&DL, InType, InStepArg); 833881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 834d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala InStep->setName("instep"); 835881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 836083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices InBufPtrGEP(GEPHelper({0, 
RsExpandKernelDriverInfoPfxFieldInPtr, 837083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala static_cast<int32_t>(InputIndex)})); 838083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep"); 839083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf"); 840083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in"); 841d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala if (gEnableRsTbaa) { 842083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InBufPtr->setMetadata("tbaa", TBAAPointer); 843d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala } 844881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 845083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InBufPtr->setMetadata("alias.scope", AliasingScope); 84650f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 847d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala InTypes.push_back(InType); 848d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala InSteps.push_back(InStep); 849083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InBufPtrs.push_back(CastInBufPtr); 850881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 851083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 852083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.restoreIP(OldInsertionPoint); 853881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 8547ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 8557ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines // Populate the actual call to kernel(). 
8567ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 8577ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 8584102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser // Calculate the current input and output pointers 8597b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 8607b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 861881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // We always calculate the input/output pointers with a GEP operating on i8 862881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // values combined with a multiplication and only cast at the very end to 863881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // OutTy. This is to account for dynamic stepping sizes when the value 864881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // isn't apparent at compile time. In the (very common) case when we know 865881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // the step size at compile time, due to having complete type information 866881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // this multiplication will be optimized out and produces code equivalent to 867881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // a GEP on a pointer of the correct type. 
868881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 869881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Output 870881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 871900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutPtr = nullptr; 872bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray if (CastedOutBasePtr) { 8737b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 874881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 875083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset); 876bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 877e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes if (PassOutByPointer) { 878881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes RootArgs.push_back(OutPtr); 879881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 8804102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser } 8817b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser 882881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Inputs 88374a4b08235990916911b8fe758d656c1171faf26Stephen Hines 884083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala if (NumInPtrArguments > 0) { 885881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1); 886881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 887083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala for (size_t Index = 0; Index < NumInPtrArguments; ++Index) { 888083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset); 889326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes llvm::Value *Input; 890326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes 891d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) { 892d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala // Pass a pointer to a temporary on the stack, rather than 
893d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala // passing a pointer to the original value. We do not want 894d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala // the kernel to potentially modify the input data. 895d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala 896d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala llvm::Type *ElementType = llvm::cast<llvm::PointerType>( 897d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala InPtr->getType())->getElementType(); 898d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala uint64_t StoreSize = DL.getTypeStoreSize(ElementType); 899d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala uint64_t Alignment = DL.getABITypeAlignment(ElementType); 900d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala 901d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala Builder.CreateMemCpy(TemporarySlot, InPtr, StoreSize, Alignment, 902d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala /* isVolatile = */ false, 903d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala /* !tbaa = */ gEnableRsTbaa ? 
TBAAAllocation : nullptr, 904d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala /* !tbaa.struct = */ nullptr, 905d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala /* !alias.scope = */ AliasingScope); 906d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala Input = TemporarySlot; 907326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes } else { 908326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input"); 909326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes 910326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes if (gEnableRsTbaa) { 911326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes InputLoad->setMetadata("tbaa", TBAAAllocation); 912326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes } 913881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 91450f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray InputLoad->setMetadata("alias.scope", AliasingScope); 91550f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 916326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes Input = InputLoad; 917881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 918881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 919881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes RootArgs.push_back(Input); 9209c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 9217ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 9227ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 92328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder); 9247ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 925bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs); 9267ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 927e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes if (OutPtr && !PassOutByPointer) { 92818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr); 
9299c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines if (gEnableRsTbaa) { 9309c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines Store->setMetadata("tbaa", TBAAAllocation); 9319c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 93250f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray Store->setMetadata("alias.scope", AliasingScope); 9337ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 9347ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 935db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return true; 936db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 937db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 93818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// @brief Checks if pointers to allocation internals are exposed 93918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// 94018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// This function verifies if through the parameters passed to the kernel 94118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// or through calls to the runtime library the script gains access to 94218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// pointers pointing to data within a RenderScript Allocation. 94318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// If we know we control all loads from and stores to data within 94418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// RenderScript allocations and if we know the run-time internal accesses 94518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// are all annotated with RenderScript TBAA metadata, only then we 94618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// can safely use TBAA to distinguish between generic and from-allocation 94718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// pointers. 
948bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool allocPointersExposed(llvm::Module &Module) { 94918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Old style kernel function can expose pointers to elements within 95018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // allocations. 95118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // TODO: Extend analysis to allow simple cases of old-style kernels. 95225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines for (size_t i = 0; i < mExportForEachCount; ++i) { 95325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char *Name = mExportForEachNameList[i]; 95425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t Signature = mExportForEachSignatureList[i]; 955bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (Module.getFunction(Name) && 956d88177580db4ddedf680854c51db333c97eabc59Stephen Hines !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) { 95718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 95818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 95918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 96018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 96118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Check for library functions that expose a pointer to an Allocation or 96218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // that are not yet annotated with RenderScript-specific tbaa information. 963e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala static const std::vector<const char *> Funcs{ 964e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAt(...) 
965e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationj", 966e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationjj", 967e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationjjj", 968e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 969e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsSetElementAt() 970e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvj", 971e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvjj", 972e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvjjj", 973e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 974e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_Y() 975e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj", 976e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 977e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_U() 978e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj", 979e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 980e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_V() 981e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj", 982e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala }; 983e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 984e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala for (auto FI : Funcs) { 985e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala llvm::Function *Function = Module.getFunction(FI); 98618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 987bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (!Function) { 988e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala ALOGE("Missing run-time function '%s'", FI); 98918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 
99018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 99118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 992bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (Function->getNumUses() > 0) { 99318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 99418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 99518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 99618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 99718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return false; 99818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 99918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 100018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// @brief Connect RenderScript TBAA metadata to C/C++ metadata 100118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// 100218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// The TBAA metadata used to annotate loads/stores from RenderScript 1003e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// Allocations is generated in a separate TBAA tree with a 1004354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for 1005354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// all nodes in unrelated alias analysis trees. This function makes the 1006354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root), 1007e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With 1008e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// the connected trees every access to an Allocation is resolved to 1009e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// must-alias if compared to a normal C/C++ access. 
1010bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes void connectRenderScriptTBAAMetadata(llvm::Module &Module) { 1011bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDBuilder MDHelper(*Context); 1012354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScriptDistinct = 1013354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines MDHelper.createTBAARoot("RenderScript Distinct TBAA"); 1014354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode( 1015354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines "RenderScript TBAA", TBAARenderScriptDistinct); 1016bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA"); 1017354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScript->replaceOperandWith(1, TBAARoot); 101818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 101918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1020bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes virtual bool runOnModule(llvm::Module &Module) { 1021bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool Changed = false; 1022bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes this->Module = &Module; 1023bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes this->Context = &Module.getContext(); 1024bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 1025bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes this->buildTypes(); 1026bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 1027bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bcinfo::MetadataExtractor me(&Module); 102825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines if (!me.extract()) { 102925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines ALOGE("Could not extract metadata from module!"); 103025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines return false; 103125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines } 
103225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachCount = me.getExportForEachSignatureCount(); 103325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachNameList = me.getExportForEachNameList(); 103425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachSignatureList = me.getExportForEachSignatureList(); 1035db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1036bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool AllocsExposed = allocPointersExposed(Module); 103718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 103825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines for (size_t i = 0; i < mExportForEachCount; ++i) { 103925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char *name = mExportForEachNameList[i]; 104025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t signature = mExportForEachSignatureList[i]; 1041bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *kernel = Module.getFunction(name); 1042cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser if (kernel) { 1043d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) { 1044cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser Changed |= ExpandKernel(kernel, signature); 1045acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 1046acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } else if (kernel->getReturnType()->isVoidTy()) { 1047cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser Changed |= ExpandFunction(kernel, signature); 1048acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 1049acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } else { 1050acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // There are some graphics root functions that are not 1051acde6013e9c448547e59eed04afd2adbd9681a3aTobias 
Grosser // expanded, but that will be called directly. For those 1052acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // functions, we can not set the linkage to internal. 1053acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } 1054cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines } 1055db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1056db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 10579c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines if (gEnableRsTbaa && !AllocsExposed) { 1058bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes connectRenderScriptTBAAMetadata(Module); 105918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 106018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1061cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines return Changed; 1062db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1063db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1064db169187dea4602e4ad32058762d23d474753fd0Stephen Hines virtual const char *getPassName() const { 1065db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return "ForEach-able Function Expansion"; 1066db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1067db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 10687a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}; // end RSForEachExpandPass 1069db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 10707a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace 10717a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 10727a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaochar RSForEachExpandPass::ID = 0; 107333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossstatic llvm::RegisterPass<RSForEachExpandPass> X("foreachexp", "ForEach Expand Pass"); 1074db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1075db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc { 1076db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 
10777a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass * 107825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen HinescreateRSForEachExpandPass(bool pEnableStepOpt){ 107925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines return new RSForEachExpandPass(pEnableStepOpt); 10807a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} 1081db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 10827a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc 1083