RSKernelExpand.cpp revision e32af52d4be2bb80783404d99fa338b1143dbc9a
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/* 2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project 3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License"); 5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License. 6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at 7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * http://www.apache.org/licenses/LICENSE-2.0 9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software 11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS, 12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and 14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License. 
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h" 18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h" 197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib> 2133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional> 22e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross#include <unordered_set> 237a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 24b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h> 25b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h> 26b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h> 27b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h> 2818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h> 29b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h> 30c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h> 317ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h> 32b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h> 33cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h> 34b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h> 35806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h> 36c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang 37c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h" 38ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h" 39db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 
40d88177580db4ddedf680854c51db333c97eabc59Stephen Hines#include "bcinfo/MetadataExtractor.h" 41d88177580db4ddedf680854c51db333c97eabc59Stephen Hines 424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala#ifndef __DISABLE_ASSERTS 434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala// Only used in bccAssert() 444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst int kNumExpandedForeachParams = 4; 454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst int kNumExpandedReduceParams = 3; 46e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Grossconst int kNumExpandedReduceNewAccumulatorParams = 4; 474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala#endif 484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA"; 504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst char kRenderScriptTBAANodeName[] = "RenderScript TBAA"; 51bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 527a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc; 537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 54db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace { 557a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 56354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hinesstatic const bool gEnableRsTbaa = true; 579c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines 584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala/* RSKernelExpandPass - This pass operates on functions that are able 594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * to be called via rsForEach(), "foreach_<NAME>", or 604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * "reduce_<NAME>". We create an inner loop for the function to be 614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * invoked over the appropriate data cells of the input/output 624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * allocations (adjusting other relevant parameters as we go). 
We 634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * support doing this for any forEach or reduce style compute 644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * kernels. The new function name is the original function name 654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * followed by ".expand". Note that we still generate code for the 664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * original function. 677a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */ 684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaclass RSKernelExpandPass : public llvm::ModulePass { 6933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grosspublic: 70db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static char ID; 71db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 7233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossprivate: 73e44a3525b9703739534c3b62d7d1af4c95649a38David Gross static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h 74e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 75e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross typedef std::unordered_set<llvm::Function *> FunctionSet; 76e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 77e44a3525b9703739534c3b62d7d1af4c95649a38David Gross enum RsLaunchDimensionsField { 78e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldX, 79e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldY, 80e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldZ, 81e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldLod, 82e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldFace, 83e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldArray, 84e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 85e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldCount 86e44a3525b9703739534c3b62d7d1af4c95649a38David Gross }; 
87e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 88e44a3525b9703739534c3b62d7d1af4c95649a38David Gross enum RsExpandKernelDriverInfoPfxField { 89e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInPtr, 90e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInStride, 91e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInLen, 92e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutPtr, 93e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutStride, 94e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutLen, 95e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldDim, 96e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldCurrent, 97e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldUsr, 98e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldUsLenr, 99e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 100e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldCount 101e44a3525b9703739534c3b62d7d1af4c95649a38David Gross }; 10233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 103bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Module *Module; 104bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::LLVMContext *Context; 105bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 106bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 1074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * Pointers to LLVM type information for the the function signatures 1084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * for expanded functions. These must be re-calculated for each module 1094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * the pass is run on. 
110bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 1114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::FunctionType *ExpandedForEachType, *ExpandedReduceType; 112e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *RsExpandKernelDriverInfoPfxTy; 113db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 11425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t mExportForEachCount; 11525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char **mExportForEachNameList; 11625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const uint32_t *mExportForEachSignatureList; 117cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines 1184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala uint32_t mExportReduceCount; 1194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala const char **mExportReduceNameList; 1204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 1212b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Turns on optimization of allocation stride values. 
1222b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bool mEnableStepOpt; 1232b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 124bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes uint32_t getRootSignature(llvm::Function *Function) { 125db169187dea4602e4ad32058762d23d474753fd0Stephen Hines const llvm::NamedMDNode *ExportForEachMetadata = 126bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Module->getNamedMetadata("#rs_export_foreach"); 127db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 128db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (!ExportForEachMetadata) { 129db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 8> RootArgTys; 130bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes for (llvm::Function::arg_iterator B = Function->arg_begin(), 131bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes E = Function->arg_end(); 132db169187dea4602e4ad32058762d23d474753fd0Stephen Hines B != E; 133db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ++B) { 134db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgTys.push_back(B->getType()); 135db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 136db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 137db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // For pre-ICS bitcode, we may not have signature information. In that 138db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // case, we use the size of the RootArgTys to select the number of 139db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // arguments. 
140db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return (1 << RootArgTys.size()) - 1; 141db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 142db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1437ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines if (ExportForEachMetadata->getNumOperands() == 0) { 1447ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return 0; 1457ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 1467ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 1476e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines bccAssert(ExportForEachMetadata->getNumOperands() > 0); 148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 149cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // We only handle the case for legacy root() functions here, so this is 150cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // hard-coded to look at only the first such function. 151db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 152900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes if (SigNode != nullptr && SigNode->getNumOperands() == 1) { 1531bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines llvm::Metadata *SigMD = SigNode->getOperand(0); 1541bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) { 1551bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines llvm::StringRef SigString = SigS->getString(); 156db169187dea4602e4ad32058762d23d474753fd0Stephen Hines uint32_t Signature = 0; 157db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigString.getAsInteger(10, Signature)) { 158db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 159db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 160db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 161db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature; 
162db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 163db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 164db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 165db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 166db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 167db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 168429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray bool isStepOptSupported(llvm::Type *AllocType) { 169429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 170429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 171429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 172429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 173429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (mEnableStepOpt) { 174429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 175429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 176429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 177429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType == VoidPtrTy) { 178429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 179429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 180429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 181429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (!PT) { 182429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 183429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 184429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 185429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // remaining conditions are 64-bit only 186429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (VoidPtrTy->getPrimitiveSizeInBits() == 32) { 187429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return true; 188429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 189429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 190429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // 
coerce suggests an upconverted struct type, which we can't support 191429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) { 192429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 193429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 194429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 195429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported 196429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2); 197429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128); 198429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType == V2xi64Ty || AllocType == Int128Ty) { 199429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 200429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 201429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 202429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return true; 203429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 204429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 2052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Get the actual value we should use to step through an allocation. 2067b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 2077b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // Normally the value we use to step through an allocation is given to us by 2087b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // the driver. However, for certain primitive data types, we can derive an 2097b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // integer constant for the step value. We use this integer constant whenever 2107b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // possible to allow further compiler optimizations to take place. 
2117b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 212b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines // DL - Target Data size/layout information. 2132b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // T - Type of allocation (should be a pointer). 2142b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // OrigStep - Original step increment (root.expand() input from driver). 215bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType, 2162b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *OrigStep) { 217b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines bccAssert(DL); 218bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(AllocType); 2192b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bccAssert(OrigStep); 220bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 221429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (isStepOptSupported(AllocType)) { 2222b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Type *ET = PT->getElementType(); 223b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines uint64_t ETSize = DL->getTypeAllocSize(ET); 224bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 2252b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return llvm::ConstantInt::get(Int32Ty, ETSize); 2262b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } else { 2272b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return OrigStep; 2282b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 2292b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 2302b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 231097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes /// Builds the types required by the pass for the given context. 
232bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes void buildTypes(void) { 233e44a3525b9703739534c3b62d7d1af4c95649a38David Gross // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs. 234bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 235e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8Ty = llvm::Type::getInt8Ty(*Context); 236e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8PtrTy = Int8Ty->getPointerTo(); 237e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT); 238e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 239e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32ArrayInputLimitTy = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT); 240e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 241e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32Array4Ty = llvm::ArrayType::get(Int32Ty, 4); 242097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 243097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h: 244db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 245e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * struct RsLaunchDimensions { 246e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t x; 247db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t y; 248db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t z; 249e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t lod; 250e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t face; 251e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t array[4]; 252e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * }; 253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross */ 
254e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes; 255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t x 256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t y 257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t z 258e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t lod 259e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t face 260e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4] 261e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType *RsLaunchDimensionsTy = 262e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions"); 263e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 2641d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h: 265e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * struct RsExpandKernelDriverInfoPfx { 267e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]; 268e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t inStride[RS_KERNEL_INPUT_LIMIT]; 269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t inLen; 270e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 271e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]; 272e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t outStride[RS_KERNEL_INPUT_LIMIT]; 
273e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t outLen; 274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // Dimension of the launch 276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * RsLaunchDimensions dim; 277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // The walking iterator of the launch 279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * RsLaunchDimensions current; 280e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 281e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * const void *usr; 282e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t usrLen; 283e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 284e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // Items below this line are not used by the compiler and can be change in the driver. 285e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // So the compiler must assume there are an unknown number of fields of unknown type 286e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // beginning here. 287db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * }; 2881d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross * 2891d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp). 
290db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 291e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes; 292e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT] 293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t inStride[RS_KERNEL_INPUT_LIMIT] 294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t inLen 295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT] 296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t outStride[RS_KERNEL_INPUT_LIMIT] 297e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t outLen 298e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions dim 299e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions current 300e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy); // const void *usr 301e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t usrLen 302e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross RsExpandKernelDriverInfoPfxTy = 303e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx"); 304bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 
305bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes // Create the function type for expanded kernels. 3064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context); 307bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 308e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo(); 3094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep) 3104e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ExpandedForEachType = llvm::FunctionType::get(VoidTy, 3114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false); 312bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 3134e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // void (void *inBuf, void *outBuf, uint32_t len) 3144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ExpandedReduceType = llvm::FunctionType::get(VoidTy, {VoidPtrTy, VoidPtrTy, Int32Ty}, false); 3158ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 3168ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 3174e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala /// @brief Create skeleton of the expanded foreach kernel. 
318357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 319357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// This creates a function with the following signature: 320357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 321357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 3225010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes /// uint32_t outstep) 323357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 3244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function *createEmptyExpandedForEachKernel(llvm::StringRef OldName) { 325bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 3264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function::Create(ExpandedForEachType, 327bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::GlobalValue::ExternalLinkage, 328bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes OldName + ".expand", Module); 3294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams); 330bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin(); 331bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("p"); 332bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("x1"); 333bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("x2"); 334bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("arg_outstep"); 3354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 3364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ExpandedFunction); 3374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::IRBuilder<> Builder(Begin); 3384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateRetVoid(); 3394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return 
ExpandedFunction; 3404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 3414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 3424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Create skeleton of the expanded reduce kernel. 3434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 3444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // This creates a function with the following signature: 3454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 3464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // void @func.expand(i8* nocapture %inBuf, i8* nocapture %outBuf, i32 len) 3474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 3484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function *createEmptyExpandedReduceKernel(llvm::StringRef OldName) { 3494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function *ExpandedFunction = 3504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function::Create(ExpandedReduceType, 3514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::GlobalValue::ExternalLinkage, 3524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OldName + ".expand", Module); 3534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ExpandedFunction->arg_size() == kNumExpandedReduceParams); 3544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 3554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin(); 3564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 3574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala using llvm::Attribute; 3584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 3594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Argument *InBuf = &(*AI++); 3604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InBuf->setName("inBuf"); 361dfde70a8ae9b77bbf0e8d9d22a55e1d1fda7d64dStephen Hines InBuf->addAttr(llvm::AttributeSet::get(*Context, InBuf->getArgNo() + 1, llvm::makeArrayRef(Attribute::NoCapture))); 3624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 
3634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Argument *OutBuf = &(*AI++); 3644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutBuf->setName("outBuf"); 365dfde70a8ae9b77bbf0e8d9d22a55e1d1fda7d64dStephen Hines OutBuf->addAttr(llvm::AttributeSet::get(*Context, OutBuf->getArgNo() + 1, llvm::makeArrayRef(Attribute::NoCapture))); 3664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 3674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala (AI++)->setName("len"); 368bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 369bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 370bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction); 371806075b3a54af826fea78490fb213d8a0784138eTobias Grosser llvm::IRBuilder<> Builder(Begin); 372806075b3a54af826fea78490fb213d8a0784138eTobias Grosser Builder.CreateRetVoid(); 373806075b3a54af826fea78490fb213d8a0784138eTobias Grosser 374bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes return ExpandedFunction; 375357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser } 376357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser 377e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Create skeleton of a general reduce kernel's expanded accumulator. 
378e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 379e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // This creates a function with the following signature: 380e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 381e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p, 382e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // i32 %x1, i32 %x2, accumType* nocapture %accum) 383e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 384e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function *createEmptyExpandedReduceNewAccumulator(llvm::StringRef OldName, 385e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *AccumArgTy) { 386e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 387e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context); 388e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::FunctionType *ExpandedReduceNewAccumulatorType = 389e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::FunctionType::get(VoidTy, 390e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross {RsExpandKernelDriverInfoPfxTy->getPointerTo(), 391e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Int32Ty, Int32Ty, AccumArgTy}, false); 392e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function *FnExpandedAccumulator = 393e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function::Create(ExpandedReduceNewAccumulatorType, 394e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::GlobalValue::ExternalLinkage, 395e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross OldName + ".expand", Module); 396e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceNewAccumulatorParams); 397e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 398e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 
llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin(); 399e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 400e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross using llvm::Attribute; 401e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 402e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Argument *Arg_p = &(*AI++); 403e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_p->setName("p"); 404e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_p->addAttr(llvm::AttributeSet::get(*Context, Arg_p->getArgNo() + 1, 405e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::makeArrayRef(Attribute::NoCapture))); 406e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 407e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Argument *Arg_x1 = &(*AI++); 408e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_x1->setName("x1"); 409e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 410e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Argument *Arg_x2 = &(*AI++); 411e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_x2->setName("x2"); 412e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 413e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Argument *Arg_accum = &(*AI++); 414e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_accum->setName("accum"); 415e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_accum->addAttr(llvm::AttributeSet::get(*Context, Arg_accum->getArgNo() + 1, 416e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::makeArrayRef(Attribute::NoCapture))); 417e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 418e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 419e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross FnExpandedAccumulator); 420e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::IRBuilder<> Builder(Begin); 421e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.CreateRetVoid(); 
422e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 423e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return FnExpandedAccumulator; 424e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 425e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 426e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @brief Create an empty loop 427e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 428e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// Create a loop of the form: 429e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 430e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// for (i = LowerBound; i < UpperBound; i++) 431e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// ; 432e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 433e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// After the loop has been created, the builder is set such that 434e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// instructions can be added to the loop body. 435e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 436e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param Builder The builder to use to build this loop. The current 437e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// position of the builder is the position the loop 438e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// will be inserted. 439e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LowerBound The first value of the loop iterator 440e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param UpperBound The maximal value of the loop iterator 441e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LoopIV A reference that will be set to the loop iterator. 442e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @return The BasicBlock that will be executed after the loop. 
443e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder, 444e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *LowerBound, 445e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *UpperBound, 446e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::PHINode **LoopIV) { 447c2ca742d7d0197c52e49467862844463fb42280fDavid Gross bccAssert(LowerBound->getType() == UpperBound->getType()); 448e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 449e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB; 450e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *Cond, *IVNext; 451e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::PHINode *IV; 452e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 453e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser CondBB = Builder.GetInsertBlock(); 4541bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), nullptr, nullptr); 455bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent()); 456e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 457e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // if (LowerBound < Upperbound) 458e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto LoopHeader 459e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // else 460e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto AfterBB 461e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser CondBB->getTerminator()->eraseFromParent(); 462e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(CondBB); 463e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(LowerBound, UpperBound); 464e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 
465e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 466e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ] 467e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // iv.next = iv + 1 468e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // if (iv.next < Upperbound) 469e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto LoopHeader 470e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // else 471e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto AfterBB 472e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(HeaderBB); 473e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV = Builder.CreatePHI(LowerBound->getType(), 2, "X"); 474e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV->addIncoming(LowerBound, CondBB); 475e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1)); 476e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV->addIncoming(IVNext, HeaderBB); 477e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(IVNext, UpperBound); 478e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 479e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser AfterBB->setName("Exit"); 480e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 481e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser *LoopIV = IV; 482e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser return AfterBB; 483e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser } 484e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 48528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Finish building the outgoing argument list for calling a ForEach-able function. 
48628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // 48728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // ArgVector - on input, the non-special arguments 48828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // on output, the non-special arguments combined with the special arguments 48928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // from SpecialArgVector 49028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // SpecialArgVector - special arguments (from ExpandSpecialArguments()) 49128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // SpecialArgContextIdx - return value of ExpandSpecialArguments() 49228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // (position of context argument in SpecialArgVector) 49328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // CalleeFunction - the ForEach-able function being called 49428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Builder - for inserting code into the caller function 49528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen> 49628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross void finishArgList( llvm::SmallVector<llvm::Value *, ArgVectorLen> &ArgVector, 49728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector, 49828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int SpecialArgContextIdx, 49928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const llvm::Function &CalleeFunction, 50028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::IRBuilder<> &CallerBuilder) { 50128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross /* The context argument (if any) is a pointer to an opaque user-visible type that differs from 50228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the 50328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * two types represent 
the same thing). Therefore, we must introduce a pointer cast when 50428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * generating a call to the kernel function. 50528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross */ 50628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int ArgContextIdx = 50728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx; 50828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end()); 50928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross if (ArgContextIdx >= 0) { 51028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Type *ContextArgType = nullptr; 51128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int ArgIdx = ArgContextIdx; 51228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross for (const auto &Arg : CalleeFunction.getArgumentList()) { 51328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross if (!ArgIdx--) { 51428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ContextArgType = Arg.getType(); 51528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross break; 51628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 51728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 51828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross bccAssert(ContextArgType); 51928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType); 52028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 52128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 52228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 523083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // GEPHelper() returns a SmallVector of values suitable for passing 524083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for 525083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // the returned 
data type. It is sized so that the SmallVector 526083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // returned by GEPHelper() never needs to do a heap allocation for 527083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // any list of GEP indices it encounters in the code. 528083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices; 529083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 530083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Helper for turning a list of constant integer GEP indices into a 531083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // SmallVector of llvm::Value*. The return value is suitable for 532083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP(). 533083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // 534083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Inputs: 535083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // I32Args should be integers which represent the index arguments 536083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // to a GEP instruction. 537083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // 538083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Returns: 539083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Returns a SmallVector of ConstantInts. 
5404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala SmallGEPIndices GEPHelper(const std::initializer_list<int32_t> I32Args) { 541083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices Out(I32Args.size()); 542083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context); 543083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala std::transform(I32Args.begin(), I32Args.end(), Out.begin(), 544083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); }); 545083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala return Out; 546083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala } 547083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 5488ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic: 5494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala RSKernelExpandPass(bool pEnableStepOpt = true) 550900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes : ModulePass(ID), Module(nullptr), Context(nullptr), 551bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes mEnableStepOpt(pEnableStepOpt) { 552bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 5538ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 5548ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 555c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { 556c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines // This pass does not use any other analysis passes, but it does 557c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines // add/wrap the existing functions in the module (thus altering the CFG). 
558c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines } 559c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines 56033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Build contribution to outgoing argument list for calling a 561e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // ForEach-able function or a general reduction accumulator 562e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // function, based on the special parameters of that function. 56333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // 564e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Signature - metadata bits for the signature of the callee 56533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // X, Arg_p - values derived directly from expanded function, 566e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // suitable for computing arguments for the callee 56733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // CalleeArgs - contribution is accumulated here 56833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Bump - invoked once for each contributed outgoing argument 569083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // LoopHeaderInsertionPoint - an Instruction in the loop header, before which 570083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // this function can insert loop-invariant loads 57128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // 57228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Return value is the (zero-based) position of the context (Arg_p) 57328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // argument in the CalleeArgs vector, or a negative value if the 57428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // context argument is not placed in the CalleeArgs vector. 
57528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int ExpandSpecialArguments(uint32_t Signature, 57628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Value *X, 57728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Value *Arg_p, 57828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::IRBuilder<> &Builder, 57928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::SmallVector<llvm::Value*, 8> &CalleeArgs, 580083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala std::function<void ()> Bump, 581083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Instruction *LoopHeaderInsertionPoint) { 58228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 58328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross bccAssert(CalleeArgs.empty()); 58428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 58528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int Return = -1; 58633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) { 58733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross CalleeArgs.push_back(Arg_p); 58833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross Bump(); 58928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross Return = CalleeArgs.size() - 1; 59033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 59133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 59233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) { 59333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross CalleeArgs.push_back(X); 59433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross Bump(); 59533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 59633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 597e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) || 598e44a3525b9703739534c3b62d7d1af4c95649a38David Gross bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 
599083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala bccAssert(LoopHeaderInsertionPoint); 60033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 601083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Y and Z are loop invariant, so they can be hoisted out of the 602083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // loop. Set the IRBuilder insertion point to the loop header. 603083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala auto OldInsertionPoint = Builder.saveIP(); 604083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.SetInsertPoint(LoopHeaderInsertionPoint); 605e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 606e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) { 607083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent, 608083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala RsLaunchDimensionsFieldY})); 609083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep"); 610083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y")); 611e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Bump(); 612e44a3525b9703739534c3b62d7d1af4c95649a38David Gross } 613e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 614e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 615083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent, 616083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala RsLaunchDimensionsFieldZ})); 617083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep"); 618083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z")); 
619e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Bump(); 620e44a3525b9703739534c3b62d7d1af4c95649a38David Gross } 621083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 622083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.restoreIP(OldInsertionPoint); 62333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 62428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 62528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross return Return; 62633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 62733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 628e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Generate loop-invariant input processing setup code for an expanded 629e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // ForEach-able function or an expanded general reduction accumulator 630e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // function. 631e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 632e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // LoopHeader - block at the end of which the setup code will be inserted 633e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Arg_p - RSKernelDriverInfo pointer passed to the expanded function 634e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // TBAAPointer - metadata for marking loads of pointer values out of RSKernelDriverInfo 635e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // ArgIter - iterator pointing to first input of the UNexpanded function 636e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // NumInputs - number of inputs (NOT number of ARGUMENTS) 637e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 638e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // InBufPtrs[] - this function sets each array element to point to the first 639e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // cell of the corresponding input allocation 640e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // InStructTempSlots[] - this function sets each array element either to nullptr 
641e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // or to the result of an alloca (for the case where the 642e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // calling convention dictates that a value must be passed 643e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // by reference, and so we need a stacked temporary to hold 644e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // a copy of that value) 645e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross void ExpandInputsLoopInvariant(llvm::IRBuilder<> &Builder, llvm::BasicBlock *LoopHeader, 646e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_p, 647e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::MDNode *TBAAPointer, 648e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function::arg_iterator ArgIter, 649e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const size_t NumInputs, 650e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs, 651e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots) { 652e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT); 653e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 654e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Extract information about input slots. The work done 655e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // here is loop-invariant, so we can hoist the operations out of the loop. 
656e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross auto OldInsertionPoint = Builder.saveIP(); 657e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.SetInsertPoint(LoopHeader->getTerminator()); 658e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 659e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross for (size_t InputIndex = 0; InputIndex < NumInputs; ++InputIndex, ArgIter++) { 660e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *InType = ArgIter->getType(); 661e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 662e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross /* 663e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * AArch64 calling conventions dictate that structs of sufficient size 664e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * get passed by pointer instead of passed by value. This, combined 665e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * with the fact that we don't allow kernels to operate on pointer 666e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * data means that if we see a kernel with a pointer parameter we know 667e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * that it is a struct input that has been promoted. As such we don't 668e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * need to convert its type to a pointer. Later we will need to know 669e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * to create a temporary copy on the stack, so we save this information 670e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * in InStructTempSlots. 
671e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross */ 672e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) { 673e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *ElementType = PtrType->getElementType(); 674e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr, 675e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross "input_struct_slot")); 676e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } else { 677e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InType = InType->getPointerTo(); 678e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InStructTempSlots.push_back(nullptr); 679e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 680e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 681e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 682e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross static_cast<int32_t>(InputIndex)})); 683e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep"); 684e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf"); 685e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in"); 686e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 687e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (gEnableRsTbaa) { 688e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InBufPtr->setMetadata("tbaa", TBAAPointer); 689e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 690e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 691e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InBufPtrs.push_back(CastInBufPtr); 692e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 
693e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 694e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.restoreIP(OldInsertionPoint); 695e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 696e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 697e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Generate loop-varying input processing code for an expanded ForEach-able function 698e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // or an expanded general reduction accumulator function. Also, for the call to the 699e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // UNexpanded function, collect the portion of the argument list corresponding to the 700e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // inputs. 701e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 702e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Arg_x1 - first X coordinate to be processed by the expanded function 703e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // TBAAAllocation - metadata for marking loads of input values out of allocations 704e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // NumInputs -- number of inputs (NOT number of ARGUMENTS) 705e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // InBufPtrs[] - this function consumes the information produced by ExpandInputsLoopInvariant() 706e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // InStructTempSlots[] - this function consumes the information produced by ExpandInputsLoopInvariant() 707e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // IndVar - value of loop induction variable (X coordinate) for a given loop iteration 708e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 709e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // RootArgs - this function sets this to the list of outgoing argument values corresponding 710e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // to the inputs 711e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross void 
ExpandInputsBody(llvm::IRBuilder<> &Builder, 712e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_x1, 713e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::MDNode *TBAAAllocation, 714e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const size_t NumInputs, 715e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs, 716e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots, 717e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *IndVar, 718e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVectorImpl<llvm::Value *> &RootArgs) { 719e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Offset = Builder.CreateSub(IndVar, Arg_x1); 720e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 721e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross for (size_t Index = 0; Index < NumInputs; ++Index) { 722e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset); 723e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Input; 724e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 725e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input"); 726e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 727e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (gEnableRsTbaa) { 728e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InputLoad->setMetadata("tbaa", TBAAAllocation); 729e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 730e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 731e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) { 732e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Pass a pointer to a temporary on the stack, rather than 733e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // passing a pointer 
to the original value. We do not want 734e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // the kernel to potentially modify the input data. 735e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 736e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Note: don't annotate with TBAA, since the kernel might 737e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // have its own TBAA annotations for the pointer argument. 738e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.CreateStore(InputLoad, TemporarySlot); 739e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Input = TemporarySlot; 740e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } else { 741e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Input = InputLoad; 742e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 743e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 744e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross RootArgs.push_back(Input); 745e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 746e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 747e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 7488ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser /* Performs the actual optimization on a selected function. On success, the 7498ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * Module will contain a new function of the name "<NAME>.expand" that 7508ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * invokes <NAME>() in a loop with the appropriate parameters. 
7518ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser */ 7524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bool ExpandOldStyleForEach(llvm::Function *Function, uint32_t Signature) { 753bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ALOGV("Expanding ForEach-able Function %s", 754bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Function->getName().str().c_str()); 7558ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 7568ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser if (!Signature) { 757bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Signature = getRootSignature(Function); 7588ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser if (!Signature) { 7598ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser // We couldn't determine how to expand this function based on its 7608ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser // function signature. 7618ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser return false; 7628ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 7638ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 7648ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 765bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::DataLayout DL(Module); 7668ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 767bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 7684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala createEmptyExpandedForEachKernel(Function->getName()); 769db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 770bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 771bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * Extract the expanded function's parameters. It is guaranteed by 772e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * createEmptyExpandedForEachKernel that there will be four parameters. 
773bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 77433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 7754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams); 77633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 777bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator ExpandedFunctionArgIter = 778bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction->arg_begin(); 779db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 780bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 781bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 782bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 7835010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter); 784bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 785900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *InStep = nullptr; 786900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutStep = nullptr; 787db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 788db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Construct the actual function body. 789bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 790db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 791cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Collect and construct the arguments for the kernel(). 792db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Note that we load any loop-invariant arguments before entering the Loop. 
793bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin(); 794db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 795900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Type *InTy = nullptr; 796083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *InBufPtr = nullptr; 797d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) { 798083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0})); 799083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::LoadInst *InStepArg = Builder.CreateLoad( 800083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr"); 801e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 802bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes InTy = (FunctionArgIter++)->getType(); 803e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes InStep = getStepValue(&DL, InTy, InStepArg); 804e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 8052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines InStep->setName("instep"); 806e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 807083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0})); 808083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InBufPtr = Builder.CreateLoad( 809083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf"); 810db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 811db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 812900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Type *OutTy = nullptr; 813900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutBasePtr = nullptr; 814d88177580db4ddedf680854c51db333c97eabc59Stephen 
Hines if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 815bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes OutTy = (FunctionArgIter++)->getType(); 816b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines OutStep = getStepValue(&DL, OutTy, Arg_outstep); 8172b04086acbef6520ae2c54a868b1271abf053122Stephen Hines OutStep->setName("outstep"); 818083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0})); 819083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep")); 820db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 821db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 822900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *UsrData = nullptr; 823d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) { 824bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *UsrDataTy = (FunctionArgIter++)->getType(); 825083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldUsr); 826083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy); 827db169187dea4602e4ad32058762d23d474753fd0Stephen Hines UsrData->setName("UsrData"); 828db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 829db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 830083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock(); 83133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::PHINode *IV; 83233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross createLoop(Builder, Arg_x1, Arg_x2, &IV); 833097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 83433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 
llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 83528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 836083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [&FunctionArgIter]() { FunctionArgIter++; }, 837083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala LoopHeader->getTerminator()); 838db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 839bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(FunctionArgIter == Function->arg_end()); 840db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 841cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Populate the actual call to kernel(). 842db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 843db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 844900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *InPtr = nullptr; 845900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutPtr = nullptr; 846db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 847ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser // Calculate the current input and output pointers 84802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // 849ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser // We always calculate the input/output pointers with a GEP operating on i8 85002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // values and only cast at the very end to OutTy. This is because the step 85102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // between two values is given in bytes. 85202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // 85302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // TODO: We could further optimize the output by using a GEP operation of 85402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // type 'OutTy' in cases where the element type of the allocation allows. 
85502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser if (OutBasePtr) { 85602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 85702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutOffset = Builder.CreateMul(OutOffset, OutStep); 858083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset); 85902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutPtr = Builder.CreatePointerCast(OutPtr, OutTy); 86002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser } 861bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 862083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala if (InBufPtr) { 863ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1); 864ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InOffset = Builder.CreateMul(InOffset, InStep); 865083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset); 866ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InPtr = Builder.CreatePointerCast(InPtr, InTy); 867ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser } 86802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser 869ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser if (InPtr) { 8707ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines RootArgs.push_back(InPtr); 871db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 872db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 87302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser if (OutPtr) { 8747ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines RootArgs.push_back(OutPtr); 875db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 876db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 877db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (UsrData) { 878db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(UsrData); 
879db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 880db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 88128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder); 882db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 883bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Builder.CreateCall(Function, RootArgs); 884db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 8857ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return true; 8867ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 8877ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 8884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala /* Expand a pass-by-value foreach kernel. 8897ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines */ 8904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bool ExpandForEach(llvm::Function *Function, uint32_t Signature) { 891d88177580db4ddedf680854c51db333c97eabc59Stephen Hines bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)); 892bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ALOGV("Expanding kernel Function %s", Function->getName().str().c_str()); 8937ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 8944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // TODO: Refactor this to share functionality with ExpandOldStyleForEach. 895bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::DataLayout DL(Module); 8967ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 897bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 8984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala createEmptyExpandedForEachKernel(Function->getName()); 8997ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 900bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 901bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * Extract the expanded function's parameters. 
It is guaranteed by 902e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * createEmptyExpandedForEachKernel that there will be four parameters. 903bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 904881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 9054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams); 906881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 907bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator ExpandedFunctionArgIter = 908bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction->arg_begin(); 909bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 910bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 911bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 912bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 9133bc475b206c3fa249a212b90fe989fdcda4d75f9Matt Wala // Arg_outstep is not used by expanded new-style forEach kernels. 9147ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 9157ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines // Construct the actual function body. 916bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 9177ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 91818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Create TBAA meta-data. 
919354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, 920354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines *TBAAAllocation, *TBAAPointer; 921bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDBuilder MDHelper(*Context); 92214588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien 923354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScriptDistinct = 9244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala MDHelper.createTBAARoot(kRenderScriptTBAARootName); 9254e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName, 926354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScriptDistinct); 927e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", 928e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAARenderScript); 929e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, 930e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation, 0); 931e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", 932e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAARenderScript); 93314588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0); 93418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 935881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes /* 936881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * Collect and construct the arguments for the kernel(). 937881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * 938881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * Note that we load any loop-invariant arguments before entering the Loop. 
939881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes */ 940083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala size_t NumRemainingInputs = Function->arg_size(); 9417ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 942881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // No usrData parameter on kernels. 943881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes bccAssert( 944881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)); 945881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 946881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes llvm::Function::arg_iterator ArgIter = Function->arg_begin(); 947881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 948881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Check the return type 949bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Type *OutTy = nullptr; 950bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::LoadInst *OutBasePtr = nullptr; 951bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Value *CastedOutBasePtr = nullptr; 952881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 953e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes bool PassOutByPointer = false; 954881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 955d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 956bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *OutBaseTy = Function->getReturnType(); 957881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 95874a4b08235990916911b8fe758d656c1171faf26Stephen Hines if (OutBaseTy->isVoidTy()) { 959e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes PassOutByPointer = true; 960881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes OutTy = ArgIter->getType(); 961881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 962881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes ArgIter++; 963083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 
--NumRemainingInputs; 96474a4b08235990916911b8fe758d656c1171faf26Stephen Hines } else { 96574a4b08235990916911b8fe758d656c1171faf26Stephen Hines // We don't increment Args, since we are using the actual return type. 966881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes OutTy = OutBaseTy->getPointerTo(); 96774a4b08235990916911b8fe758d656c1171faf26Stephen Hines } 968881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 969083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0})); 970083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep")); 971097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 9729c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines if (gEnableRsTbaa) { 9739c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines OutBasePtr->setMetadata("tbaa", TBAAPointer); 9749c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 97550f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 976bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out"); 97774a4b08235990916911b8fe758d656c1171faf26Stephen Hines } 97874a4b08235990916911b8fe758d656c1171faf26Stephen Hines 979083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::SmallVector<llvm::Value*, 8> InBufPtrs; 980d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala llvm::SmallVector<llvm::Value*, 8> InStructTempSlots; 981881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 982083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT); 983881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 984083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Create the loop structure. 
985083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock(); 986083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::PHINode *IV; 987083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala createLoop(Builder, Arg_x1, Arg_x2, &IV); 988881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 989083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 990083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala const int CalleeArgsContextIdx = 991083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 992083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [&NumRemainingInputs]() { --NumRemainingInputs; }, 993083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala LoopHeader->getTerminator()); 994083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 995083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // After ExpandSpecialArguments() gets called, NumRemainingInputs 996083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // counts the number of arguments to the kernel that correspond to 997083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // an array entry from the InPtr field of the DriverInfo 998083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // structure. 999083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala const size_t NumInPtrArguments = NumRemainingInputs; 1000083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 1001083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala if (NumInPtrArguments > 0) { 1002e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, ArgIter, NumInPtrArguments, 1003e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InBufPtrs, InStructTempSlots); 1004881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 10057ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 10067ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines // Populate the actual call to kernel(). 
10077ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 10087ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 10099296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala // Calculate the current input and output pointers. 1010881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 1011881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Output 1012881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 1013900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutPtr = nullptr; 1014bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray if (CastedOutBasePtr) { 10157b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 1016083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset); 1017bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 1018e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes if (PassOutByPointer) { 1019881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes RootArgs.push_back(OutPtr); 1020881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 10214102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser } 10227b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser 1023881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Inputs 102474a4b08235990916911b8fe758d656c1171faf26Stephen Hines 1025083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala if (NumInPtrArguments > 0) { 1026e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInPtrArguments, 1027e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InBufPtrs, InStructTempSlots, IV, RootArgs); 10287ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 10297ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 103028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder); 10317ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 
1032bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs); 10337ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 1034e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes if (OutPtr && !PassOutByPointer) { 10359296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala RetVal->setName("call.result"); 103618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr); 10379c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines if (gEnableRsTbaa) { 10389c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines Store->setMetadata("tbaa", TBAAAllocation); 10399c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 10407ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 10417ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 1042db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return true; 1043db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1044db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1045e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Expand a simple reduce-style kernel function. 
10464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // The input is a kernel which represents a binary operation, 10484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // of the form 10494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define foo @func(foo %a, foo %b), 10514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // (More generally, it can be of the forms 10534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define void @func(foo* %ret, foo* %a, foo* %b) 10554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define void @func(foo* %ret, foo1 %a, foo1 %b) 10564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define foo1 @func(foo2 %a, foo2 %b) 10574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // as a result of argument / return value conversions. Here, "foo1" 10594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // and "foo2" refer to possibly coerced types, and the coerced 10604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // argument type may be different from the coerced return type. See 10614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // "Note on coercion" below.) 10624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Note also, we do not expect to encounter any case when the 10644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // arguments are promoted to pointers but the return value is 10654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // unpromoted to pointer, e.g. 
10664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define foo1 @func(foo* %a, foo* %b) 10684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // and we will throw an assertion in this case.) 10704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // The input kernel gets expanded into a kernel of the form 10724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define void @func.expand(i8* %inBuf, i8* outBuf, i32 len) 10744e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // which performs a serial reduction of `len` elements from `inBuf`, 10764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // and stores the result into `outBuf`. In pseudocode, @func.expand 10774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // does: 10784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10794e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // inArr := (foo *)inBuf; 10804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // accum := inArr[0]; 10814e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // for (i := 1; i < len; ++i) { 10824e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // accum := foo(accum, inArr[i]); 10834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // } 10844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // *(foo *)outBuf := accum; 10854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Note on coercion 10874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Both the return value and the argument types may undergo internal 10894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // coercion in clang as part of call lowering. 
As a result, the 10904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // return value type may differ from the argument type even if the 10914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // types in the RenderScript signature are the same. For instance, the 10924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // kernel 10934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // int3 add(int3 a, int3 b) { return a + b; } 10954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // gets lowered by clang as 10974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define <3 x i32> @add(<4 x i32> %a.coerce, <4 x i32> %b.coerce) 10994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 11004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // under AArch64. The details of this process are found in clang, 11014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // lib/CodeGen/TargetInfo.cpp, under classifyArgumentType() and 11024e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // classifyReturnType() in ARMABIInfo, AArch64ABIInfo. If the value 11034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // is passed by pointer, then the pointed-to type is not coerced. 11044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 11054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Since we lack the original type information, this code does loads 11064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // and stores of allocation data by way of pointers to the coerced 11074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // type. 
11084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bool ExpandReduce(llvm::Function *Function) { 11094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(Function); 11104e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 1111e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ALOGV("Expanding simple reduce kernel %s", Function->getName().str().c_str()); 11124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11134e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::DataLayout DL(Module); 11144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11154e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // TBAA Metadata 11164e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, *TBAAAllocation; 11174e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::MDBuilder MDHelper(*Context); 11184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAARenderScriptDistinct = 11204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala MDHelper.createTBAARoot(kRenderScriptTBAARootName); 11214e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName, 11224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAARenderScriptDistinct); 11234e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", 11244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAARenderScript); 11254e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, 11264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAAAllocation, 0); 11274e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11284e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function *ExpandedFunction = 11294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala createEmptyExpandedReduceKernel(Function->getName()); 11304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 
11314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Extract the expanded kernel's parameters. It is guaranteed by 1132e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // createEmptyExpandedReduceKernel that there will be 3 parameters. 11334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala auto ExpandedFunctionArgIter = ExpandedFunction->arg_begin(); 11344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *Arg_inBuf = &*(ExpandedFunctionArgIter++); 11364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *Arg_outBuf = &*(ExpandedFunctionArgIter++); 11374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *Arg_len = &*(ExpandedFunctionArgIter++); 11384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(Function->arg_size() == 2 || Function->arg_size() == 3); 11404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Check if, instead of returning a value, the original kernel has 11424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // a pointer parameter which points to a temporary buffer into 11434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // which the return value gets written. 11444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala const bool ReturnValuePointerStyle = (Function->arg_size() == 3); 11454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(Function->getReturnType()->isVoidTy() == ReturnValuePointerStyle); 11464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Check if, instead of being passed by value, the inputs to the 11484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // original kernel are passed by pointer. 
11494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala auto FirstArgIter = Function->arg_begin(); 11504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // The second argument is always an input to the original kernel. 11514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala auto SecondArgIter = std::next(FirstArgIter); 11524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala const bool InputsPointerStyle = SecondArgIter->getType()->isPointerTy(); 11534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Get the output type (i.e. return type of the original kernel). 11554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::PointerType *OutPtrTy = nullptr; 11564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Type *OutTy = nullptr; 11574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (ReturnValuePointerStyle) { 11584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutPtrTy = llvm::dyn_cast<llvm::PointerType>(FirstArgIter->getType()); 11594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(OutPtrTy && "Expected a pointer parameter to kernel"); 11604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutTy = OutPtrTy->getElementType(); 11614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } else { 11624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutTy = Function->getReturnType(); 11634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(!OutTy->isVoidTy()); 11644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutPtrTy = OutTy->getPointerTo(); 11654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 11664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Get the input type (type of the arguments to the original 11684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // kernel). 
Some input types are different from the output type, 11694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // due to explicit coercion that the compiler performs when 11704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // lowering the parameters. See "Note on coercion" above. 11714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::PointerType *InPtrTy; 11724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Type *InTy; 11734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (InputsPointerStyle) { 11744e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InPtrTy = llvm::dyn_cast<llvm::PointerType>(SecondArgIter->getType()); 11754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(InPtrTy && "Expected a pointer parameter to kernel"); 11764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ReturnValuePointerStyle); 11774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(std::next(SecondArgIter)->getType() == InPtrTy && 11784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala "Input type mismatch"); 11794e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InTy = InPtrTy->getElementType(); 11804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } else { 11814e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InTy = SecondArgIter->getType(); 11824e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InPtrTy = InTy->getPointerTo(); 11834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (!ReturnValuePointerStyle) { 11844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(InTy == FirstArgIter->getType() && "Input type mismatch"); 11854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } else { 11864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(InTy == std::next(SecondArgIter)->getType() && 11874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala "Input type mismatch"); 11884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 11894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 11904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 
11914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // The input type should take up the same amount of space in 11924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // memory as the output type. 11934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(DL.getTypeAllocSize(InTy) == DL.getTypeAllocSize(OutTy)); 11944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Construct the actual function body. 11964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 11974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Cast input and output buffers to appropriate types. 11994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *InBuf = Builder.CreatePointerCast(Arg_inBuf, InPtrTy); 12004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *OutBuf = Builder.CreatePointerCast(Arg_outBuf, OutPtrTy); 12014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12024e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Create a slot to pass temporary results back. This needs to be 12034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // separate from the accumulator slot because the kernel may mark 12044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // the return value slot as noalias. 12054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *ReturnBuf = nullptr; 12064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (ReturnValuePointerStyle) { 12074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ReturnBuf = Builder.CreateAlloca(OutTy, nullptr, "ret.tmp"); 12084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 12094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12104e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Create a slot to hold the second input if the inputs are passed 12114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // by pointer to the original kernel. 
We cannot directly pass a 12124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // pointer to the input buffer, because the kernel may modify its 12134e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // inputs. 12144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *SecondInputTempBuf = nullptr; 12154e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (InputsPointerStyle) { 12164e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala SecondInputTempBuf = Builder.CreateAlloca(InTy, nullptr, "in.tmp"); 12174e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 12184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Create a slot to accumulate temporary results, and fill it with 12204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // the first value. 12214e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *AccumBuf = Builder.CreateAlloca(OutTy, nullptr, "accum"); 12224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Cast to OutPtrTy before loading, since AccumBuf has type OutPtrTy. 12234e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::LoadInst *FirstElementLoad = Builder.CreateLoad( 12244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreatePointerCast(InBuf, OutPtrTy)); 12254e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (gEnableRsTbaa) { 12264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala FirstElementLoad->setMetadata("tbaa", TBAAAllocation); 12274e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 12284e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Memory operations with AccumBuf shouldn't be marked with 12294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // RenderScript TBAA, since this might conflict with TBAA metadata 12304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // in the kernel function when AccumBuf is passed by pointer. 
12314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateStore(FirstElementLoad, AccumBuf); 12324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Loop body 12344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Create the loop structure. Note that the first input in the input buffer 12364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // has already been accumulated, so that we start at index 1. 12374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::PHINode *IndVar; 12384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *Start = llvm::ConstantInt::get(Arg_len->getType(), 1); 12394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::BasicBlock *Exit = createLoop(Builder, Start, Arg_len, &IndVar); 12404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *InputPtr = Builder.CreateInBoundsGEP(InBuf, IndVar, "next_input.gep"); 12424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Set up arguments and call the original (unexpanded) kernel. 12444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 12454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // The original kernel can have at most 3 arguments, which is 12464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // achieved when the signature looks like: 12474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 12484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define void @func(foo* %ret, bar %a, bar %b) 12494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 12504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // (bar can be one of foo/foo.coerce/foo*). 
12514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::SmallVector<llvm::Value *, 3> KernelArgs; 12524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (ReturnValuePointerStyle) { 12544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala KernelArgs.push_back(ReturnBuf); 12554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 12564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (InputsPointerStyle) { 12584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ReturnValuePointerStyle); 12594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Because the return buffer is copied back into the 12604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // accumulator, it's okay if the accumulator is overwritten. 12614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala KernelArgs.push_back(AccumBuf); 12624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::LoadInst *InputLoad = Builder.CreateLoad(InputPtr); 12644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (gEnableRsTbaa) { 12654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InputLoad->setMetadata("tbaa", TBAAAllocation); 12664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 12674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateStore(InputLoad, SecondInputTempBuf); 12684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala KernelArgs.push_back(SecondInputTempBuf); 12704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } else { 12714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // InPtrTy may be different from OutPtrTy (the type of 12724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // AccumBuf), so first cast the accumulator buffer to the 12734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // pointer type corresponding to the input argument type. 
12744e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala KernelArgs.push_back( 12754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateLoad(Builder.CreatePointerCast(AccumBuf, InPtrTy))); 12764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::LoadInst *LoadedArg = Builder.CreateLoad(InputPtr); 12784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (gEnableRsTbaa) { 12794e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala LoadedArg->setMetadata("tbaa", TBAAAllocation); 12804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 12814e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala KernelArgs.push_back(LoadedArg); 12824e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 12834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *RetVal = Builder.CreateCall(Function, KernelArgs); 12854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala const uint64_t ElementSize = DL.getTypeStoreSize(OutTy); 12874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala const uint64_t ElementAlign = DL.getABITypeAlignment(OutTy); 12884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Store the output in the accumulator. 
12904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (ReturnValuePointerStyle) { 12914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateMemCpy(AccumBuf, ReturnBuf, ElementSize, ElementAlign); 12924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } else { 12934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateStore(RetVal, AccumBuf); 12944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 12954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Loop exit 12974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.SetInsertPoint(Exit, Exit->begin()); 12984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::LoadInst *OutputLoad = Builder.CreateLoad(AccumBuf); 13004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::StoreInst *OutputStore = Builder.CreateStore(OutputLoad, OutBuf); 13014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (gEnableRsTbaa) { 13024e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutputStore->setMetadata("tbaa", TBAAAllocation); 13034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 13044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 13054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return true; 13064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 13074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 1308e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Certain categories of functions that make up a general 1309e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // reduce-style kernel are called directly from the driver with no 1310e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // expansion needed. 
For a function in such a category, we need to 1311e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // promote linkage from static to external, to ensure that the 1312e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // function is visible to the driver in the dynamic symbol table. 1313e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // This promotion is safe because we don't have any kind of cross 1314e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // translation unit linkage model (except for linking against 1315e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // RenderScript libraries), so we do not risk name clashes. 1316e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bool PromoteReduceNewFunction(const char *Name, FunctionSet &PromotedFunctions) { 1317e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (!Name) // a presumably-optional function that is not present 1318e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return false; 1319e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1320e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function *Fn = Module->getFunction(Name); 1321e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(Fn != nullptr); 1322e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (PromotedFunctions.insert(Fn).second) { 1323e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(Fn->getLinkage() == llvm::GlobalValue::InternalLinkage); 1324e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Fn->setLinkage(llvm::GlobalValue::ExternalLinkage); 1325e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return true; 1326e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 1327e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1328e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return false; 1329e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 1330e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1331e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Expand the accumulator 
function for a general reduce-style kernel. 1332e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1333e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // The input is a function of the form 1334e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1335e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // define void @func(accumType* %accum, foo1 in1[, ... fooN inN] [, special arguments]) 1336e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1337e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // where all arguments except the first are the same as for a foreach kernel. 1338e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1339e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // The input accumulator function gets expanded into a function of the form 1340e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1341e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // define void @func.expand(%RsExpandKernelDriverInfoPfx* %p, i32 %x1, i32 %x2, accumType* %accum) 1342e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1343e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // which performs a serial accumulaion of elements [x1, x2) into *%accum. 1344e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1345e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // In pseudocode, @func.expand does: 1346e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1347e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // for (i = %x1; i < %x2; ++i) { 1348e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // func(%accum, 1349e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // *((foo1 *)p->inPtr[0] + i)[, ... 
*((fooN *)p->inPtr[N-1] + i) 1350e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // [, p] [, i] [, p->current.y] [, p->current.z]); 1351e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // } 1352e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1353e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // This is very similar to foreach kernel expansion with no output. 1354e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bool ExpandReduceNewAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) { 1355e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ALOGV("Expanding accumulator %s for general reduce kernel", 1356e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross FnAccumulator->getName().str().c_str()); 1357e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1358e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Create TBAA meta-data. 1359e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, 1360e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross *TBAAAllocation, *TBAAPointer; 1361e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::MDBuilder MDHelper(*Context); 1362e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScriptDistinct = 1363e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross MDHelper.createTBAARoot(kRenderScriptTBAARootName); 1364e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName, 1365e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScriptDistinct); 1366e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", 1367e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScript); 1368e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, 1369e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAAllocation, 0); 
1370e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", 1371e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScript); 1372e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0); 1373e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1374e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross auto AccumulatorArgIter = FnAccumulator->arg_begin(); 1375e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1376e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Create empty accumulator function. 1377e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function *FnExpandedAccumulator = 1378e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross createEmptyExpandedReduceNewAccumulator(FnAccumulator->getName(), 1379e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross (AccumulatorArgIter++)->getType()); 1380e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1381e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Extract the expanded accumulator's parameters. It is 1382e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // guaranteed by createEmptyExpandedReduceNewAccumulator that 1383e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // there will be 4 parameters. 
1384e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceNewAccumulatorParams); 1385e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin(); 1386e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_p = &*(ExpandedAccumulatorArgIter++); 1387e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_x1 = &*(ExpandedAccumulatorArgIter++); 1388e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_x2 = &*(ExpandedAccumulatorArgIter++); 1389e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_accum = &*(ExpandedAccumulatorArgIter++); 1390e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1391e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Construct the actual function body. 1392e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::IRBuilder<> Builder(FnExpandedAccumulator->getEntryBlock().begin()); 1393e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1394e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Create the loop structure. 
1395e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock(); 1396e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::PHINode *IndVar; 1397e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross createLoop(Builder, Arg_x1, Arg_x2, &IndVar); 1398e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1399e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 1400e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const int CalleeArgsContextIdx = 1401e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExpandSpecialArguments(Signature, IndVar, Arg_p, Builder, CalleeArgs, 1402e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross [](){}, LoopHeader->getTerminator()); 1403e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1404e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVector<llvm::Value*, 8> InBufPtrs; 1405e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVector<llvm::Value*, 8> InStructTempSlots; 1406e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, AccumulatorArgIter, NumInputs, 1407e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InBufPtrs, InStructTempSlots); 1408e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1409e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Populate the actual call to the original accumulator. 
1410e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVector<llvm::Value*, 8> RootArgs; 1411e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross RootArgs.push_back(Arg_accum); 1412e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InBufPtrs, InStructTempSlots, 1413e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross IndVar, RootArgs); 1414e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *FnAccumulator, Builder); 1415e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.CreateCall(FnAccumulator, RootArgs); 1416e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1417e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return true; 1418e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 1419e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 142018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// @brief Checks if pointers to allocation internals are exposed 142118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// 142218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// This function verifies if through the parameters passed to the kernel 142318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// or through calls to the runtime library the script gains access to 142418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// pointers pointing to data within a RenderScript Allocation. 
142518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// If we know we control all loads from and stores to data within 142618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// RenderScript allocations and if we know the run-time internal accesses 142718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// are all annotated with RenderScript TBAA metadata, only then we 142818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// can safely use TBAA to distinguish between generic and from-allocation 142918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// pointers. 1430bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool allocPointersExposed(llvm::Module &Module) { 143118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Old style kernel function can expose pointers to elements within 143218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // allocations. 143318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // TODO: Extend analysis to allow simple cases of old-style kernels. 
143425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines for (size_t i = 0; i < mExportForEachCount; ++i) { 143525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char *Name = mExportForEachNameList[i]; 143625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t Signature = mExportForEachSignatureList[i]; 1437bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (Module.getFunction(Name) && 1438d88177580db4ddedf680854c51db333c97eabc59Stephen Hines !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) { 143918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 144018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 144118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 144218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 144318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Check for library functions that expose a pointer to an Allocation or 144418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // that are not yet annotated with RenderScript-specific tbaa information. 1445e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala static const std::vector<const char *> Funcs{ 1446e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAt(...) 
1447e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationj", 1448e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationjj", 1449e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationjjj", 1450e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1451e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsSetElementAt() 1452e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvj", 1453e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvjj", 1454e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvjjj", 1455e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1456e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_Y() 1457e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj", 1458e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1459e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_U() 1460e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj", 1461e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1462e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_V() 1463e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj", 1464e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala }; 1465e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1466e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala for (auto FI : Funcs) { 1467e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala llvm::Function *Function = Module.getFunction(FI); 146818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1469bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (!Function) { 1470e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala ALOGE("Missing run-time function '%s'", FI); 147118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias 
Grosser return true; 147218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 147318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1474bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (Function->getNumUses() > 0) { 147518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 147618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 147718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 147818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 147918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return false; 148018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 148118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 148218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// @brief Connect RenderScript TBAA metadata to C/C++ metadata 148318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// 148418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// The TBAA metadata used to annotate loads/stores from RenderScript 1485e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// Allocations is generated in a separate TBAA tree with a 1486354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for 1487354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// all nodes in unrelated alias analysis trees. This function makes the 1488354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root), 1489e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With 1490e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// the connected trees every access to an Allocation is resolved to 1491e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// must-alias if compared to a normal C/C++ access. 
1492bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes void connectRenderScriptTBAAMetadata(llvm::Module &Module) { 1493bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDBuilder MDHelper(*Context); 1494354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScriptDistinct = 1495354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines MDHelper.createTBAARoot("RenderScript Distinct TBAA"); 1496354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode( 1497354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines "RenderScript TBAA", TBAARenderScriptDistinct); 1498bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA"); 1499354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScript->replaceOperandWith(1, TBAARoot); 150018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 150118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1502bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes virtual bool runOnModule(llvm::Module &Module) { 1503bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool Changed = false; 1504bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes this->Module = &Module; 15054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Context = &Module.getContext(); 1506bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 15074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala buildTypes(); 1508bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 1509bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bcinfo::MetadataExtractor me(&Module); 151025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines if (!me.extract()) { 151125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines ALOGE("Could not extract metadata from module!"); 151225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines return false; 151325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines } 15144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt 
Wala 15154e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Expand forEach_* style kernels. 151625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachCount = me.getExportForEachSignatureCount(); 151725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachNameList = me.getExportForEachNameList(); 151825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachSignatureList = me.getExportForEachSignatureList(); 1519db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 152025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines for (size_t i = 0; i < mExportForEachCount; ++i) { 152125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char *name = mExportForEachNameList[i]; 152225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t signature = mExportForEachSignatureList[i]; 1523bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *kernel = Module.getFunction(name); 1524cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser if (kernel) { 1525d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) { 15264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Changed |= ExpandForEach(kernel, signature); 1527acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 1528acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } else if (kernel->getReturnType()->isVoidTy()) { 15294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Changed |= ExpandOldStyleForEach(kernel, signature); 1530acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 1531acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } else { 1532acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // There are some graphics root functions that are not 1533acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // expanded, but that will be called directly. 
For those 1534acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // functions, we can not set the linkage to internal. 1535acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } 1536cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines } 1537db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1538db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1539e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Expand simple reduce_* style kernels. 15404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala mExportReduceCount = me.getExportReduceCount(); 15414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala mExportReduceNameList = me.getExportReduceNameList(); 15424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 15434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala for (size_t i = 0; i < mExportReduceCount; ++i) { 15444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function *kernel = Module.getFunction(mExportReduceNameList[i]); 15454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (kernel) { 15464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Changed |= ExpandReduce(kernel); 15474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 15484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 15494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 1550e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Process general reduce_* style functions. 
1551e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const size_t ExportReduceNewCount = me.getExportReduceNewCount(); 1552e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const bcinfo::MetadataExtractor::ReduceNew *ExportReduceNewList = me.getExportReduceNewList(); 1553e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Note that functions can be shared between kernels 1554e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross FunctionSet PromotedFunctions, ExpandedAccumulators; 1555e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1556e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross for (size_t i = 0; i < ExportReduceNewCount; ++i) { 1557e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mInitializerName, PromotedFunctions); 1558e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mOutConverterName, PromotedFunctions); 1559e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1560e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Accumulator 1561e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function *accumulator = Module.getFunction(ExportReduceNewList[i].mAccumulatorName); 1562e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(accumulator != nullptr); 1563e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (ExpandedAccumulators.insert(accumulator).second) 1564e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Changed |= ExpandReduceNewAccumulator(accumulator, 1565e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExportReduceNewList[i].mSignature, 1566e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExportReduceNewList[i].mInputCount); 1567e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 1568e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 15694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (gEnableRsTbaa && !allocPointersExposed(Module)) { 1570bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris 
Wailes connectRenderScriptTBAAMetadata(Module); 157118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 157218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1573cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines return Changed; 1574db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1575db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1576db169187dea4602e4ad32058762d23d474753fd0Stephen Hines virtual const char *getPassName() const { 15774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return "forEach_* and reduce_* function expansion"; 1578db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1579db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 15804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala}; // end RSKernelExpandPass 1581db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 15827a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace 15837a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 15844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walachar RSKernelExpandPass::ID = 0; 15854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walastatic llvm::RegisterPass<RSKernelExpandPass> X("kernelexp", "Kernel Expand Pass"); 1586db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1587db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc { 1588db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 15897a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass * 15904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt WalacreateRSKernelExpandPass(bool pEnableStepOpt) { 15914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return new RSKernelExpandPass(pEnableStepOpt); 15927a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} 1593db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 15947a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc 1595