RSKernelExpand.cpp revision 4e7a50685ae18a24087f6f2a51c604e71fab69e2
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/* 2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project 3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License"); 5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License. 6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at 7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * http://www.apache.org/licenses/LICENSE-2.0 9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software 11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS, 12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and 14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License. 
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h" 18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h" 197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib> 2133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional> 227a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 23b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h> 24b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h> 25b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h> 26b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h> 2718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h> 28b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h> 29c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h> 307ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h> 31b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h> 32cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h> 33b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h> 34806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h> 35c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang 36c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h" 37ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h" 38db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 39d88177580db4ddedf680854c51db333c97eabc59Stephen Hines#include "bcinfo/MetadataExtractor.h" 
40d88177580db4ddedf680854c51db333c97eabc59Stephen Hines 414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala#ifndef __DISABLE_ASSERTS 424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala// Only used in bccAssert() 434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst int kNumExpandedForeachParams = 4; 444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst int kNumExpandedReduceParams = 3; 454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala#endif 464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA"; 484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst char kRenderScriptTBAANodeName[] = "RenderScript TBAA"; 49bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 507a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc; 517a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 52db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace { 537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 54354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hinesstatic const bool gEnableRsTbaa = true; 559c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines 564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala/* RSKernelExpandPass - This pass operates on functions that are able 574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * to be called via rsForEach(), "foreach_<NAME>", or 584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * "reduce_<NAME>". We create an inner loop for the function to be 594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * invoked over the appropriate data cells of the input/output 604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * allocations (adjusting other relevant parameters as we go). We 614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * support doing this for any forEach or reduce style compute 624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * kernels. 
The new function name is the original function name 634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * followed by ".expand". Note that we still generate code for the 644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * original function. 657a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */ 664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaclass RSKernelExpandPass : public llvm::ModulePass { 6733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grosspublic: 68db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static char ID; 69db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 7033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossprivate: 71e44a3525b9703739534c3b62d7d1af4c95649a38David Gross static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h 72e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 73e44a3525b9703739534c3b62d7d1af4c95649a38David Gross enum RsLaunchDimensionsField { 74e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldX, 75e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldY, 76e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldZ, 77e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldLod, 78e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldFace, 79e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldArray, 80e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 81e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldCount 82e44a3525b9703739534c3b62d7d1af4c95649a38David Gross }; 83e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 84e44a3525b9703739534c3b62d7d1af4c95649a38David Gross enum RsExpandKernelDriverInfoPfxField { 85e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInPtr, 86e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInStride, 
87e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInLen, 88e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutPtr, 89e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutStride, 90e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutLen, 91e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldDim, 92e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldCurrent, 93e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldUsr, 94e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldUsLenr, 95e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 96e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldCount 97e44a3525b9703739534c3b62d7d1af4c95649a38David Gross }; 9833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 99bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Module *Module; 100bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::LLVMContext *Context; 101bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 102bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 1034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * Pointers to LLVM type information for the the function signatures 1044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * for expanded functions. These must be re-calculated for each module 1054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * the pass is run on. 
106bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 1074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::FunctionType *ExpandedForEachType, *ExpandedReduceType; 108db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 10925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t mExportForEachCount; 11025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char **mExportForEachNameList; 11125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const uint32_t *mExportForEachSignatureList; 112cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines 1134e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala uint32_t mExportReduceCount; 1144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala const char **mExportReduceNameList; 1154e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 1162b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Turns on optimization of allocation stride values. 1172b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bool mEnableStepOpt; 1182b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 119bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes uint32_t getRootSignature(llvm::Function *Function) { 120db169187dea4602e4ad32058762d23d474753fd0Stephen Hines const llvm::NamedMDNode *ExportForEachMetadata = 121bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Module->getNamedMetadata("#rs_export_foreach"); 122db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 123db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (!ExportForEachMetadata) { 124db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 8> RootArgTys; 125bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes for (llvm::Function::arg_iterator B = Function->arg_begin(), 126bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes E = Function->arg_end(); 127db169187dea4602e4ad32058762d23d474753fd0Stephen Hines B != E; 128db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ++B) { 129db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 
RootArgTys.push_back(B->getType()); 130db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 131db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 132db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // For pre-ICS bitcode, we may not have signature information. In that 133db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // case, we use the size of the RootArgTys to select the number of 134db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // arguments. 135db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return (1 << RootArgTys.size()) - 1; 136db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 137db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1387ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines if (ExportForEachMetadata->getNumOperands() == 0) { 1397ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return 0; 1407ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 1417ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 1426e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines bccAssert(ExportForEachMetadata->getNumOperands() > 0); 143db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 144cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // We only handle the case for legacy root() functions here, so this is 145cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // hard-coded to look at only the first such function. 
146db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 147900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes if (SigNode != nullptr && SigNode->getNumOperands() == 1) { 1481bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines llvm::Metadata *SigMD = SigNode->getOperand(0); 1491bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) { 1501bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines llvm::StringRef SigString = SigS->getString(); 151db169187dea4602e4ad32058762d23d474753fd0Stephen Hines uint32_t Signature = 0; 152db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigString.getAsInteger(10, Signature)) { 153db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 154db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 155db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 156db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature; 157db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 158db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 159db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 160db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 161db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 162db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 163429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray bool isStepOptSupported(llvm::Type *AllocType) { 164429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 165429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 166429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 167429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 168429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (mEnableStepOpt) { 
169429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 170429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 171429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 172429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType == VoidPtrTy) { 173429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 174429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 175429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 176429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (!PT) { 177429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 178429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 179429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 180429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // remaining conditions are 64-bit only 181429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (VoidPtrTy->getPrimitiveSizeInBits() == 32) { 182429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return true; 183429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 184429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 185429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // coerce suggests an upconverted struct type, which we can't support 186429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) { 187429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 188429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 189429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 190429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported 191429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2); 192429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128); 193429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType == V2xi64Ty || AllocType == Int128Ty) { 
194429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 195429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 196429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 197429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return true; 198429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 199429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 2002b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Get the actual value we should use to step through an allocation. 2017b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 2027b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // Normally the value we use to step through an allocation is given to us by 2037b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // the driver. However, for certain primitive data types, we can derive an 2047b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // integer constant for the step value. We use this integer constant whenever 2057b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // possible to allow further compiler optimizations to take place. 2067b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 207b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines // DL - Target Data size/layout information. 2082b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // T - Type of allocation (should be a pointer). 2092b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // OrigStep - Original step increment (root.expand() input from driver). 
210bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType, 2112b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *OrigStep) { 212b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines bccAssert(DL); 213bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(AllocType); 2142b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bccAssert(OrigStep); 215bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 216429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (isStepOptSupported(AllocType)) { 2172b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Type *ET = PT->getElementType(); 218b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines uint64_t ETSize = DL->getTypeAllocSize(ET); 219bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 2202b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return llvm::ConstantInt::get(Int32Ty, ETSize); 2212b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } else { 2222b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return OrigStep; 2232b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 2242b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 2252b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 226097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes /// Builds the types required by the pass for the given context. 227bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes void buildTypes(void) { 228e44a3525b9703739534c3b62d7d1af4c95649a38David Gross // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs. 
229bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 230e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8Ty = llvm::Type::getInt8Ty(*Context); 231e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8PtrTy = Int8Ty->getPointerTo(); 232e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT); 233e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 234e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32ArrayInputLimitTy = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT); 235e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 236e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32Array4Ty = llvm::ArrayType::get(Int32Ty, 4); 237097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 238097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h: 239db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 240e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * struct RsLaunchDimensions { 241e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t x; 242db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t y; 243db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t z; 244e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t lod; 245e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t face; 246e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t array[4]; 247e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * }; 248e44a3525b9703739534c3b62d7d1af4c95649a38David Gross */ 249e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes; 250e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 
RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t x 251e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t y 252e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t z 253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t lod 254e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t face 255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4] 256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType *RsLaunchDimensionsTy = 257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions"); 258e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 2591d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h: 260e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 261e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * struct RsExpandKernelDriverInfoPfx { 262e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]; 263e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t inStride[RS_KERNEL_INPUT_LIMIT]; 264e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t inLen; 265e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]; 267e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t outStride[RS_KERNEL_INPUT_LIMIT]; 268e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t outLen; 269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 270e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // Dimension of the launch 
271e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * RsLaunchDimensions dim; 272e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 273e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // The walking iterator of the launch 274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * RsLaunchDimensions current; 275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * const void *usr; 277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t usrLen; 278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // Items below this line are not used by the compiler and can be change in the driver. 280e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // So the compiler must assume there are an unknown number of fields of unknown type 281e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // beginning here. 282db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * }; 2831d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross * 2841d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp). 
285db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 286e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes; 287e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT] 288e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t inStride[RS_KERNEL_INPUT_LIMIT] 289e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t inLen 290e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT] 291e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t outStride[RS_KERNEL_INPUT_LIMIT] 292e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t outLen 293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions dim 294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions current 295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy); // const void *usr 296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t usrLen 297e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType *RsExpandKernelDriverInfoPfxTy = 298e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx"); 299bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris 
Wailes 300bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes // Create the function type for expanded kernels. 3014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context); 302bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 303e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo(); 3044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep) 3054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ExpandedForEachType = llvm::FunctionType::get(VoidTy, 3064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false); 307bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 3084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // void (void *inBuf, void *outBuf, uint32_t len) 3094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ExpandedReduceType = llvm::FunctionType::get(VoidTy, {VoidPtrTy, VoidPtrTy, Int32Ty}, false); 3108ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 3118ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 3124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala /// @brief Create skeleton of the expanded foreach kernel. 
313357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 314357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// This creates a function with the following signature: 315357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 316357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 3175010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes /// uint32_t outstep) 318357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 3194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function *createEmptyExpandedForEachKernel(llvm::StringRef OldName) { 320bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 3214e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function::Create(ExpandedForEachType, 322bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::GlobalValue::ExternalLinkage, 323bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes OldName + ".expand", Module); 3244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams); 325bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin(); 326bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("p"); 327bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("x1"); 328bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("x2"); 329bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("arg_outstep"); 3304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 3314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ExpandedFunction); 3324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::IRBuilder<> Builder(Begin); 3334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateRetVoid(); 3344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return 
ExpandedFunction; 3354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 3364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 3374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Create skeleton of the expanded reduce kernel. 3384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 3394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // This creates a function with the following signature: 3404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 3414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // void @func.expand(i8* nocapture %inBuf, i8* nocapture %outBuf, i32 len) 3424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 3434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function *createEmptyExpandedReduceKernel(llvm::StringRef OldName) { 3444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function *ExpandedFunction = 3454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function::Create(ExpandedReduceType, 3464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::GlobalValue::ExternalLinkage, 3474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OldName + ".expand", Module); 3484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ExpandedFunction->arg_size() == kNumExpandedReduceParams); 3494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 3504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin(); 3514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 3524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala using llvm::Attribute; 3534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 3544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Argument *InBuf = &(*AI++); 3554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InBuf->setName("inBuf"); 3564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InBuf->addAttr(llvm::AttributeSet::get(*Context, InBuf->getArgNo() + 1, {Attribute::NoCapture})); 3574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 
3584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Argument *OutBuf = &(*AI++); 3594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutBuf->setName("outBuf"); 3604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutBuf->addAttr(llvm::AttributeSet::get(*Context, OutBuf->getArgNo() + 1, {Attribute::NoCapture})); 3614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 3624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala (AI++)->setName("len"); 363bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 364bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 365bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction); 366806075b3a54af826fea78490fb213d8a0784138eTobias Grosser llvm::IRBuilder<> Builder(Begin); 367806075b3a54af826fea78490fb213d8a0784138eTobias Grosser Builder.CreateRetVoid(); 368806075b3a54af826fea78490fb213d8a0784138eTobias Grosser 369bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes return ExpandedFunction; 370357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser } 371357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser 372e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @brief Create an empty loop 373e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 374e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// Create a loop of the form: 375e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 376e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// for (i = LowerBound; i < UpperBound; i++) 377e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// ; 378e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 379e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// After the loop has been created, the builder is set such that 380e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// instructions can be added to the loop body. 
381e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 382e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param Builder The builder to use to build this loop. The current 383e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// position of the builder is the position the loop 384e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// will be inserted. 385e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LowerBound The first value of the loop iterator 386e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param UpperBound The maximal value of the loop iterator 387e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LoopIV A reference that will be set to the loop iterator. 388e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @return The BasicBlock that will be executed after the loop. 389e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder, 390e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *LowerBound, 391e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *UpperBound, 392e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::PHINode **LoopIV) { 393c2ca742d7d0197c52e49467862844463fb42280fDavid Gross bccAssert(LowerBound->getType() == UpperBound->getType()); 394e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 395e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB; 396e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *Cond, *IVNext; 397e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::PHINode *IV; 398e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 399e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser CondBB = Builder.GetInsertBlock(); 4001bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), nullptr, nullptr); 
401bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent()); 402e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 403e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // if (LowerBound < Upperbound) 404e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto LoopHeader 405e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // else 406e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto AfterBB 407e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser CondBB->getTerminator()->eraseFromParent(); 408e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(CondBB); 409e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(LowerBound, UpperBound); 410e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 411e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 412e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ] 413e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // iv.next = iv + 1 414e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // if (iv.next < Upperbound) 415e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto LoopHeader 416e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // else 417e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto AfterBB 418e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(HeaderBB); 419e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV = Builder.CreatePHI(LowerBound->getType(), 2, "X"); 420e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV->addIncoming(LowerBound, CondBB); 421e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1)); 422e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IV->addIncoming(IVNext, HeaderBB); 
423e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(IVNext, UpperBound); 424e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 425e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser AfterBB->setName("Exit"); 426e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 427e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser *LoopIV = IV; 428e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser return AfterBB; 429e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser } 430e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 43128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Finish building the outgoing argument list for calling a ForEach-able function. 43228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // 43328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // ArgVector - on input, the non-special arguments 43428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // on output, the non-special arguments combined with the special arguments 43528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // from SpecialArgVector 43628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // SpecialArgVector - special arguments (from ExpandSpecialArguments()) 43728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // SpecialArgContextIdx - return value of ExpandSpecialArguments() 43828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // (position of context argument in SpecialArgVector) 43928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // CalleeFunction - the ForEach-able function being called 44028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Builder - for inserting code into the caller function 44128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen> 44228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross void finishArgList( llvm::SmallVector<llvm::Value 
*, ArgVectorLen> &ArgVector, 44328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector, 44428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int SpecialArgContextIdx, 44528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const llvm::Function &CalleeFunction, 44628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::IRBuilder<> &CallerBuilder) { 44728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross /* The context argument (if any) is a pointer to an opaque user-visible type that differs from 44828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the 44928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * two types represent the same thing). Therefore, we must introduce a pointer cast when 45028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * generating a call to the kernel function. 45128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross */ 45228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int ArgContextIdx = 45328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross SpecialArgContextIdx >= 0 ? 
(ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx; 45428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end()); 45528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross if (ArgContextIdx >= 0) { 45628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Type *ContextArgType = nullptr; 45728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int ArgIdx = ArgContextIdx; 45828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross for (const auto &Arg : CalleeFunction.getArgumentList()) { 45928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross if (!ArgIdx--) { 46028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ContextArgType = Arg.getType(); 46128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross break; 46228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 46328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 46428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross bccAssert(ContextArgType); 46528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType); 46628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 46728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 46828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 469083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // GEPHelper() returns a SmallVector of values suitable for passing 470083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for 471083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // the returned data type. It is sized so that the SmallVector 472083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // returned by GEPHelper() never needs to do a heap allocation for 473083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // any list of GEP indices it encounters in the code. 
474083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices; 475083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 476083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Helper for turning a list of constant integer GEP indices into a 477083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // SmallVector of llvm::Value*. The return value is suitable for 478083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP(). 479083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // 480083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Inputs: 481083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // I32Args should be integers which represent the index arguments 482083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // to a GEP instruction. 483083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // 484083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Returns: 485083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Returns a SmallVector of ConstantInts. 
4864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala SmallGEPIndices GEPHelper(const std::initializer_list<int32_t> I32Args) { 487083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices Out(I32Args.size()); 488083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context); 489083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala std::transform(I32Args.begin(), I32Args.end(), Out.begin(), 490083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); }); 491083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala return Out; 492083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala } 493083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 4948ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic: 4954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala RSKernelExpandPass(bool pEnableStepOpt = true) 496900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes : ModulePass(ID), Module(nullptr), Context(nullptr), 497bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes mEnableStepOpt(pEnableStepOpt) { 498bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 4998ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 5008ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 501c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { 502c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines // This pass does not use any other analysis passes, but it does 503c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines // add/wrap the existing functions in the module (thus altering the CFG). 
504c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines } 505c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines 50633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Build contribution to outgoing argument list for calling a 50733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // ForEach-able function, based on the special parameters of that 50833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // function. 50933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // 51033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Signature - metadata bits for the signature of the ForEach-able function 51133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // X, Arg_p - values derived directly from expanded function, 51233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // suitable for computing arguments for the ForEach-able function 51333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // CalleeArgs - contribution is accumulated here 51433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Bump - invoked once for each contributed outgoing argument 515083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // LoopHeaderInsertionPoint - an Instruction in the loop header, before which 516083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // this function can insert loop-invariant loads 51728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // 51828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Return value is the (zero-based) position of the context (Arg_p) 51928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // argument in the CalleeArgs vector, or a negative value if the 52028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // context argument is not placed in the CalleeArgs vector. 
52128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int ExpandSpecialArguments(uint32_t Signature, 52228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Value *X, 52328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Value *Arg_p, 52428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::IRBuilder<> &Builder, 52528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::SmallVector<llvm::Value*, 8> &CalleeArgs, 526083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala std::function<void ()> Bump, 527083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Instruction *LoopHeaderInsertionPoint) { 52828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 52928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross bccAssert(CalleeArgs.empty()); 53028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 53128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int Return = -1; 53233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) { 53333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross CalleeArgs.push_back(Arg_p); 53433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross Bump(); 53528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross Return = CalleeArgs.size() - 1; 53633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 53733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 53833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) { 53933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross CalleeArgs.push_back(X); 54033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross Bump(); 54133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 54233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 543e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) || 544e44a3525b9703739534c3b62d7d1af4c95649a38David Gross bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 
545083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala bccAssert(LoopHeaderInsertionPoint); 54633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 547083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Y and Z are loop invariant, so they can be hoisted out of the 548083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // loop. Set the IRBuilder insertion point to the loop header. 549083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala auto OldInsertionPoint = Builder.saveIP(); 550083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.SetInsertPoint(LoopHeaderInsertionPoint); 551e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 552e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) { 553083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent, 554083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala RsLaunchDimensionsFieldY})); 555083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep"); 556083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y")); 557e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Bump(); 558e44a3525b9703739534c3b62d7d1af4c95649a38David Gross } 559e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 560e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 561083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent, 562083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala RsLaunchDimensionsFieldZ})); 563083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep"); 564083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z")); 
565e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Bump(); 566e44a3525b9703739534c3b62d7d1af4c95649a38David Gross } 567083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 568083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.restoreIP(OldInsertionPoint); 56933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 57028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 57128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross return Return; 57233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 57333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 5748ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser /* Performs the actual optimization on a selected function. On success, the 5758ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * Module will contain a new function of the name "<NAME>.expand" that 5768ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * invokes <NAME>() in a loop with the appropriate parameters. 5778ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser */ 5784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bool ExpandOldStyleForEach(llvm::Function *Function, uint32_t Signature) { 579bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ALOGV("Expanding ForEach-able Function %s", 580bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Function->getName().str().c_str()); 5818ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 5828ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser if (!Signature) { 583bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Signature = getRootSignature(Function); 5848ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser if (!Signature) { 5858ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser // We couldn't determine how to expand this function based on its 5868ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser // function signature. 
5878ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser return false; 5888ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 5898ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 5908ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 591bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::DataLayout DL(Module); 5928ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 593bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 5944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala createEmptyExpandedForEachKernel(Function->getName()); 595db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 596bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 597bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * Extract the expanded function's parameters. It is guaranteed by 598bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * createEmptyExpandedFunction that there will be five parameters. 599bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 60033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 6014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams); 60233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 603bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator ExpandedFunctionArgIter = 604bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction->arg_begin(); 605db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 606bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 607bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 608bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 6095010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter); 610bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 
611900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *InStep = nullptr; 612900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutStep = nullptr; 613db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 614db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Construct the actual function body. 615bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 616db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 617cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Collect and construct the arguments for the kernel(). 618db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Note that we load any loop-invariant arguments before entering the Loop. 619bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin(); 620db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 621900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Type *InTy = nullptr; 622083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *InBufPtr = nullptr; 623d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) { 624083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0})); 625083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::LoadInst *InStepArg = Builder.CreateLoad( 626083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr"); 627e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 628bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes InTy = (FunctionArgIter++)->getType(); 629e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes InStep = getStepValue(&DL, InTy, InStepArg); 630e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 6312b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 
InStep->setName("instep"); 632e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 633083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0})); 634083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InBufPtr = Builder.CreateLoad( 635083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf"); 636db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 637db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 638900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Type *OutTy = nullptr; 639900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutBasePtr = nullptr; 640d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 641bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes OutTy = (FunctionArgIter++)->getType(); 642b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines OutStep = getStepValue(&DL, OutTy, Arg_outstep); 6432b04086acbef6520ae2c54a868b1271abf053122Stephen Hines OutStep->setName("outstep"); 644083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0})); 645083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep")); 646db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 647db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 648900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *UsrData = nullptr; 649d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) { 650bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *UsrDataTy = (FunctionArgIter++)->getType(); 651083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, 
Arg_p, RsExpandKernelDriverInfoPfxFieldUsr); 652083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy); 653db169187dea4602e4ad32058762d23d474753fd0Stephen Hines UsrData->setName("UsrData"); 654db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 655db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 656083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock(); 65733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::PHINode *IV; 65833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross createLoop(Builder, Arg_x1, Arg_x2, &IV); 659097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 66033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 66128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 662083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [&FunctionArgIter]() { FunctionArgIter++; }, 663083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala LoopHeader->getTerminator()); 664db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 665bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(FunctionArgIter == Function->arg_end()); 666db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 667cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Populate the actual call to kernel(). 
668db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 669db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 670900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *InPtr = nullptr; 671900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutPtr = nullptr; 672db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 673ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser // Calculate the current input and output pointers 67402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // 675ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser // We always calculate the input/output pointers with a GEP operating on i8 67602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // values and only cast at the very end to OutTy. This is because the step 67702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // between two values is given in bytes. 67802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // 67902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // TODO: We could further optimize the output by using a GEP operation of 68002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // type 'OutTy' in cases where the element type of the allocation allows. 
68102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser if (OutBasePtr) { 68202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 68302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutOffset = Builder.CreateMul(OutOffset, OutStep); 684083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset); 68502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutPtr = Builder.CreatePointerCast(OutPtr, OutTy); 68602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser } 687bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 688083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala if (InBufPtr) { 689ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1); 690ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InOffset = Builder.CreateMul(InOffset, InStep); 691083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset); 692ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InPtr = Builder.CreatePointerCast(InPtr, InTy); 693ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser } 69402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser 695ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser if (InPtr) { 6967ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines RootArgs.push_back(InPtr); 697db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 698db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 69902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser if (OutPtr) { 7007ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines RootArgs.push_back(OutPtr); 701db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 702db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 703db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (UsrData) { 704db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(UsrData); 
705db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 706db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 70728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder); 708db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 709bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Builder.CreateCall(Function, RootArgs); 710db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 7117ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return true; 7127ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 7137ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 7144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala /* Expand a pass-by-value foreach kernel. 7157ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines */ 7164e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bool ExpandForEach(llvm::Function *Function, uint32_t Signature) { 717d88177580db4ddedf680854c51db333c97eabc59Stephen Hines bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)); 718bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ALOGV("Expanding kernel Function %s", Function->getName().str().c_str()); 7197ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 7204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // TODO: Refactor this to share functionality with ExpandOldStyleForEach. 721bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::DataLayout DL(Module); 7227ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 723bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 7244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala createEmptyExpandedForEachKernel(Function->getName()); 7257ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 726bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 727bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * Extract the expanded function's parameters. 
It is guaranteed by 728bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * createEmptyExpandedFunction that there will be five parameters. 729bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 730881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 7314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams); 732881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 733bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator ExpandedFunctionArgIter = 734bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction->arg_begin(); 735bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 736bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 737bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 738bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 7395010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter); 7407ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 7417ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines // Construct the actual function body. 742bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 7437ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 74418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Create TBAA meta-data. 
745354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, 746354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines *TBAAAllocation, *TBAAPointer; 747bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDBuilder MDHelper(*Context); 74814588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien 749354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScriptDistinct = 7504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala MDHelper.createTBAARoot(kRenderScriptTBAARootName); 7514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName, 752354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScriptDistinct); 753e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", 754e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAARenderScript); 755e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, 756e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation, 0); 757e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", 758e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAARenderScript); 75914588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0); 76018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 761881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes /* 762881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * Collect and construct the arguments for the kernel(). 763881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * 764881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * Note that we load any loop-invariant arguments before entering the Loop. 
765881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes */ 766083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala size_t NumRemainingInputs = Function->arg_size(); 7677ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 768881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // No usrData parameter on kernels. 769881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes bccAssert( 770881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)); 771881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 772881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes llvm::Function::arg_iterator ArgIter = Function->arg_begin(); 773881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 774881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Check the return type 775bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Type *OutTy = nullptr; 776bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Value *OutStep = nullptr; 777bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::LoadInst *OutBasePtr = nullptr; 778bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Value *CastedOutBasePtr = nullptr; 779881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 780e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes bool PassOutByPointer = false; 781881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 782d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 783bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *OutBaseTy = Function->getReturnType(); 784881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 78574a4b08235990916911b8fe758d656c1171faf26Stephen Hines if (OutBaseTy->isVoidTy()) { 786e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes PassOutByPointer = true; 787881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes OutTy = ArgIter->getType(); 788881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 
789881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes ArgIter++; 790083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala --NumRemainingInputs; 79174a4b08235990916911b8fe758d656c1171faf26Stephen Hines } else { 79274a4b08235990916911b8fe758d656c1171faf26Stephen Hines // We don't increment Args, since we are using the actual return type. 793881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes OutTy = OutBaseTy->getPointerTo(); 79474a4b08235990916911b8fe758d656c1171faf26Stephen Hines } 795881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 796b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines OutStep = getStepValue(&DL, OutTy, Arg_outstep); 79774a4b08235990916911b8fe758d656c1171faf26Stephen Hines OutStep->setName("outstep"); 798083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0})); 799083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep")); 800097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 8019c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines if (gEnableRsTbaa) { 8029c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines OutBasePtr->setMetadata("tbaa", TBAAPointer); 8039c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 80450f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 805bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out"); 80674a4b08235990916911b8fe758d656c1171faf26Stephen Hines } 80774a4b08235990916911b8fe758d656c1171faf26Stephen Hines 808bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::SmallVector<llvm::Type*, 8> InTypes; 809bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::SmallVector<llvm::Value*, 8> InSteps; 810083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::SmallVector<llvm::Value*, 8> InBufPtrs; 811d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala llvm::SmallVector<llvm::Value*, 
8> InStructTempSlots; 812881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 813083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT); 814881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 815083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Create the loop structure. 816083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock(); 817083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::PHINode *IV; 818083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala createLoop(Builder, Arg_x1, Arg_x2, &IV); 819881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 820083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 821083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala const int CalleeArgsContextIdx = 822083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 823083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [&NumRemainingInputs]() { --NumRemainingInputs; }, 824083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala LoopHeader->getTerminator()); 825083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 826083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // After ExpandSpecialArguments() gets called, NumRemainingInputs 827083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // counts the number of arguments to the kernel that correspond to 828083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // an array entry from the InPtr field of the DriverInfo 829083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // structure. 830083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala const size_t NumInPtrArguments = NumRemainingInputs; 831083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 832083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala if (NumInPtrArguments > 0) { 833083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Extract information about input slots and step sizes. 
The work done 834083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // here is loop-invariant, so we can hoist the operations out of the loop. 835083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala auto OldInsertionPoint = Builder.saveIP(); 836083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.SetInsertPoint(LoopHeader->getTerminator()); 837083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 838083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala for (size_t InputIndex = 0; InputIndex < NumInPtrArguments; ++InputIndex, ArgIter++) { 839083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 840083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala static_cast<int32_t>(InputIndex)})); 841083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *InStepAddr = Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"); 842083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::LoadInst *InStepArg = Builder.CreateLoad(InStepAddr, "instep_addr"); 843881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 844d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala llvm::Type *InType = ArgIter->getType(); 845326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes 846326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes /* 847d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * AArch64 calling conventions dictate that structs of sufficient size 848d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * get passed by pointer instead of passed by value. This, combined 849d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * with the fact that we don't allow kernels to operate on pointer 850d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * data means that if we see a kernel with a pointer parameter we know 851083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala * that it is a struct input that has been promoted. As such we don't 852d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * need to convert its type to a pointer. 
Later we will need to know 853d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * to create a temporary copy on the stack, so we save this information 854d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala * in InStructTempSlots. 855326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes */ 856d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) { 857d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala llvm::Type *ElementType = PtrType->getElementType(); 858083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr, 859083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala "input_struct_slot")); 860d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala } else { 861d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala InType = InType->getPointerTo(); 862d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala InStructTempSlots.push_back(nullptr); 863d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala } 864326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes 865d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala llvm::Value *InStep = getStepValue(&DL, InType, InStepArg); 866881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 867d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala InStep->setName("instep"); 868881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 869083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 870083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala static_cast<int32_t>(InputIndex)})); 871083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep"); 872083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf"); 873083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in"); 
874d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala if (gEnableRsTbaa) { 875083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InBufPtr->setMetadata("tbaa", TBAAPointer); 876d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala } 877881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 878d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala InTypes.push_back(InType); 879d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala InSteps.push_back(InStep); 880083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InBufPtrs.push_back(CastInBufPtr); 881881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 882083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 883083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.restoreIP(OldInsertionPoint); 884881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 8857ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 8867ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines // Populate the actual call to kernel(). 8877ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 8887ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 8899296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala // Calculate the current input and output pointers. 
890881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 891881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Output 892881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 893900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutPtr = nullptr; 894bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray if (CastedOutBasePtr) { 8957b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 896083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset); 897bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 898e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes if (PassOutByPointer) { 899881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes RootArgs.push_back(OutPtr); 900881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 9014102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser } 9027b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser 903881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Inputs 90474a4b08235990916911b8fe758d656c1171faf26Stephen Hines 905083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala if (NumInPtrArguments > 0) { 906881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1); 907881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 908083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala for (size_t Index = 0; Index < NumInPtrArguments; ++Index) { 909083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset); 910326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes llvm::Value *Input; 911326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes 9129296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input"); 9139296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala 9149296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala if (gEnableRsTbaa) { 
9159296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala InputLoad->setMetadata("tbaa", TBAAAllocation); 9169296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala } 9179296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala 918d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) { 919d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala // Pass a pointer to a temporary on the stack, rather than 920d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala // passing a pointer to the original value. We do not want 921d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala // the kernel to potentially modify the input data. 922d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala 9239296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala // Note: don't annotate with TBAA, since the kernel might 9249296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala // have its own TBAA annotations for the pointer argument. 9259296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala Builder.CreateStore(InputLoad, TemporarySlot); 926d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala Input = TemporarySlot; 927326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes } else { 928326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes Input = InputLoad; 929881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 930881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 931881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes RootArgs.push_back(Input); 9329c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 9337ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 9347ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 93528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder); 9367ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 937bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs); 9387ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 
939e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes if (OutPtr && !PassOutByPointer) { 9409296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala RetVal->setName("call.result"); 94118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr); 9429c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines if (gEnableRsTbaa) { 9439c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines Store->setMetadata("tbaa", TBAAAllocation); 9449c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 9457ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 9467ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 947db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return true; 948db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 949db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 9504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Expand a reduce-style kernel function. 9514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // The input is a kernel which represents a binary operation, 9534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // of the form 9544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define foo @func(foo %a, foo %b), 9564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // (More generally, it can be of the forms 9584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define void @func(foo* %ret, foo* %a, foo* %b) 9604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define void @func(foo* %ret, foo1 %a, foo1 %b) 9614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define foo1 @func(foo2 %a, foo2 %b) 9624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // as a result of argument / return value conversions. 
Here, "foo1" 9644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // and "foo2" refer to possibly coerced types, and the coerced 9654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // argument type may be different from the coerced return type. See 9664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // "Note on coercion" below.) 9674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Note also, we do not expect to encounter any case when the 9694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // arguments are promoted to pointers but the return value is 9704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // not promoted to a pointer, e.g. 9714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define foo1 @func(foo* %a, foo* %b) 9734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9744e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // and we will throw an assertion in this case. 9754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // The input kernel gets expanded into a kernel of the form 9774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define void @func.expand(i8* %inBuf, i8* %outBuf, i32 %len) 9794e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // which performs a serial reduction of `len` elements from `inBuf`, 9814e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // and stores the result into `outBuf`.
In pseudocode, @func.expand 9824e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // does: 9834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // inArr := (foo *)inBuf; 9854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // accum := inArr[0]; 9864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // for (i := 1; i < len; ++i) { 9874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // accum := func(accum, inArr[i]); 9884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // } 9894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // *(foo *)outBuf := accum; 9904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Note on coercion 9924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Both the return value and the argument types may undergo internal 9944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // coercion in clang as part of call lowering. As a result, the 9954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // return value type may differ from the argument type even if the 9964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // types in the RenderScript signature are the same. For instance, the 9974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // kernel 9984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 9994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // int3 add(int3 a, int3 b) { return a + b; } 10004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // gets lowered by clang as 10024e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define <3 x i32> @add(<4 x i32> %a.coerce, <4 x i32> %b.coerce) 10044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // under AArch64.
The details of this process are found in clang, 10064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // lib/CodeGen/TargetInfo.cpp, under classifyArgumentType() and 10074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // classifyReturnType() in ARMABIInfo, AArch64ABIInfo. If the value 10084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // is passed by pointer, then the pointed-to type is not coerced. 10094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 10104e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Since we lack the original type information, this code does loads 10114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // and stores of allocation data by way of pointers to the coerced 10124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // type. 10134e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bool ExpandReduce(llvm::Function *Function) { 10144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(Function); 10154e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10164e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ALOGV("Expanding reduce kernel %s", Function->getName().str().c_str()); 10174e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::DataLayout DL(Module); 10194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // TBAA Metadata 10214e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, *TBAAAllocation; 10224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::MDBuilder MDHelper(*Context); 10234e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAARenderScriptDistinct = 10254e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala MDHelper.createTBAARoot(kRenderScriptTBAARootName); 10264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName, 
10274e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAARenderScriptDistinct); 10284e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", 10294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAARenderScript); 10304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, 10314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAAAllocation, 0); 10324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function *ExpandedFunction = 10344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala createEmptyExpandedReduceKernel(Function->getName()); 10354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Extract the expanded kernel's parameters. It is guaranteed by 10374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // createEmptyExpandedFunction that there will be 3 parameters. 
10384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala auto ExpandedFunctionArgIter = ExpandedFunction->arg_begin(); 10394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *Arg_inBuf = &*(ExpandedFunctionArgIter++); 10414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *Arg_outBuf = &*(ExpandedFunctionArgIter++); 10424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *Arg_len = &*(ExpandedFunctionArgIter++); 10434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(Function->arg_size() == 2 || Function->arg_size() == 3); 10454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Check if, instead of returning a value, the original kernel has 10474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // a pointer parameter which points to a temporary buffer into 10484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // which the return value gets written. 10494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala const bool ReturnValuePointerStyle = (Function->arg_size() == 3); 10504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(Function->getReturnType()->isVoidTy() == ReturnValuePointerStyle); 10514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Check if, instead of being passed by value, the inputs to the 10534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // original kernel are passed by pointer. 10544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala auto FirstArgIter = Function->arg_begin(); 10554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // The second argument is always an input to the original kernel. 
10564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala auto SecondArgIter = std::next(FirstArgIter); 10574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala const bool InputsPointerStyle = SecondArgIter->getType()->isPointerTy(); 10584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Get the output type (i.e. return type of the original kernel). 10604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::PointerType *OutPtrTy = nullptr; 10614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Type *OutTy = nullptr; 10624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (ReturnValuePointerStyle) { 10634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutPtrTy = llvm::dyn_cast<llvm::PointerType>(FirstArgIter->getType()); 10644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(OutPtrTy && "Expected a pointer parameter to kernel"); 10654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutTy = OutPtrTy->getElementType(); 10664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } else { 10674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutTy = Function->getReturnType(); 10684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(!OutTy->isVoidTy()); 10694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutPtrTy = OutTy->getPointerTo(); 10704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 10714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Get the input type (type of the arguments to the original 10734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // kernel). Some input types are different from the output type, 10744e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // due to explicit coercion that the compiler performs when 10754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // lowering the parameters. See "Note on coercion" above. 
10764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::PointerType *InPtrTy; 10774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Type *InTy; 10784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (InputsPointerStyle) { 10794e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InPtrTy = llvm::dyn_cast<llvm::PointerType>(SecondArgIter->getType()); 10804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(InPtrTy && "Expected a pointer parameter to kernel"); 10814e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ReturnValuePointerStyle); 10824e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(std::next(SecondArgIter)->getType() == InPtrTy && 10834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala "Input type mismatch"); 10844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InTy = InPtrTy->getElementType(); 10854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } else { 10864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InTy = SecondArgIter->getType(); 10874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InPtrTy = InTy->getPointerTo(); 10884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (!ReturnValuePointerStyle) { 10894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(InTy == FirstArgIter->getType() && "Input type mismatch"); 10904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } else { 10914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(InTy == std::next(SecondArgIter)->getType() && 10924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala "Input type mismatch"); 10934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 10944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 10954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 10964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // The input type should take up the same amount of space in 10974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // memory as the output type. 
10984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(DL.getTypeAllocSize(InTy) == DL.getTypeAllocSize(OutTy)); 10994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Construct the actual function body. 11014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin()); 11024e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Cast input and output buffers to appropriate types. 11044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *InBuf = Builder.CreatePointerCast(Arg_inBuf, InPtrTy); 11054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *OutBuf = Builder.CreatePointerCast(Arg_outBuf, OutPtrTy); 11064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Create a slot to pass temporary results back. This needs to be 11084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // separate from the accumulator slot because the kernel may mark 11094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // the return value slot as noalias. 11104e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *ReturnBuf = nullptr; 11114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (ReturnValuePointerStyle) { 11124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ReturnBuf = Builder.CreateAlloca(OutTy, nullptr, "ret.tmp"); 11134e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 11144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11154e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Create a slot to hold the second input if the inputs are passed 11164e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // by pointer to the original kernel. 
We cannot directly pass a 11174e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // pointer to the input buffer, because the kernel may modify its 11184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // inputs. 11194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *SecondInputTempBuf = nullptr; 11204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (InputsPointerStyle) { 11214e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala SecondInputTempBuf = Builder.CreateAlloca(InTy, nullptr, "in.tmp"); 11224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 11234e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Create a slot to accumulate temporary results, and fill it with 11254e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // the first value. 11264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *AccumBuf = Builder.CreateAlloca(OutTy, nullptr, "accum"); 11274e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Cast to OutPtrTy before loading, since AccumBuf has type OutPtrTy. 11284e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::LoadInst *FirstElementLoad = Builder.CreateLoad( 11294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreatePointerCast(InBuf, OutPtrTy)); 11304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (gEnableRsTbaa) { 11314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala FirstElementLoad->setMetadata("tbaa", TBAAAllocation); 11324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 11334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Memory operations with AccumBuf shouldn't be marked with 11344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // RenderScript TBAA, since this might conflict with TBAA metadata 11354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // in the kernel function when AccumBuf is passed by pointer. 
11364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateStore(FirstElementLoad, AccumBuf); 11374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Loop body 11394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Create the loop structure. Note that the first input in the input buffer 11414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // has already been accumulated, so that we start at index 1. 11424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::PHINode *IndVar; 11434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *Start = llvm::ConstantInt::get(Arg_len->getType(), 1); 11444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::BasicBlock *Exit = createLoop(Builder, Start, Arg_len, &IndVar); 11454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *InputPtr = Builder.CreateInBoundsGEP(InBuf, IndVar, "next_input.gep"); 11474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Set up arguments and call the original (unexpanded) kernel. 11494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 11504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // The original kernel can have at most 3 arguments, which is 11514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // achieved when the signature looks like: 11524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 11534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // define void @func(foo* %ret, bar %a, bar %b) 11544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // 11554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // (bar can be one of foo/foo.coerce/foo*). 
11564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::SmallVector<llvm::Value *, 3> KernelArgs; 11574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (ReturnValuePointerStyle) { 11594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala KernelArgs.push_back(ReturnBuf); 11604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 11614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (InputsPointerStyle) { 11634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ReturnValuePointerStyle); 11644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Because the return buffer is copied back into the 11654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // accumulator, it's okay if the accumulator is overwritten. 11664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala KernelArgs.push_back(AccumBuf); 11674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::LoadInst *InputLoad = Builder.CreateLoad(InputPtr); 11694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (gEnableRsTbaa) { 11704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala InputLoad->setMetadata("tbaa", TBAAAllocation); 11714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 11724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateStore(InputLoad, SecondInputTempBuf); 11734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11744e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala KernelArgs.push_back(SecondInputTempBuf); 11754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } else { 11764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // InPtrTy may be different from OutPtrTy (the type of 11774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // AccumBuf), so first cast the accumulator buffer to the 11784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // pointer type corresponding to the input argument type. 
11794e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala KernelArgs.push_back( 11804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateLoad(Builder.CreatePointerCast(AccumBuf, InPtrTy))); 11814e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11824e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::LoadInst *LoadedArg = Builder.CreateLoad(InputPtr); 11834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (gEnableRsTbaa) { 11844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala LoadedArg->setMetadata("tbaa", TBAAAllocation); 11854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 11864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala KernelArgs.push_back(LoadedArg); 11874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 11884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Value *RetVal = Builder.CreateCall(Function, KernelArgs); 11904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala const uint64_t ElementSize = DL.getTypeStoreSize(OutTy); 11924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala const uint64_t ElementAlign = DL.getABITypeAlignment(OutTy); 11934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 11944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Store the output in the accumulator. 
11954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (ReturnValuePointerStyle) { 11964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateMemCpy(AccumBuf, ReturnBuf, ElementSize, ElementAlign); 11974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } else { 11984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateStore(RetVal, AccumBuf); 11994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 12004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Loop exit 12024e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.SetInsertPoint(Exit, Exit->begin()); 12034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::LoadInst *OutputLoad = Builder.CreateLoad(AccumBuf); 12054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::StoreInst *OutputStore = Builder.CreateStore(OutputLoad, OutBuf); 12064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (gEnableRsTbaa) { 12074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala OutputStore->setMetadata("tbaa", TBAAAllocation); 12084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 12094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 12104e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return true; 12114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 12124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 121318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// @brief Checks if pointers to allocation internals are exposed 121418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// 121518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// This function verifies if through the parameters passed to the kernel 121618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// or through calls to the runtime library the script gains access to 121718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// pointers pointing to data within a RenderScript Allocation. 
121818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// If we know we control all loads from and stores to data within 121918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// RenderScript allocations and if we know the run-time internal accesses 122018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// are all annotated with RenderScript TBAA metadata, only then we 122118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// can safely use TBAA to distinguish between generic and from-allocation 122218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// pointers. 1223bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool allocPointersExposed(llvm::Module &Module) { 122418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Old style kernel function can expose pointers to elements within 122518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // allocations. 122618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // TODO: Extend analysis to allow simple cases of old-style kernels. 
122725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines for (size_t i = 0; i < mExportForEachCount; ++i) { 122825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char *Name = mExportForEachNameList[i]; 122925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t Signature = mExportForEachSignatureList[i]; 1230bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (Module.getFunction(Name) && 1231d88177580db4ddedf680854c51db333c97eabc59Stephen Hines !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) { 123218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 123318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 123418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 123518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 123618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Check for library functions that expose a pointer to an Allocation or 123718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // that are not yet annotated with RenderScript-specific tbaa information. 1238e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala static const std::vector<const char *> Funcs{ 1239e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAt(...) 
1240e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationj", 1241e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationjj", 1242e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationjjj", 1243e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1244e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsSetElementAt() 1245e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvj", 1246e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvjj", 1247e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvjjj", 1248e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1249e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_Y() 1250e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj", 1251e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1252e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_U() 1253e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj", 1254e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1255e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_V() 1256e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj", 1257e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala }; 1258e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1259e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala for (auto FI : Funcs) { 1260e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala llvm::Function *Function = Module.getFunction(FI); 126118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1262bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (!Function) { 1263e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala ALOGE("Missing run-time function '%s'", FI); 126418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias 
Grosser return true; 126518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 126618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1267bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (Function->getNumUses() > 0) { 126818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 126918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 127018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 127118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 127218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return false; 127318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 127418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 127518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// @brief Connect RenderScript TBAA metadata to C/C++ metadata 127618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// 127718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// The TBAA metadata used to annotate loads/stores from RenderScript 1278e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// Allocations is generated in a separate TBAA tree with a 1279354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for 1280354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// all nodes in unrelated alias analysis trees. This function makes the 1281354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root), 1282e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With 1283e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// the connected trees every access to an Allocation is resolved to 1284e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// must-alias if compared to a normal C/C++ access. 
1285bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes void connectRenderScriptTBAAMetadata(llvm::Module &Module) { 1286bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDBuilder MDHelper(*Context); 1287354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScriptDistinct = 1288354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines MDHelper.createTBAARoot("RenderScript Distinct TBAA"); 1289354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode( 1290354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines "RenderScript TBAA", TBAARenderScriptDistinct); 1291bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA"); 1292354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScript->replaceOperandWith(1, TBAARoot); 129318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 129418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1295bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes virtual bool runOnModule(llvm::Module &Module) { 1296bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool Changed = false; 1297bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes this->Module = &Module; 12984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Context = &Module.getContext(); 1299bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 13004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala buildTypes(); 1301bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 1302bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bcinfo::MetadataExtractor me(&Module); 130325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines if (!me.extract()) { 130425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines ALOGE("Could not extract metadata from module!"); 130525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines return false; 130625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines } 13074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt 
Wala 13084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Expand forEach_* style kernels. 130925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachCount = me.getExportForEachSignatureCount(); 131025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachNameList = me.getExportForEachNameList(); 131125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachSignatureList = me.getExportForEachSignatureList(); 1312db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 131325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines for (size_t i = 0; i < mExportForEachCount; ++i) { 131425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char *name = mExportForEachNameList[i]; 131525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t signature = mExportForEachSignatureList[i]; 1316bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *kernel = Module.getFunction(name); 1317cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser if (kernel) { 1318d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) { 13194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Changed |= ExpandForEach(kernel, signature); 1320acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 1321acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } else if (kernel->getReturnType()->isVoidTy()) { 13224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Changed |= ExpandOldStyleForEach(kernel, signature); 1323acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 1324acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } else { 1325acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // There are some graphics root functions that are not 1326acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // expanded, but that will be called directly. 
For those 1327acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // functions, we can not set the linkage to internal. 1328acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } 1329cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines } 1330db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1331db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 13324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Expand reduce_* style kernels. 13334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala mExportReduceCount = me.getExportReduceCount(); 13344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala mExportReduceNameList = me.getExportReduceNameList(); 13354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 13364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala for (size_t i = 0; i < mExportReduceCount; ++i) { 13374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function *kernel = Module.getFunction(mExportReduceNameList[i]); 13384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (kernel) { 13394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Changed |= ExpandReduce(kernel); 13404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 13414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 13424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 13434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (gEnableRsTbaa && !allocPointersExposed(Module)) { 1344bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes connectRenderScriptTBAAMetadata(Module); 134518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 134618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1347cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines return Changed; 1348db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1349db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1350db169187dea4602e4ad32058762d23d474753fd0Stephen Hines virtual const char *getPassName() const { 13514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return "forEach_* and reduce_* function expansion"; 
1352db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1353db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 13544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala}; // end RSKernelExpandPass 1355db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 13567a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace 13577a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 13584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walachar RSKernelExpandPass::ID = 0; 13594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walastatic llvm::RegisterPass<RSKernelExpandPass> X("kernelexp", "Kernel Expand Pass"); 1360db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1361db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc { 1362db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 13637a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass * 13644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt WalacreateRSKernelExpandPass(bool pEnableStepOpt) { 13654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return new RSKernelExpandPass(pEnableStepOpt); 13667a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} 1367db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 13687a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc 1369