1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/* 2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project 3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License"); 5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License. 6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at 7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * http://www.apache.org/licenses/LICENSE-2.0 9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software 11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS, 12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and 14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License. 15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h" 18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h" 1957fd9f882f3359be4201c42b02aebf785d311df2David Gross#include "bcc/Renderscript/RSUtils.h" 207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 217a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib> 2233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional> 23e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross#include <unordered_set> 247a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 25b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h> 26b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h> 27b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h> 28b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h> 2918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h> 30b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h> 31c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h> 327ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h> 33b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h> 34cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h> 35b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h> 36806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h> 37c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang 38c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h" 39ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h" 40db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 41d88177580db4ddedf680854c51db333c97eabc59Stephen Hines#include "bcinfo/MetadataExtractor.h" 42d88177580db4ddedf680854c51db333c97eabc59Stephen Hines 434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala#ifndef __DISABLE_ASSERTS 444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala// Only used in bccAssert() 454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst int kNumExpandedForeachParams = 4; 46a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Grossconst int kNumExpandedReduceAccumulatorParams = 4; 474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala#endif 484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA"; 504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst char kRenderScriptTBAANodeName[] = "RenderScript TBAA"; 51bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 527a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc; 537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 54db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace { 557a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 56354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hinesstatic const bool gEnableRsTbaa = true; 579c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines 584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala/* RSKernelExpandPass - This pass operates on functions that are able 594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * to be called via rsForEach(), "foreach_<NAME>", or 604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * "reduce_<NAME>". We create an inner loop for the function to be 614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * invoked over the appropriate data cells of the input/output 624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * allocations (adjusting other relevant parameters as we go). We 634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * support doing this for any forEach or reduce style compute 644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * kernels. The new function name is the original function name 654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * followed by ".expand". Note that we still generate code for the 664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * original function. 677a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */ 684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaclass RSKernelExpandPass : public llvm::ModulePass { 6933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grosspublic: 70db169187dea4602e4ad32058762d23d474753fd0Stephen Hines static char ID; 71db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 7233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossprivate: 73e44a3525b9703739534c3b62d7d1af4c95649a38David Gross static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h 74e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 75e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross typedef std::unordered_set<llvm::Function *> FunctionSet; 76e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 77e44a3525b9703739534c3b62d7d1af4c95649a38David Gross enum RsLaunchDimensionsField { 78e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldX, 79e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldY, 80e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldZ, 81e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldLod, 82e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldFace, 83e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldArray, 84e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 85e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsFieldCount 86e44a3525b9703739534c3b62d7d1af4c95649a38David Gross }; 87e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 88e44a3525b9703739534c3b62d7d1af4c95649a38David Gross enum RsExpandKernelDriverInfoPfxField { 89e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInPtr, 90e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInStride, 91e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldInLen, 92e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutPtr, 93e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutStride, 94e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldOutLen, 95e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldDim, 96e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldCurrent, 97e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldUsr, 98e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldUsLenr, 99e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 100e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxFieldCount 101e44a3525b9703739534c3b62d7d1af4c95649a38David Gross }; 10233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 103bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Module *Module; 104bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::LLVMContext *Context; 105bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 106bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 1074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * Pointers to LLVM type information for the the function signatures 1084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * for expanded functions. These must be re-calculated for each module 1094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * the pass is run on. 110bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 111a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross llvm::FunctionType *ExpandedForEachType; 112e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *RsExpandKernelDriverInfoPfxTy; 113db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 11425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t mExportForEachCount; 11525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char **mExportForEachNameList; 11625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const uint32_t *mExportForEachSignatureList; 117cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines 1182b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Turns on optimization of allocation stride values. 1192b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bool mEnableStepOpt; 1202b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 121bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes uint32_t getRootSignature(llvm::Function *Function) { 122db169187dea4602e4ad32058762d23d474753fd0Stephen Hines const llvm::NamedMDNode *ExportForEachMetadata = 123bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Module->getNamedMetadata("#rs_export_foreach"); 124db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 125db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (!ExportForEachMetadata) { 126db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 8> RootArgTys; 127bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes for (llvm::Function::arg_iterator B = Function->arg_begin(), 128bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes E = Function->arg_end(); 129db169187dea4602e4ad32058762d23d474753fd0Stephen Hines B != E; 130db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ++B) { 131db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgTys.push_back(B->getType()); 132db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 133db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 134db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // For pre-ICS bitcode, we may not have signature information. In that 135db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // case, we use the size of the RootArgTys to select the number of 136db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // arguments. 137db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return (1 << RootArgTys.size()) - 1; 138db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 139db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1407ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines if (ExportForEachMetadata->getNumOperands() == 0) { 1417ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return 0; 1427ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 1437ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 1446e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines bccAssert(ExportForEachMetadata->getNumOperands() > 0); 145db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 146cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // We only handle the case for legacy root() functions here, so this is 147cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // hard-coded to look at only the first such function. 148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 149900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes if (SigNode != nullptr && SigNode->getNumOperands() == 1) { 1501bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines llvm::Metadata *SigMD = SigNode->getOperand(0); 1511bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) { 1521bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines llvm::StringRef SigString = SigS->getString(); 153db169187dea4602e4ad32058762d23d474753fd0Stephen Hines uint32_t Signature = 0; 154db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigString.getAsInteger(10, Signature)) { 155db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 156db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 157db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 158db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature; 159db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 160db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 161db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 162db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 163db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 164db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 165429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray bool isStepOptSupported(llvm::Type *AllocType) { 166429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 167429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 168429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 169429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 170429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (mEnableStepOpt) { 171429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 172429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 173429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 174429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType == VoidPtrTy) { 175429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 176429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 177429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 178429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (!PT) { 179429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 180429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 181429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 182429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // remaining conditions are 64-bit only 183429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (VoidPtrTy->getPrimitiveSizeInBits() == 32) { 184429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return true; 185429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 186429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 187429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // coerce suggests an upconverted struct type, which we can't support 188429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) { 189429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 190429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 191429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 192429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported 193429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2); 194429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128); 195429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType == V2xi64Ty || AllocType == Int128Ty) { 196429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 197429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 198429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 199429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return true; 200429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 201429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 2022b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Get the actual value we should use to step through an allocation. 2037b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 2047b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // Normally the value we use to step through an allocation is given to us by 2057b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // the driver. However, for certain primitive data types, we can derive an 2067b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // integer constant for the step value. We use this integer constant whenever 2077b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // possible to allow further compiler optimizations to take place. 2087b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 209b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines // DL - Target Data size/layout information. 2102b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // T - Type of allocation (should be a pointer). 2112b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // OrigStep - Original step increment (root.expand() input from driver). 212bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType, 2132b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *OrigStep) { 214b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines bccAssert(DL); 215bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(AllocType); 2162b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bccAssert(OrigStep); 217bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 218429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (isStepOptSupported(AllocType)) { 2192b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Type *ET = PT->getElementType(); 220b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines uint64_t ETSize = DL->getTypeAllocSize(ET); 221bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 2222b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return llvm::ConstantInt::get(Int32Ty, ETSize); 2232b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } else { 2242b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return OrigStep; 2252b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 2262b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 2272b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 228097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes /// Builds the types required by the pass for the given context. 229bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes void buildTypes(void) { 230e44a3525b9703739534c3b62d7d1af4c95649a38David Gross // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs. 231bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 232e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8Ty = llvm::Type::getInt8Ty(*Context); 233e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8PtrTy = Int8Ty->getPointerTo(); 234e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT); 235e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 236e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32ArrayInputLimitTy = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT); 237e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 238e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32Array4Ty = llvm::ArrayType::get(Int32Ty, 4); 239097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 240097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h: 241db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 242e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * struct RsLaunchDimensions { 243e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t x; 244db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t y; 245db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t z; 246e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t lod; 247e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t face; 248e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t array[4]; 249e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * }; 250e44a3525b9703739534c3b62d7d1af4c95649a38David Gross */ 251e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes; 252e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t x 253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t y 254e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t z 255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t lod 256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t face 257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4] 258e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType *RsLaunchDimensionsTy = 259e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions"); 260e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 2611d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h: 262e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 263e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * struct RsExpandKernelDriverInfoPfx { 264e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]; 265e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t inStride[RS_KERNEL_INPUT_LIMIT]; 266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t inLen; 267e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 268e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]; 269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t outStride[RS_KERNEL_INPUT_LIMIT]; 270e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t outLen; 271e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 272e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // Dimension of the launch 273e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * RsLaunchDimensions dim; 274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // The walking iterator of the launch 276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * RsLaunchDimensions current; 277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * const void *usr; 279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t usrLen; 280e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 281e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // Items below this line are not used by the compiler and can be change in the driver. 282e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // So the compiler must assume there are an unknown number of fields of unknown type 283e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // beginning here. 284db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * }; 2851d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross * 2861d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp). 287db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 288e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes; 289e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT] 290e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t inStride[RS_KERNEL_INPUT_LIMIT] 291e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t inLen 292e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT] 293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t outStride[RS_KERNEL_INPUT_LIMIT] 294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t outLen 295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions dim 296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions current 297e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy); // const void *usr 298e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t usrLen 299e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross RsExpandKernelDriverInfoPfxTy = 300e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx"); 301bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 302bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes // Create the function type for expanded kernels. 3034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context); 304bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 305e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo(); 3064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep) 3074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ExpandedForEachType = llvm::FunctionType::get(VoidTy, 3084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false); 3098ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 3108ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 3114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala /// @brief Create skeleton of the expanded foreach kernel. 312357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 313357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// This creates a function with the following signature: 314357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 315357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 3165010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes /// uint32_t outstep) 317357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 3184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function *createEmptyExpandedForEachKernel(llvm::StringRef OldName) { 319bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 3204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function::Create(ExpandedForEachType, 321bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::GlobalValue::ExternalLinkage, 322bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes OldName + ".expand", Module); 3234e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams); 324bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin(); 325bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("p"); 326bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("x1"); 327bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("x2"); 328bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("arg_outstep"); 3294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 3304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ExpandedFunction); 3314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::IRBuilder<> Builder(Begin); 3324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateRetVoid(); 3334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return ExpandedFunction; 3344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 3354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 336e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Create skeleton of a general reduce kernel's expanded accumulator. 337e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 338e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // This creates a function with the following signature: 339e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 340e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p, 341e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // i32 %x1, i32 %x2, accumType* nocapture %accum) 342e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 343a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross llvm::Function *createEmptyExpandedReduceAccumulator(llvm::StringRef OldName, 344a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross llvm::Type *AccumArgTy) { 345e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 346e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context); 347a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross llvm::FunctionType *ExpandedReduceAccumulatorType = 348e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::FunctionType::get(VoidTy, 349e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross {RsExpandKernelDriverInfoPfxTy->getPointerTo(), 350e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Int32Ty, Int32Ty, AccumArgTy}, false); 351e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function *FnExpandedAccumulator = 352a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross llvm::Function::Create(ExpandedReduceAccumulatorType, 353e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::GlobalValue::ExternalLinkage, 354e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross OldName + ".expand", Module); 355a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams); 356e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 357e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin(); 358e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 359e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross using llvm::Attribute; 360e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 361e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Argument *Arg_p = &(*AI++); 362e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_p->setName("p"); 363e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_p->addAttr(llvm::AttributeSet::get(*Context, Arg_p->getArgNo() + 1, 364e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::makeArrayRef(Attribute::NoCapture))); 365e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 366e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Argument *Arg_x1 = &(*AI++); 367e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_x1->setName("x1"); 368e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 369e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Argument *Arg_x2 = &(*AI++); 370e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_x2->setName("x2"); 371e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 372e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Argument *Arg_accum = &(*AI++); 373e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_accum->setName("accum"); 374e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_accum->addAttr(llvm::AttributeSet::get(*Context, Arg_accum->getArgNo() + 1, 375e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::makeArrayRef(Attribute::NoCapture))); 376e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 377e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 378e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross FnExpandedAccumulator); 379e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::IRBuilder<> Builder(Begin); 380e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.CreateRetVoid(); 381e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 382e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return FnExpandedAccumulator; 383e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 384e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 385e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @brief Create an empty loop 386e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 387e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// Create a loop of the form: 388e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 389e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// for (i = LowerBound; i < UpperBound; i++) 390e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// ; 391e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 392e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// After the loop has been created, the builder is set such that 393e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// instructions can be added to the loop body. 394e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 395e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param Builder The builder to use to build this loop. The current 396e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// position of the builder is the position the loop 397e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// will be inserted. 398e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LowerBound The first value of the loop iterator 399e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param UpperBound The maximal value of the loop iterator 400e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LoopIV A reference that will be set to the loop iterator. 401e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @return The BasicBlock that will be executed after the loop. 402e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder, 403e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *LowerBound, 404e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *UpperBound, 405ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo llvm::Value **LoopIV) { 406c2ca742d7d0197c52e49467862844463fb42280fDavid Gross bccAssert(LowerBound->getType() == UpperBound->getType()); 407e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 408e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB; 409ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo llvm::Value *Cond, *IVNext, *IV, *IVVar; 410e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 411e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser CondBB = Builder.GetInsertBlock(); 4128e9089377848628813a697b972773e969b942c3bPirama Arumuga Nainar AfterBB = llvm::SplitBlock(CondBB, &*Builder.GetInsertPoint(), nullptr, nullptr); 413bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent()); 414e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 415ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo CondBB->getTerminator()->eraseFromParent(); 416ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo Builder.SetInsertPoint(CondBB); 417ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo 418ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // decltype(LowerBound) *ivvar = alloca(sizeof(int)) 419ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // *ivvar = LowerBound 420ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo IVVar = Builder.CreateAlloca(LowerBound->getType(), nullptr, BCC_INDEX_VAR_NAME); 421ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo Builder.CreateStore(LowerBound, IVVar); 422ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo 423e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // if (LowerBound < Upperbound) 424e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto LoopHeader 425e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // else 426e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto AfterBB 427e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(LowerBound, UpperBound); 428e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 429e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 430ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // LoopHeader: 431ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // iv = *ivvar 432ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // <insertion point here> 433ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // iv.next = iv + 1 434ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // *ivvar = iv.next 435ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // if (iv.next < Upperbound) 436ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // goto LoopHeader 437ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // else 438ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // goto AfterBB 439ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // AfterBB: 440e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(HeaderBB); 441ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo IV = Builder.CreateLoad(IVVar, "X"); 442e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1)); 443ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo Builder.CreateStore(IVNext, IVVar); 444e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(IVNext, UpperBound); 445e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 446e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser AfterBB->setName("Exit"); 447ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo Builder.SetInsertPoint(llvm::cast<llvm::Instruction>(IVNext)); 448ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo 449ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo // Record information about this loop. 450e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser *LoopIV = IV; 451e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser return AfterBB; 452e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser } 453e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 45428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Finish building the outgoing argument list for calling a ForEach-able function. 45528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // 45628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // ArgVector - on input, the non-special arguments 45728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // on output, the non-special arguments combined with the special arguments 45828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // from SpecialArgVector 45928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // SpecialArgVector - special arguments (from ExpandSpecialArguments()) 46028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // SpecialArgContextIdx - return value of ExpandSpecialArguments() 46128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // (position of context argument in SpecialArgVector) 46228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // CalleeFunction - the ForEach-able function being called 46328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Builder - for inserting code into the caller function 46428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen> 46528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross void finishArgList( llvm::SmallVector<llvm::Value *, ArgVectorLen> &ArgVector, 46628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector, 46728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int SpecialArgContextIdx, 46828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const llvm::Function &CalleeFunction, 46928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::IRBuilder<> &CallerBuilder) { 47028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross /* The context argument (if any) is a pointer to an opaque user-visible type that differs from 47128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the 47228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * two types represent the same thing). Therefore, we must introduce a pointer cast when 47328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * generating a call to the kernel function. 47428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross */ 47528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int ArgContextIdx = 47628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx; 47728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end()); 47828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross if (ArgContextIdx >= 0) { 47928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Type *ContextArgType = nullptr; 48028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int ArgIdx = ArgContextIdx; 48128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross for (const auto &Arg : CalleeFunction.getArgumentList()) { 48228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross if (!ArgIdx--) { 48328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ContextArgType = Arg.getType(); 48428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross break; 48528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 48628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 48728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross bccAssert(ContextArgType); 48828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType); 48928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 49028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 49128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 492083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // GEPHelper() returns a SmallVector of values suitable for passing 493083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for 494083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // the returned data type. It is sized so that the SmallVector 495083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // returned by GEPHelper() never needs to do a heap allocation for 496083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // any list of GEP indices it encounters in the code. 497083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices; 498083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 499083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Helper for turning a list of constant integer GEP indices into a 500083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // SmallVector of llvm::Value*. The return value is suitable for 501083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP(). 502083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // 503083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Inputs: 504083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // I32Args should be integers which represent the index arguments 505083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // to a GEP instruction. 506083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // 507083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Returns: 508083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Returns a SmallVector of ConstantInts. 5094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala SmallGEPIndices GEPHelper(const std::initializer_list<int32_t> I32Args) { 510083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices Out(I32Args.size()); 511083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context); 512083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala std::transform(I32Args.begin(), I32Args.end(), Out.begin(), 513083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); }); 514083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala return Out; 515083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala } 516083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 5178ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic: 5184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala RSKernelExpandPass(bool pEnableStepOpt = true) 519900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes : ModulePass(ID), Module(nullptr), Context(nullptr), 520bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes mEnableStepOpt(pEnableStepOpt) { 521bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 5228ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 5238ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 524c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { 525c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines // This pass does not use any other analysis passes, but it does 526c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines // add/wrap the existing functions in the module (thus altering the CFG). 527c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines } 528c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines 52933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Build contribution to outgoing argument list for calling a 530e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // ForEach-able function or a general reduction accumulator 531e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // function, based on the special parameters of that function. 53233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // 533e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Signature - metadata bits for the signature of the callee 53433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // X, Arg_p - values derived directly from expanded function, 535e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // suitable for computing arguments for the callee 53633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // CalleeArgs - contribution is accumulated here 53733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Bump - invoked once for each contributed outgoing argument 538083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // LoopHeaderInsertionPoint - an Instruction in the loop header, before which 539083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // this function can insert loop-invariant loads 54028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // 54128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Return value is the (zero-based) position of the context (Arg_p) 54228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // argument in the CalleeArgs vector, or a negative value if the 54328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // context argument is not placed in the CalleeArgs vector. 54428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int ExpandSpecialArguments(uint32_t Signature, 54528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Value *X, 54628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Value *Arg_p, 54728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::IRBuilder<> &Builder, 54828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::SmallVector<llvm::Value*, 8> &CalleeArgs, 549083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala std::function<void ()> Bump, 550083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Instruction *LoopHeaderInsertionPoint) { 55128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 55228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross bccAssert(CalleeArgs.empty()); 55328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 55428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int Return = -1; 55533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) { 55633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross CalleeArgs.push_back(Arg_p); 55733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross Bump(); 55828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross Return = CalleeArgs.size() - 1; 55933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 56033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 56133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) { 56233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross CalleeArgs.push_back(X); 56333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross Bump(); 56433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 56533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 566e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) || 567e44a3525b9703739534c3b62d7d1af4c95649a38David Gross bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 568083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala bccAssert(LoopHeaderInsertionPoint); 56933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 570083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Y and Z are loop invariant, so they can be hoisted out of the 571083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // loop. Set the IRBuilder insertion point to the loop header. 572083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala auto OldInsertionPoint = Builder.saveIP(); 573083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.SetInsertPoint(LoopHeaderInsertionPoint); 574e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 575e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) { 576083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent, 577083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala RsLaunchDimensionsFieldY})); 578083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep"); 579083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y")); 580e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Bump(); 581e44a3525b9703739534c3b62d7d1af4c95649a38David Gross } 582e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 583e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 584083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent, 585083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala RsLaunchDimensionsFieldZ})); 586083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep"); 587083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z")); 588e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Bump(); 589e44a3525b9703739534c3b62d7d1af4c95649a38David Gross } 590083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 591083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.restoreIP(OldInsertionPoint); 59233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 59328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 59428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross return Return; 59533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 59633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 597e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Generate loop-invariant input processing setup code for an expanded 598e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // ForEach-able function or an expanded general reduction accumulator 599e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // function. 600e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 601e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // LoopHeader - block at the end of which the setup code will be inserted 602e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Arg_p - RSKernelDriverInfo pointer passed to the expanded function 603e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // TBAAPointer - metadata for marking loads of pointer values out of RSKernelDriverInfo 604e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // ArgIter - iterator pointing to first input of the UNexpanded function 605e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // NumInputs - number of inputs (NOT number of ARGUMENTS) 606e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 6077d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // InTypes[] - this function saves input type, they will be used in ExpandInputsBody(). 6087d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // InBufPtrs[] - this function sets each array element to point to the first cell / byte 6097d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // (byte for x86, cell for other platforms) of the corresponding input allocation 610e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // InStructTempSlots[] - this function sets each array element either to nullptr 611e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // or to the result of an alloca (for the case where the 612e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // calling convention dictates that a value must be passed 613e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // by reference, and so we need a stacked temporary to hold 614e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // a copy of that value) 615e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross void ExpandInputsLoopInvariant(llvm::IRBuilder<> &Builder, llvm::BasicBlock *LoopHeader, 616e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_p, 617e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::MDNode *TBAAPointer, 618e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function::arg_iterator ArgIter, 619e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const size_t NumInputs, 6207d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen llvm::SmallVectorImpl<llvm::Type *> &InTypes, 621e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs, 622e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots) { 623e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT); 624e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 625e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Extract information about input slots. The work done 626e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // here is loop-invariant, so we can hoist the operations out of the loop. 627e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross auto OldInsertionPoint = Builder.saveIP(); 628e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.SetInsertPoint(LoopHeader->getTerminator()); 629e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 630e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross for (size_t InputIndex = 0; InputIndex < NumInputs; ++InputIndex, ArgIter++) { 631e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *InType = ArgIter->getType(); 632e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 633e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross /* 634e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * AArch64 calling conventions dictate that structs of sufficient size 635e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * get passed by pointer instead of passed by value. This, combined 636e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * with the fact that we don't allow kernels to operate on pointer 637e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * data means that if we see a kernel with a pointer parameter we know 638e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * that it is a struct input that has been promoted. As such we don't 639e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * need to convert its type to a pointer. Later we will need to know 640e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * to create a temporary copy on the stack, so we save this information 641e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * in InStructTempSlots. 642e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross */ 643e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) { 644e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *ElementType = PtrType->getElementType(); 645e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr, 646e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross "input_struct_slot")); 647e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } else { 648e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InType = InType->getPointerTo(); 649e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InStructTempSlots.push_back(nullptr); 650e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 651e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 652e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 653e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross static_cast<int32_t>(InputIndex)})); 654e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep"); 655e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf"); 6567d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen 6577d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen llvm::Value *CastInBufPtr = nullptr; 6587d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) { 6597d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in"); 6607d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen } else { 6617d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // The disagreement between module and x86 target machine datalayout 6627d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // causes mismatched input/output data offset between slang reflected 6637d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // code and bcc codegen for GetElementPtr. To solve this issue, skip the 6647d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // cast to InType and leave CastInBufPtr as an int8_t*. The buffer is 6657d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // later indexed with an explicit byte offset computed based on 6667d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // X86_CUSTOM_DL_STRING and then bitcast it to actual input type. 6677d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen CastInBufPtr = InBufPtr; 6687d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen } 669e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 670e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (gEnableRsTbaa) { 671e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InBufPtr->setMetadata("tbaa", TBAAPointer); 672e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 673e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 6747d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen InTypes.push_back(InType); 675e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InBufPtrs.push_back(CastInBufPtr); 676e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 677e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 678e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.restoreIP(OldInsertionPoint); 679e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 680e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 681e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Generate loop-varying input processing code for an expanded ForEach-able function 682e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // or an expanded general reduction accumulator function. Also, for the call to the 683e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // UNexpanded function, collect the portion of the argument list corresponding to the 684e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // inputs. 685e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 686e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Arg_x1 - first X coordinate to be processed by the expanded function 687e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // TBAAAllocation - metadata for marking loads of input values out of allocations 688e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // NumInputs -- number of inputs (NOT number of ARGUMENTS) 6897d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // InTypes[] - this function uses the saved input types in ExpandInputsLoopInvariant() 6907d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // to convert the pointer of byte InPtr to its real type. 691e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // InBufPtrs[] - this function consumes the information produced by ExpandInputsLoopInvariant() 692e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // InStructTempSlots[] - this function consumes the information produced by ExpandInputsLoopInvariant() 693e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // IndVar - value of loop induction variable (X coordinate) for a given loop iteration 694e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 695e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // RootArgs - this function sets this to the list of outgoing argument values corresponding 696e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // to the inputs 697e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross void ExpandInputsBody(llvm::IRBuilder<> &Builder, 698e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_x1, 699e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::MDNode *TBAAAllocation, 700e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const size_t NumInputs, 7017d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen const llvm::SmallVectorImpl<llvm::Type *> &InTypes, 702e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs, 703e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots, 704e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *IndVar, 705e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVectorImpl<llvm::Value *> &RootArgs) { 706e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Offset = Builder.CreateSub(IndVar, Arg_x1); 7077d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 708e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 709e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross for (size_t Index = 0; Index < NumInputs; ++Index) { 710e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 7117d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen llvm::Value *InPtr = nullptr; 7127d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) { 7137d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset); 7147d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen } else { 7157d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // Treat x86 input buffer as byte[], get indexed pointer with explicit 7167d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // byte offset computed using a datalayout based on 7177d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // X86_CUSTOM_DL_STRING, then bitcast it to actual input type. 7187d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen llvm::DataLayout DL(X86_CUSTOM_DL_STRING); 7197d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen llvm::Type *InTy = InTypes[Index]; 7207d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen uint64_t InStep = DL.getTypeAllocSize(InTy->getPointerElementType()); 7217d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen llvm::Value *OffsetInBytes = Builder.CreateMul(Offset, llvm::ConstantInt::get(Int32Ty, InStep)); 7227d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], OffsetInBytes); 7237d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen InPtr = Builder.CreatePointerCast(InPtr, InTy); 7247d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen } 7257d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen 7267d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen llvm::Value *Input; 727e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input"); 728e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 729e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (gEnableRsTbaa) { 730e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InputLoad->setMetadata("tbaa", TBAAAllocation); 731e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 732e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 733e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) { 734e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Pass a pointer to a temporary on the stack, rather than 735e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // passing a pointer to the original value. We do not want 736e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // the kernel to potentially modify the input data. 737e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 738e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Note: don't annotate with TBAA, since the kernel might 739e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // have its own TBAA annotations for the pointer argument. 740e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.CreateStore(InputLoad, TemporarySlot); 741e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Input = TemporarySlot; 742e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } else { 743e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Input = InputLoad; 744e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 745e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 746e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross RootArgs.push_back(Input); 747e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 748e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 749e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 7508ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser /* Performs the actual optimization on a selected function. On success, the 7518ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * Module will contain a new function of the name "<NAME>.expand" that 7528ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * invokes <NAME>() in a loop with the appropriate parameters. 7538ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser */ 7544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bool ExpandOldStyleForEach(llvm::Function *Function, uint32_t Signature) { 755bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ALOGV("Expanding ForEach-able Function %s", 756bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Function->getName().str().c_str()); 7578ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 7588ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser if (!Signature) { 759bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Signature = getRootSignature(Function); 7608ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser if (!Signature) { 7618ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser // We couldn't determine how to expand this function based on its 7628ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser // function signature. 7638ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser return false; 7648ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 7658ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 7668ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 767bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::DataLayout DL(Module); 7687d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) { 7697d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen DL.reset(X86_CUSTOM_DL_STRING); 7707d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen } 7718ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 772bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 7734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala createEmptyExpandedForEachKernel(Function->getName()); 774db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 775bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 776bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * Extract the expanded function's parameters. It is guaranteed by 777e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * createEmptyExpandedForEachKernel that there will be four parameters. 778bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 77933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 7804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams); 78133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 782bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator ExpandedFunctionArgIter = 783bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction->arg_begin(); 784db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 785bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 786bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 787bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 7885010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter); 789bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 790900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *InStep = nullptr; 791900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutStep = nullptr; 792db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 793db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Construct the actual function body. 7948e9089377848628813a697b972773e969b942c3bPirama Arumuga Nainar llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin()); 795db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 796cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Collect and construct the arguments for the kernel(). 797db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // Note that we load any loop-invariant arguments before entering the Loop. 798bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin(); 799db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 800900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Type *InTy = nullptr; 801083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *InBufPtr = nullptr; 802d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) { 803083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0})); 804083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::LoadInst *InStepArg = Builder.CreateLoad( 805083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr"); 806e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 807bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes InTy = (FunctionArgIter++)->getType(); 808e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes InStep = getStepValue(&DL, InTy, InStepArg); 809e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 8102b04086acbef6520ae2c54a868b1271abf053122Stephen Hines InStep->setName("instep"); 811e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes 812083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0})); 813083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InBufPtr = Builder.CreateLoad( 814083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf"); 815db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 816db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 817900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Type *OutTy = nullptr; 818900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutBasePtr = nullptr; 819d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 820bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes OutTy = (FunctionArgIter++)->getType(); 821b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines OutStep = getStepValue(&DL, OutTy, Arg_outstep); 8222b04086acbef6520ae2c54a868b1271abf053122Stephen Hines OutStep->setName("outstep"); 823083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0})); 824083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep")); 825db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 826db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 827900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *UsrData = nullptr; 828d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) { 829bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *UsrDataTy = (FunctionArgIter++)->getType(); 830083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldUsr); 831083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy); 832db169187dea4602e4ad32058762d23d474753fd0Stephen Hines UsrData->setName("UsrData"); 833db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 834db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 835083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock(); 836ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo llvm::Value *IV; 83733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross createLoop(Builder, Arg_x1, Arg_x2, &IV); 838097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 83933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 84028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 841083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [&FunctionArgIter]() { FunctionArgIter++; }, 842083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala LoopHeader->getTerminator()); 843db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 844bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(FunctionArgIter == Function->arg_end()); 845db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 846cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // Populate the actual call to kernel(). 847db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 848db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 849900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *InPtr = nullptr; 850900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutPtr = nullptr; 851db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 852ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser // Calculate the current input and output pointers 85302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // 854ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser // We always calculate the input/output pointers with a GEP operating on i8 85502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // values and only cast at the very end to OutTy. This is because the step 85602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // between two values is given in bytes. 85702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // 85802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // TODO: We could further optimize the output by using a GEP operation of 85902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser // type 'OutTy' in cases where the element type of the allocation allows. 86002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser if (OutBasePtr) { 86102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 86202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutOffset = Builder.CreateMul(OutOffset, OutStep); 863083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset); 86402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser OutPtr = Builder.CreatePointerCast(OutPtr, OutTy); 86502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser } 866bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 867083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala if (InBufPtr) { 868ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1); 869ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InOffset = Builder.CreateMul(InOffset, InStep); 870083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset); 871ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser InPtr = Builder.CreatePointerCast(InPtr, InTy); 872ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser } 87302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser 874ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser if (InPtr) { 8757ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines RootArgs.push_back(InPtr); 876db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 877db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 87802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser if (OutPtr) { 8797ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines RootArgs.push_back(OutPtr); 880db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 881db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 882db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (UsrData) { 883db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgs.push_back(UsrData); 884db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 885db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 88628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder); 887db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 888bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Builder.CreateCall(Function, RootArgs); 889db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 8907ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return true; 8917ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 8927ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 8934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala /* Expand a pass-by-value foreach kernel. 8947ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines */ 8954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bool ExpandForEach(llvm::Function *Function, uint32_t Signature) { 896d88177580db4ddedf680854c51db333c97eabc59Stephen Hines bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)); 897bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ALOGV("Expanding kernel Function %s", Function->getName().str().c_str()); 8987ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 8994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // TODO: Refactor this to share functionality with ExpandOldStyleForEach. 900bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::DataLayout DL(Module); 9017d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) { 9027d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen DL.reset(X86_CUSTOM_DL_STRING); 9037d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen } 9047d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 9057ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 906bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 9074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala createEmptyExpandedForEachKernel(Function->getName()); 9087ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 909bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes /* 910bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes * Extract the expanded function's parameters. It is guaranteed by 911e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * createEmptyExpandedForEachKernel that there will be four parameters. 912bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 913881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 9144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams); 915881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 916bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator ExpandedFunctionArgIter = 917bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes ExpandedFunction->arg_begin(); 918bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 919bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_p = &*(ExpandedFunctionArgIter++); 920bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x1 = &*(ExpandedFunctionArgIter++); 921bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *Arg_x2 = &*(ExpandedFunctionArgIter++); 9223bc475b206c3fa249a212b90fe989fdcda4d75f9Matt Wala // Arg_outstep is not used by expanded new-style forEach kernels. 9237ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 9247ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines // Construct the actual function body. 9258e9089377848628813a697b972773e969b942c3bPirama Arumuga Nainar llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin()); 9267ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 92718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Create TBAA meta-data. 928354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, 929354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines *TBAAAllocation, *TBAAPointer; 930bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDBuilder MDHelper(*Context); 93114588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien 932354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScriptDistinct = 9334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala MDHelper.createTBAARoot(kRenderScriptTBAARootName); 9344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName, 935354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScriptDistinct); 936e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", 937e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAARenderScript); 938e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, 939e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAAllocation, 0); 940e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", 941e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes TBAARenderScript); 94214588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0); 94318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 944881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes /* 945881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * Collect and construct the arguments for the kernel(). 946881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * 947881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes * Note that we load any loop-invariant arguments before entering the Loop. 948881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes */ 949083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala size_t NumRemainingInputs = Function->arg_size(); 9507ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 951881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // No usrData parameter on kernels. 952881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes bccAssert( 953881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)); 954881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 955881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes llvm::Function::arg_iterator ArgIter = Function->arg_begin(); 956881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 957881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Check the return type 958bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Type *OutTy = nullptr; 959bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::LoadInst *OutBasePtr = nullptr; 960bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray llvm::Value *CastedOutBasePtr = nullptr; 961881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 962e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes bool PassOutByPointer = false; 963881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 964d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) { 965bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *OutBaseTy = Function->getReturnType(); 966881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 96774a4b08235990916911b8fe758d656c1171faf26Stephen Hines if (OutBaseTy->isVoidTy()) { 968e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes PassOutByPointer = true; 969881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes OutTy = ArgIter->getType(); 970881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 971881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes ArgIter++; 972083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala --NumRemainingInputs; 97374a4b08235990916911b8fe758d656c1171faf26Stephen Hines } else { 97474a4b08235990916911b8fe758d656c1171faf26Stephen Hines // We don't increment Args, since we are using the actual return type. 975881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes OutTy = OutBaseTy->getPointerTo(); 97674a4b08235990916911b8fe758d656c1171faf26Stephen Hines } 977881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 978083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0})); 979083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep")); 980097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 9819c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines if (gEnableRsTbaa) { 9829c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines OutBasePtr->setMetadata("tbaa", TBAAPointer); 9839c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 98450f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray 9857d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) { 9867d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out"); 9877d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen } else { 9887d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // The disagreement between module and x86 target machine datalayout 9897d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // causes mismatched input/output data offset between slang reflected 9907d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // code and bcc codegen for GetElementPtr. To solve this issue, skip the 9917d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // cast to OutTy and leave CastedOutBasePtr as an int8_t*. The buffer 9927d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // is later indexed with an explicit byte offset computed based on 9937d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // X86_CUSTOM_DL_STRING and then bitcast it to actual output type. 9947d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen CastedOutBasePtr = OutBasePtr; 9957d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen } 99674a4b08235990916911b8fe758d656c1171faf26Stephen Hines } 99774a4b08235990916911b8fe758d656c1171faf26Stephen Hines 9987d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen llvm::SmallVector<llvm::Type*, 8> InTypes; 999083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::SmallVector<llvm::Value*, 8> InBufPtrs; 1000d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala llvm::SmallVector<llvm::Value*, 8> InStructTempSlots; 1001881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 1002083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT); 1003881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 1004083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Create the loop structure. 1005083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock(); 1006ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo llvm::Value *IV; 1007083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala createLoop(Builder, Arg_x1, Arg_x2, &IV); 1008881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 1009083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 1010083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala const int CalleeArgsContextIdx = 1011083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs, 1012083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [&NumRemainingInputs]() { --NumRemainingInputs; }, 1013083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala LoopHeader->getTerminator()); 1014083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 1015083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // After ExpandSpecialArguments() gets called, NumRemainingInputs 1016083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // counts the number of arguments to the kernel that correspond to 1017083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // an array entry from the InPtr field of the DriverInfo 1018083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // structure. 1019083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala const size_t NumInPtrArguments = NumRemainingInputs; 1020083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 1021083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala if (NumInPtrArguments > 0) { 1022e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, ArgIter, NumInPtrArguments, 10237d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen InTypes, InBufPtrs, InStructTempSlots); 1024881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 10257ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 10267ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines // Populate the actual call to kernel(). 10277ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines llvm::SmallVector<llvm::Value*, 8> RootArgs; 10287ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 10299296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala // Calculate the current input and output pointers. 1030881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 1031881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Output 1032881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes 1033900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes llvm::Value *OutPtr = nullptr; 1034bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray if (CastedOutBasePtr) { 10357b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1); 10367d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen 10377d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) { 10387d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset); 10397d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen } else { 10407d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // Treat x86 output buffer as byte[], get indexed pointer with explicit 10417d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // byte offset computed using a datalayout based on 10427d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen // X86_CUSTOM_DL_STRING, then bitcast it to actual output type. 10437d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen uint64_t OutStep = DL.getTypeAllocSize(OutTy->getPointerElementType()); 10447d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen llvm::Value *OutOffsetInBytes = Builder.CreateMul(OutOffset, llvm::ConstantInt::get(Int32Ty, OutStep)); 10457d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffsetInBytes); 10467d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen OutPtr = Builder.CreatePointerCast(OutPtr, OutTy); 10477d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen } 1048bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 1049e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes if (PassOutByPointer) { 1050881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes RootArgs.push_back(OutPtr); 1051881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes } 10524102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser } 10537b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser 1054881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes // Inputs 105574a4b08235990916911b8fe758d656c1171faf26Stephen Hines 1056083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala if (NumInPtrArguments > 0) { 1057e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInPtrArguments, 10587d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen InTypes, InBufPtrs, InStructTempSlots, IV, RootArgs); 10597ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 10607ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 106128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder); 10627ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 1063bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs); 10647ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 1065e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes if (OutPtr && !PassOutByPointer) { 10669296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala RetVal->setName("call.result"); 106718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr); 10689c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines if (gEnableRsTbaa) { 10699c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines Store->setMetadata("tbaa", TBAAAllocation); 10709c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines } 10717ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 10727ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 1073db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return true; 1074db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1075db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1076e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Certain categories of functions that make up a general 1077e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // reduce-style kernel are called directly from the driver with no 1078e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // expansion needed. For a function in such a category, we need to 1079e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // promote linkage from static to external, to ensure that the 1080e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // function is visible to the driver in the dynamic symbol table. 1081e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // This promotion is safe because we don't have any kind of cross 1082e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // translation unit linkage model (except for linking against 1083e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // RenderScript libraries), so we do not risk name clashes. 1084a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross bool PromoteReduceFunction(const char *Name, FunctionSet &PromotedFunctions) { 1085e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (!Name) // a presumably-optional function that is not present 1086e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return false; 1087e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1088e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function *Fn = Module->getFunction(Name); 1089e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(Fn != nullptr); 1090e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (PromotedFunctions.insert(Fn).second) { 1091e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(Fn->getLinkage() == llvm::GlobalValue::InternalLinkage); 1092e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Fn->setLinkage(llvm::GlobalValue::ExternalLinkage); 1093e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return true; 1094e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 1095e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1096e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return false; 1097e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 1098e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1099e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Expand the accumulator function for a general reduce-style kernel. 1100e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1101e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // The input is a function of the form 1102e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1103e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // define void @func(accumType* %accum, foo1 in1[, ... fooN inN] [, special arguments]) 1104e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1105e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // where all arguments except the first are the same as for a foreach kernel. 1106e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1107e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // The input accumulator function gets expanded into a function of the form 1108e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1109e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // define void @func.expand(%RsExpandKernelDriverInfoPfx* %p, i32 %x1, i32 %x2, accumType* %accum) 1110e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1111e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // which performs a serial accumulaion of elements [x1, x2) into *%accum. 1112e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1113e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // In pseudocode, @func.expand does: 1114e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1115e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // for (i = %x1; i < %x2; ++i) { 1116e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // func(%accum, 1117e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // *((foo1 *)p->inPtr[0] + i)[, ... *((fooN *)p->inPtr[N-1] + i) 1118e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // [, p] [, i] [, p->current.y] [, p->current.z]); 1119e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // } 1120e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1121e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // This is very similar to foreach kernel expansion with no output. 1122a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross bool ExpandReduceAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) { 1123e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ALOGV("Expanding accumulator %s for general reduce kernel", 1124e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross FnAccumulator->getName().str().c_str()); 1125e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1126e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Create TBAA meta-data. 1127e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, 1128e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross *TBAAAllocation, *TBAAPointer; 1129e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::MDBuilder MDHelper(*Context); 1130e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScriptDistinct = 1131e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross MDHelper.createTBAARoot(kRenderScriptTBAARootName); 1132e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName, 1133e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScriptDistinct); 1134e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", 1135e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScript); 1136e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, 1137e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAAllocation, 0); 1138e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", 1139e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScript); 1140e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0); 1141e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1142e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross auto AccumulatorArgIter = FnAccumulator->arg_begin(); 1143e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1144e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Create empty accumulator function. 1145e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function *FnExpandedAccumulator = 1146a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross createEmptyExpandedReduceAccumulator(FnAccumulator->getName(), 1147a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross (AccumulatorArgIter++)->getType()); 1148e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1149e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Extract the expanded accumulator's parameters. It is 1150a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross // guaranteed by createEmptyExpandedReduceAccumulator that 1151e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // there will be 4 parameters. 1152a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams); 1153e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin(); 1154e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_p = &*(ExpandedAccumulatorArgIter++); 1155e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_x1 = &*(ExpandedAccumulatorArgIter++); 1156e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_x2 = &*(ExpandedAccumulatorArgIter++); 1157e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_accum = &*(ExpandedAccumulatorArgIter++); 1158e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1159e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Construct the actual function body. 11608e9089377848628813a697b972773e969b942c3bPirama Arumuga Nainar llvm::IRBuilder<> Builder(&*FnExpandedAccumulator->getEntryBlock().begin()); 1161e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1162e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Create the loop structure. 1163e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock(); 1164ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo llvm::Value *IndVar; 1165e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross createLoop(Builder, Arg_x1, Arg_x2, &IndVar); 1166e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1167e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 1168e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const int CalleeArgsContextIdx = 1169e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExpandSpecialArguments(Signature, IndVar, Arg_p, Builder, CalleeArgs, 1170e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross [](){}, LoopHeader->getTerminator()); 1171e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 11727d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen llvm::SmallVector<llvm::Type*, 8> InTypes; 1173e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVector<llvm::Value*, 8> InBufPtrs; 1174e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVector<llvm::Value*, 8> InStructTempSlots; 1175e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, AccumulatorArgIter, NumInputs, 11767d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen InTypes, InBufPtrs, InStructTempSlots); 1177e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1178e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Populate the actual call to the original accumulator. 1179e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVector<llvm::Value*, 8> RootArgs; 1180e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross RootArgs.push_back(Arg_accum); 11817d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InTypes, InBufPtrs, InStructTempSlots, 1182e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross IndVar, RootArgs); 1183e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *FnAccumulator, Builder); 1184e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.CreateCall(FnAccumulator, RootArgs); 1185e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1186e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return true; 1187e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 1188e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 118957fd9f882f3359be4201c42b02aebf785d311df2David Gross // Create a combiner function for a general reduce-style kernel that lacks one, 119057fd9f882f3359be4201c42b02aebf785d311df2David Gross // by calling the accumulator function. 119157fd9f882f3359be4201c42b02aebf785d311df2David Gross // 119257fd9f882f3359be4201c42b02aebf785d311df2David Gross // The accumulator function must be of the form 119357fd9f882f3359be4201c42b02aebf785d311df2David Gross // 119457fd9f882f3359be4201c42b02aebf785d311df2David Gross // define void @accumFn(accumType* %accum, accumType %in) 119557fd9f882f3359be4201c42b02aebf785d311df2David Gross // 119657fd9f882f3359be4201c42b02aebf785d311df2David Gross // A combiner function will be generated of the form 119757fd9f882f3359be4201c42b02aebf785d311df2David Gross // 119857fd9f882f3359be4201c42b02aebf785d311df2David Gross // define void @accumFn.combiner(accumType* %accum, accumType* %other) { 119957fd9f882f3359be4201c42b02aebf785d311df2David Gross // %1 = load accumType, accumType* %other 120057fd9f882f3359be4201c42b02aebf785d311df2David Gross // call void @accumFn(accumType* %accum, accumType %1); 120157fd9f882f3359be4201c42b02aebf785d311df2David Gross // } 1202a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross bool CreateReduceCombinerFromAccumulator(llvm::Function *FnAccumulator) { 120357fd9f882f3359be4201c42b02aebf785d311df2David Gross ALOGV("Creating combiner from accumulator %s for general reduce kernel", 120457fd9f882f3359be4201c42b02aebf785d311df2David Gross FnAccumulator->getName().str().c_str()); 120557fd9f882f3359be4201c42b02aebf785d311df2David Gross 120657fd9f882f3359be4201c42b02aebf785d311df2David Gross using llvm::Attribute; 120757fd9f882f3359be4201c42b02aebf785d311df2David Gross 120857fd9f882f3359be4201c42b02aebf785d311df2David Gross bccAssert(FnAccumulator->arg_size() == 2); 120957fd9f882f3359be4201c42b02aebf785d311df2David Gross auto AccumulatorArgIter = FnAccumulator->arg_begin(); 121057fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::Value *AccumulatorArg_accum = &*(AccumulatorArgIter++); 121157fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::Value *AccumulatorArg_in = &*(AccumulatorArgIter++); 121257fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::Type *AccumulatorArgType = AccumulatorArg_accum->getType(); 121357fd9f882f3359be4201c42b02aebf785d311df2David Gross bccAssert(AccumulatorArgType->isPointerTy()); 121457fd9f882f3359be4201c42b02aebf785d311df2David Gross 121557fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context); 121657fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::FunctionType *CombinerType = 121757fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::FunctionType::get(VoidTy, { AccumulatorArgType, AccumulatorArgType }, false); 121857fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::Function *FnCombiner = 121957fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::Function::Create(CombinerType, llvm::GlobalValue::ExternalLinkage, 1220a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross nameReduceCombinerFromAccumulator(FnAccumulator->getName()), 122157fd9f882f3359be4201c42b02aebf785d311df2David Gross Module); 122257fd9f882f3359be4201c42b02aebf785d311df2David Gross 122357fd9f882f3359be4201c42b02aebf785d311df2David Gross auto CombinerArgIter = FnCombiner->arg_begin(); 122457fd9f882f3359be4201c42b02aebf785d311df2David Gross 122557fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::Argument *CombinerArg_accum = &(*CombinerArgIter++); 122657fd9f882f3359be4201c42b02aebf785d311df2David Gross CombinerArg_accum->setName("accum"); 122757fd9f882f3359be4201c42b02aebf785d311df2David Gross CombinerArg_accum->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_accum->getArgNo() + 1, 122857fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::makeArrayRef(Attribute::NoCapture))); 122957fd9f882f3359be4201c42b02aebf785d311df2David Gross 123057fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::Argument *CombinerArg_other = &(*CombinerArgIter++); 123157fd9f882f3359be4201c42b02aebf785d311df2David Gross CombinerArg_other->setName("other"); 123257fd9f882f3359be4201c42b02aebf785d311df2David Gross CombinerArg_other->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_other->getArgNo() + 1, 123357fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::makeArrayRef(Attribute::NoCapture))); 123457fd9f882f3359be4201c42b02aebf785d311df2David Gross 123557fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::BasicBlock *BB = llvm::BasicBlock::Create(*Context, "BB", FnCombiner); 123657fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::IRBuilder<> Builder(BB); 123757fd9f882f3359be4201c42b02aebf785d311df2David Gross 123857fd9f882f3359be4201c42b02aebf785d311df2David Gross if (AccumulatorArg_in->getType()->isPointerTy()) { 123957fd9f882f3359be4201c42b02aebf785d311df2David Gross // Types of sufficient size get passed by pointer-to-copy rather 124057fd9f882f3359be4201c42b02aebf785d311df2David Gross // than passed by value. An accumulator cannot take a pointer 124157fd9f882f3359be4201c42b02aebf785d311df2David Gross // at the user level; so if we see a pointer here, we know that 124257fd9f882f3359be4201c42b02aebf785d311df2David Gross // we have a pass-by-pointer-to-copy case. 124357fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::Type *ElementType = AccumulatorArg_in->getType()->getPointerElementType(); 124457fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::Value *TempMem = Builder.CreateAlloca(ElementType, nullptr, "caller_copy"); 124557fd9f882f3359be4201c42b02aebf785d311df2David Gross Builder.CreateStore(Builder.CreateLoad(CombinerArg_other), TempMem); 124657fd9f882f3359be4201c42b02aebf785d311df2David Gross Builder.CreateCall(FnAccumulator, { CombinerArg_accum, TempMem }); 124757fd9f882f3359be4201c42b02aebf785d311df2David Gross } else { 124857fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::Value *TypeAdjustedOther = CombinerArg_other; 124957fd9f882f3359be4201c42b02aebf785d311df2David Gross if (AccumulatorArgType->getPointerElementType() != AccumulatorArg_in->getType()) { 125057fd9f882f3359be4201c42b02aebf785d311df2David Gross // Call lowering by frontend has done some type coercion 125157fd9f882f3359be4201c42b02aebf785d311df2David Gross TypeAdjustedOther = Builder.CreatePointerCast(CombinerArg_other, 125257fd9f882f3359be4201c42b02aebf785d311df2David Gross AccumulatorArg_in->getType()->getPointerTo(), 125357fd9f882f3359be4201c42b02aebf785d311df2David Gross "cast"); 125457fd9f882f3359be4201c42b02aebf785d311df2David Gross } 125557fd9f882f3359be4201c42b02aebf785d311df2David Gross llvm::Value *DerefOther = Builder.CreateLoad(TypeAdjustedOther); 125657fd9f882f3359be4201c42b02aebf785d311df2David Gross Builder.CreateCall(FnAccumulator, { CombinerArg_accum, DerefOther }); 125757fd9f882f3359be4201c42b02aebf785d311df2David Gross } 125857fd9f882f3359be4201c42b02aebf785d311df2David Gross Builder.CreateRetVoid(); 125957fd9f882f3359be4201c42b02aebf785d311df2David Gross 126057fd9f882f3359be4201c42b02aebf785d311df2David Gross return true; 126157fd9f882f3359be4201c42b02aebf785d311df2David Gross } 126257fd9f882f3359be4201c42b02aebf785d311df2David Gross 126318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// @brief Checks if pointers to allocation internals are exposed 126418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// 126518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// This function verifies if through the parameters passed to the kernel 126618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// or through calls to the runtime library the script gains access to 126718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// pointers pointing to data within a RenderScript Allocation. 126818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// If we know we control all loads from and stores to data within 126918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// RenderScript allocations and if we know the run-time internal accesses 127018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// are all annotated with RenderScript TBAA metadata, only then we 127118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// can safely use TBAA to distinguish between generic and from-allocation 127218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// pointers. 1273bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool allocPointersExposed(llvm::Module &Module) { 127418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Old style kernel function can expose pointers to elements within 127518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // allocations. 127618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // TODO: Extend analysis to allow simple cases of old-style kernels. 127725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines for (size_t i = 0; i < mExportForEachCount; ++i) { 127825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char *Name = mExportForEachNameList[i]; 127925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t Signature = mExportForEachSignatureList[i]; 1280bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (Module.getFunction(Name) && 1281d88177580db4ddedf680854c51db333c97eabc59Stephen Hines !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) { 128218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 128318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 128418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 128518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 128618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Check for library functions that expose a pointer to an Allocation or 128718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // that are not yet annotated with RenderScript-specific tbaa information. 1288e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala static const std::vector<const char *> Funcs{ 1289e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAt(...) 1290e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationj", 1291e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationjj", 1292e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationjjj", 1293e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1294e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsSetElementAt() 1295e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvj", 1296e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvjj", 1297e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvjjj", 1298e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1299e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_Y() 1300e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj", 1301e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1302e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_U() 1303e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj", 1304e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1305e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_V() 1306e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj", 1307e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala }; 1308e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1309e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala for (auto FI : Funcs) { 1310e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala llvm::Function *Function = Module.getFunction(FI); 131118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1312bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (!Function) { 1313e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala ALOGE("Missing run-time function '%s'", FI); 131418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 131518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 131618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1317bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (Function->getNumUses() > 0) { 131818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 131918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 132018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 132118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 132218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return false; 132318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 132418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 132518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// @brief Connect RenderScript TBAA metadata to C/C++ metadata 132618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// 132718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// The TBAA metadata used to annotate loads/stores from RenderScript 1328e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// Allocations is generated in a separate TBAA tree with a 1329354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for 1330354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// all nodes in unrelated alias analysis trees. This function makes the 1331354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root), 1332e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With 1333e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// the connected trees every access to an Allocation is resolved to 1334e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// must-alias if compared to a normal C/C++ access. 1335bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes void connectRenderScriptTBAAMetadata(llvm::Module &Module) { 1336bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDBuilder MDHelper(*Context); 1337354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScriptDistinct = 1338354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines MDHelper.createTBAARoot("RenderScript Distinct TBAA"); 1339354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode( 1340354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines "RenderScript TBAA", TBAARenderScriptDistinct); 1341bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA"); 1342354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScript->replaceOperandWith(1, TBAARoot); 134318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 134418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1345bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes virtual bool runOnModule(llvm::Module &Module) { 1346bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool Changed = false; 1347bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes this->Module = &Module; 13484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Context = &Module.getContext(); 1349bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 13504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala buildTypes(); 1351bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 1352bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bcinfo::MetadataExtractor me(&Module); 135325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines if (!me.extract()) { 135425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines ALOGE("Could not extract metadata from module!"); 135525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines return false; 135625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines } 13574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 13584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Expand forEach_* style kernels. 135925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachCount = me.getExportForEachSignatureCount(); 136025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachNameList = me.getExportForEachNameList(); 136125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachSignatureList = me.getExportForEachSignatureList(); 1362db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 136325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines for (size_t i = 0; i < mExportForEachCount; ++i) { 136425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char *name = mExportForEachNameList[i]; 136525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t signature = mExportForEachSignatureList[i]; 1366bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *kernel = Module.getFunction(name); 1367cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser if (kernel) { 1368d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) { 13694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Changed |= ExpandForEach(kernel, signature); 1370acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 1371acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } else if (kernel->getReturnType()->isVoidTy()) { 13724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Changed |= ExpandOldStyleForEach(kernel, signature); 1373acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 1374acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } else { 1375acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // There are some graphics root functions that are not 1376acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // expanded, but that will be called directly. For those 1377acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // functions, we can not set the linkage to internal. 1378acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } 1379cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines } 1380db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1381db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1382e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Process general reduce_* style functions. 1383a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross const size_t ExportReduceCount = me.getExportReduceCount(); 1384a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross const bcinfo::MetadataExtractor::Reduce *ExportReduceList = me.getExportReduceList(); 1385e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Note that functions can be shared between kernels 138657fd9f882f3359be4201c42b02aebf785d311df2David Gross FunctionSet PromotedFunctions, ExpandedAccumulators, AccumulatorsForCombiners; 1387e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1388a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross for (size_t i = 0; i < ExportReduceCount; ++i) { 1389a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross Changed |= PromoteReduceFunction(ExportReduceList[i].mInitializerName, PromotedFunctions); 1390a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross Changed |= PromoteReduceFunction(ExportReduceList[i].mCombinerName, PromotedFunctions); 1391a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross Changed |= PromoteReduceFunction(ExportReduceList[i].mOutConverterName, PromotedFunctions); 1392e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1393e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Accumulator 1394a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross llvm::Function *accumulator = Module.getFunction(ExportReduceList[i].mAccumulatorName); 1395e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(accumulator != nullptr); 1396e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (ExpandedAccumulators.insert(accumulator).second) 1397a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross Changed |= ExpandReduceAccumulator(accumulator, 1398a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross ExportReduceList[i].mSignature, 1399a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross ExportReduceList[i].mInputCount); 1400a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross if (!ExportReduceList[i].mCombinerName) { 140157fd9f882f3359be4201c42b02aebf785d311df2David Gross if (AccumulatorsForCombiners.insert(accumulator).second) 1402a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross Changed |= CreateReduceCombinerFromAccumulator(accumulator); 140357fd9f882f3359be4201c42b02aebf785d311df2David Gross } 1404e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 1405e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 14064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (gEnableRsTbaa && !allocPointersExposed(Module)) { 1407bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes connectRenderScriptTBAAMetadata(Module); 140818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 140918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1410cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines return Changed; 1411db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1412db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1413db169187dea4602e4ad32058762d23d474753fd0Stephen Hines virtual const char *getPassName() const { 14144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return "forEach_* and reduce_* function expansion"; 1415db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1416db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 14174e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala}; // end RSKernelExpandPass 1418db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 14197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace 14207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 14214e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walachar RSKernelExpandPass::ID = 0; 14224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walastatic llvm::RegisterPass<RSKernelExpandPass> X("kernelexp", "Kernel Expand Pass"); 1423db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1424db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc { 1425db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1426ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leoconst char BCC_INDEX_VAR_NAME[] = "rsIndex"; 1427ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo 14287a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass * 14294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt WalacreateRSKernelExpandPass(bool pEnableStepOpt) { 14304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return new RSKernelExpandPass(pEnableStepOpt); 14317a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} 1432db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 14337a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc 1434