RSKernelExpand.cpp revision 9fa4d4480252ecfe08c97bc35888360b1e19ec99
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/* 2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project 3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License"); 5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License. 6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at 7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * http://www.apache.org/licenses/LICENSE-2.0 9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software 11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS, 12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and 14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License. 
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h" 18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h" 19dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross#include "bcc/Renderscript/RSUtils.h" 207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 217a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib> 2233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional> 23e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross#include <unordered_set> 247a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 25b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h> 26b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h> 27b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h> 28b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h> 2918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h> 30b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h> 31c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h> 327ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h> 33b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h> 34cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h> 35b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h> 36806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h> 37c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang 38c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h" 39ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h" 
40db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 41d88177580db4ddedf680854c51db333c97eabc59Stephen Hines#include "bcinfo/MetadataExtractor.h" 42d88177580db4ddedf680854c51db333c97eabc59Stephen Hines 434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala#ifndef __DISABLE_ASSERTS 444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala// Only used in bccAssert() 454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst int kNumExpandedForeachParams = 4; 469fa4d4480252ecfe08c97bc35888360b1e19ec99David Grossconst int kNumExpandedReduceAccumulatorParams = 4; 474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala#endif 484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA"; 504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst char kRenderScriptTBAANodeName[] = "RenderScript TBAA"; 51bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 527a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc; 537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 54db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace { 557a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 56354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hinesstatic const bool gEnableRsTbaa = true; 579c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines 58797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross/* RSKernelExpandPass 59797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * 60797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * This pass generates functions used to implement calls via 61797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * rsForEach(), "foreach_<NAME>", or "reduce_<NAME>". 
We create an 62797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * inner loop for the function to be invoked over the appropriate data 63797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * cells of the input/output allocations (adjusting other relevant 64797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * parameters as we go). We support doing this for any forEach or 65797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * reduce style compute kernels. 66797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * 67797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * In the case of a foreach kernel or a simple reduction kernel, the 68797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * new function name is the original function name "<NAME>" followed 69797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * by ".expand" -- "<NAME>.expand". 70797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * 71797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * In the case of a general reduction kernel, the kernel's accumulator 72797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * function is the one transformed, and the new function name is the 73797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * original accumulator function name "<ACCUMFN>" followed by 74797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * ".expand" -- "<ACCUMFN>.expand". 
Using the name "<ACCUMFN>.expand" 75797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * for the function generated from the accumulator should not 76797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * introduce any possibility for name clashes today: The accumulator 77797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * function <ACCUMFN> must be static, so it cannot also serve as a 78797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * foreach kernel; and the code for <ACCUMFN>.expand depends only on 79797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * <ACCUMFN>, not on any other properties of the reduction kernel, so 80797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * any reduction kernels that share the accumulator <ACCUMFN> can 81797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * share <ACCUMFN>.expand also. 82797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * 83797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * Note that this pass does not delete the original function <NAME> or 84797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * <ACCUMFN>. However, if it is inlined into the newly-generated 85797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * function and not otherwise referenced, then a subsequent pass may 86797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * delete it. 
 */
class RSKernelExpandPass : public llvm::ModulePass {
public:
  static char ID;

private:
  // Length of the per-input arrays (inPtr/inStride/outPtr/outStride) in the
  // driver-info struct type built by buildTypes().
  static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h

  typedef std::unordered_set<llvm::Function *> FunctionSet;

  // Field indices of the "RsLaunchDimensions" struct type. The enumerators
  // are listed in the same order in which buildTypes() appends the fields;
  // RsLaunchDimensionsFieldCount is the number of fields.
  enum RsLaunchDimensionsField {
    RsLaunchDimensionsFieldX,
    RsLaunchDimensionsFieldY,
    RsLaunchDimensionsFieldZ,
    RsLaunchDimensionsFieldLod,
    RsLaunchDimensionsFieldFace,
    RsLaunchDimensionsFieldArray,

    RsLaunchDimensionsFieldCount
  };

  // Field indices of the "RsExpandKernelDriverInfoPfx" struct type, again in
  // the order in which buildTypes() appends the fields;
  // RsExpandKernelDriverInfoPfxFieldCount is the number of fields.
  enum RsExpandKernelDriverInfoPfxField {
    RsExpandKernelDriverInfoPfxFieldInPtr,
    RsExpandKernelDriverInfoPfxFieldInStride,
    RsExpandKernelDriverInfoPfxFieldInLen,
    RsExpandKernelDriverInfoPfxFieldOutPtr,
    RsExpandKernelDriverInfoPfxFieldOutStride,
    RsExpandKernelDriverInfoPfxFieldOutLen,
    RsExpandKernelDriverInfoPfxFieldDim,
    RsExpandKernelDriverInfoPfxFieldCurrent,
    RsExpandKernelDriverInfoPfxFieldUsr,
    // NOTE(review): "UsLenr" looks like a misspelling of "UsrLen" (the field
    // it indexes is `usrLen`); not renamed here because uses outside this
    // part of the file cannot be checked.
    RsExpandKernelDriverInfoPfxFieldUsLenr,

    RsExpandKernelDriverInfoPfxFieldCount
  };

  // Module and context used by the helpers below to look up named metadata
  // and to create LLVM types, functions and basic blocks.
  llvm::Module *Module;
  llvm::LLVMContext *Context;

  /*
   * Pointers to LLVM type information for the function signatures
   * for expanded functions.  These must be re-calculated for each module
   * the pass is run on.
130bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes */ 1319fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross llvm::FunctionType *ExpandedForEachType; 132e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *RsExpandKernelDriverInfoPfxTy; 133db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 13425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t mExportForEachCount; 13525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char **mExportForEachNameList; 13625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const uint32_t *mExportForEachSignatureList; 137cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines 1382b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Turns on optimization of allocation stride values. 1392b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bool mEnableStepOpt; 1402b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 141bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes uint32_t getRootSignature(llvm::Function *Function) { 142db169187dea4602e4ad32058762d23d474753fd0Stephen Hines const llvm::NamedMDNode *ExportForEachMetadata = 143bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes Module->getNamedMetadata("#rs_export_foreach"); 144db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 145db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (!ExportForEachMetadata) { 146db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::SmallVector<llvm::Type*, 8> RootArgTys; 147bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes for (llvm::Function::arg_iterator B = Function->arg_begin(), 148bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes E = Function->arg_end(); 149db169187dea4602e4ad32058762d23d474753fd0Stephen Hines B != E; 150db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ++B) { 151db169187dea4602e4ad32058762d23d474753fd0Stephen Hines RootArgTys.push_back(B->getType()); 152db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 153db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 
154db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // For pre-ICS bitcode, we may not have signature information. In that 155db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // case, we use the size of the RootArgTys to select the number of 156db169187dea4602e4ad32058762d23d474753fd0Stephen Hines // arguments. 157db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return (1 << RootArgTys.size()) - 1; 158db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 159db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1607ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines if (ExportForEachMetadata->getNumOperands() == 0) { 1617ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines return 0; 1627ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines } 1637ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines 1646e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines bccAssert(ExportForEachMetadata->getNumOperands() > 0); 165db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 166cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // We only handle the case for legacy root() functions here, so this is 167cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines // hard-coded to look at only the first such function. 
168db169187dea4602e4ad32058762d23d474753fd0Stephen Hines llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0); 169900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes if (SigNode != nullptr && SigNode->getNumOperands() == 1) { 1701bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines llvm::Metadata *SigMD = SigNode->getOperand(0); 1711bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) { 1721bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines llvm::StringRef SigString = SigS->getString(); 173db169187dea4602e4ad32058762d23d474753fd0Stephen Hines uint32_t Signature = 0; 174db169187dea4602e4ad32058762d23d474753fd0Stephen Hines if (SigString.getAsInteger(10, Signature)) { 175db169187dea4602e4ad32058762d23d474753fd0Stephen Hines ALOGE("Non-integer signature value '%s'", SigString.str().c_str()); 176db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 177db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 178db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return Signature; 179db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 180db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 181db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 182db169187dea4602e4ad32058762d23d474753fd0Stephen Hines return 0; 183db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 184db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 185429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray bool isStepOptSupported(llvm::Type *AllocType) { 186429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 187429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 188429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 189429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 190429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (mEnableStepOpt) { 
191429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 192429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 193429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 194429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType == VoidPtrTy) { 195429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 196429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 197429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 198429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (!PT) { 199429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 200429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 201429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 202429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // remaining conditions are 64-bit only 203429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (VoidPtrTy->getPrimitiveSizeInBits() == 32) { 204429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return true; 205429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 206429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 207429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // coerce suggests an upconverted struct type, which we can't support 208429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) { 209429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 210429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 211429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 212429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported 213429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2); 214429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128); 215429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (AllocType == V2xi64Ty || AllocType == Int128Ty) { 
216429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return false; 217429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 218429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 219429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray return true; 220429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray } 221429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray 2222b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // Get the actual value we should use to step through an allocation. 2237b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 2247b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // Normally the value we use to step through an allocation is given to us by 2257b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // the driver. However, for certain primitive data types, we can derive an 2267b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // integer constant for the step value. We use this integer constant whenever 2277b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // possible to allow further compiler optimizations to take place. 2287b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser // 229b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines // DL - Target Data size/layout information. 2302b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // T - Type of allocation (should be a pointer). 2312b04086acbef6520ae2c54a868b1271abf053122Stephen Hines // OrigStep - Original step increment (root.expand() input from driver). 
232bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType, 2332b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Value *OrigStep) { 234b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines bccAssert(DL); 235bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bccAssert(AllocType); 2362b04086acbef6520ae2c54a868b1271abf053122Stephen Hines bccAssert(OrigStep); 237bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType); 238429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray if (isStepOptSupported(AllocType)) { 2392b04086acbef6520ae2c54a868b1271abf053122Stephen Hines llvm::Type *ET = PT->getElementType(); 240b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines uint64_t ETSize = DL->getTypeAllocSize(ET); 241bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 2422b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return llvm::ConstantInt::get(Int32Ty, ETSize); 2432b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } else { 2442b04086acbef6520ae2c54a868b1271abf053122Stephen Hines return OrigStep; 2452b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 2462b04086acbef6520ae2c54a868b1271abf053122Stephen Hines } 2472b04086acbef6520ae2c54a868b1271abf053122Stephen Hines 248097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes /// Builds the types required by the pass for the given context. 249bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes void buildTypes(void) { 250e44a3525b9703739534c3b62d7d1af4c95649a38David Gross // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs. 
251bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 252e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8Ty = llvm::Type::getInt8Ty(*Context); 253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8PtrTy = Int8Ty->getPointerTo(); 254e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT); 255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32ArrayInputLimitTy = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT); 257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context); 258e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *Int32Array4Ty = llvm::ArrayType::get(Int32Ty, 4); 259097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes 260097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h: 261db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * 262e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * struct RsLaunchDimensions { 263e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t x; 264db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t y; 265db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * uint32_t z; 266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t lod; 267e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t face; 268e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t array[4]; 269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * }; 270e44a3525b9703739534c3b62d7d1af4c95649a38David Gross */ 271e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes; 272e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 
RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t x 273e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t y 274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t z 275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t lod 276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Ty); // uint32_t face 277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4] 278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType *RsLaunchDimensionsTy = 279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions"); 280e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 2811d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h: 282e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 283e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * struct RsExpandKernelDriverInfoPfx { 284e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]; 285e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t inStride[RS_KERNEL_INPUT_LIMIT]; 286e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t inLen; 287e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 288e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]; 289e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t outStride[RS_KERNEL_INPUT_LIMIT]; 290e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t outLen; 291e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 292e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // Dimension of the launch 
293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * RsLaunchDimensions dim; 294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // The walking iterator of the launch 296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * RsLaunchDimensions current; 297e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 298e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * const void *usr; 299e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * uint32_t usrLen; 300e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * 301e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // Items below this line are not used by the compiler and can be change in the driver. 302e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // So the compiler must assume there are an unknown number of fields of unknown type 303e44a3525b9703739534c3b62d7d1af4c95649a38David Gross * // beginning here. 304db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * }; 3051d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross * 3061d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp). 
307db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */ 308e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes; 309e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT] 310e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t inStride[RS_KERNEL_INPUT_LIMIT] 311e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t inLen 312e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT] 313e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy); // uint32_t outStride[RS_KERNEL_INPUT_LIMIT] 314e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t outLen 315e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions dim 316e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy); // RsLaunchDimensions current 317e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy); // const void *usr 318e44a3525b9703739534c3b62d7d1af4c95649a38David Gross RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty); // uint32_t usrLen 319e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross RsExpandKernelDriverInfoPfxTy = 320e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx"); 321bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 
322bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes // Create the function type for expanded kernels. 3234e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context); 324bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 325e44a3525b9703739534c3b62d7d1af4c95649a38David Gross llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo(); 3264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep) 3274e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ExpandedForEachType = llvm::FunctionType::get(VoidTy, 3284e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false); 3298ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 3308ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 3314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala /// @brief Create skeleton of the expanded foreach kernel. 
332357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 333357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// This creates a function with the following signature: 334357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 335357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2, 3365010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes /// uint32_t outstep) 337357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser /// 3384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function *createEmptyExpandedForEachKernel(llvm::StringRef OldName) { 339bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *ExpandedFunction = 3404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::Function::Create(ExpandedForEachType, 341bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::GlobalValue::ExternalLinkage, 342bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes OldName + ".expand", Module); 3434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams); 344bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin(); 345bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("p"); 346bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("x1"); 347bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("x2"); 348bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes (AI++)->setName("arg_outstep"); 3494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 3504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala ExpandedFunction); 3514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala llvm::IRBuilder<> Builder(Begin); 3524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Builder.CreateRetVoid(); 3534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return 
ExpandedFunction; 3544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala } 3554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala 356e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Create skeleton of a general reduce kernel's expanded accumulator. 357e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 358e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // This creates a function with the following signature: 359e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 360e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p, 361e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // i32 %x1, i32 %x2, accumType* nocapture %accum) 362e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 3639fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross llvm::Function *createEmptyExpandedReduceAccumulator(llvm::StringRef OldName, 3649fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross llvm::Type *AccumArgTy) { 365e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 366e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context); 3679fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross llvm::FunctionType *ExpandedReduceAccumulatorType = 368e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::FunctionType::get(VoidTy, 369e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross {RsExpandKernelDriverInfoPfxTy->getPointerTo(), 370e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Int32Ty, Int32Ty, AccumArgTy}, false); 371e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function *FnExpandedAccumulator = 3729fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross llvm::Function::Create(ExpandedReduceAccumulatorType, 373e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::GlobalValue::ExternalLinkage, 374e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross OldName + ".expand", Module); 
3759fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams); 376e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 377e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin(); 378e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 379e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross using llvm::Attribute; 380e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 381e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Argument *Arg_p = &(*AI++); 382e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_p->setName("p"); 383e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_p->addAttr(llvm::AttributeSet::get(*Context, Arg_p->getArgNo() + 1, 384e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::makeArrayRef(Attribute::NoCapture))); 385e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 386e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Argument *Arg_x1 = &(*AI++); 387e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_x1->setName("x1"); 388e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 389e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Argument *Arg_x2 = &(*AI++); 390e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_x2->setName("x2"); 391e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 392e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Argument *Arg_accum = &(*AI++); 393e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_accum->setName("accum"); 394e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Arg_accum->addAttr(llvm::AttributeSet::get(*Context, Arg_accum->getArgNo() + 1, 395e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::makeArrayRef(Attribute::NoCapture))); 396e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 397e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", 
398e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross FnExpandedAccumulator); 399e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::IRBuilder<> Builder(Begin); 400e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.CreateRetVoid(); 401e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 402e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return FnExpandedAccumulator; 403e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 404e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 405e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @brief Create an empty loop 406e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 407e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// Create a loop of the form: 408e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 409e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// for (i = LowerBound; i < UpperBound; i++) 410e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// ; 411e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 412e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// After the loop has been created, the builder is set such that 413e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// instructions can be added to the loop body. 414e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// 415e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param Builder The builder to use to build this loop. The current 416e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// position of the builder is the position the loop 417e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// will be inserted. 
418e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LowerBound The first value of the loop iterator 419e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param UpperBound The maximal value of the loop iterator 420e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @param LoopIV A reference that will be set to the loop iterator. 421e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser /// @return The BasicBlock that will be executed after the loop. 422e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder, 423e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *LowerBound, 424e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::Value *UpperBound, 4254165d29822fc7caf81e435995ff6189608fc0323Dean De Leo llvm::Value **LoopIV) { 426c2ca742d7d0197c52e49467862844463fb42280fDavid Gross bccAssert(LowerBound->getType() == UpperBound->getType()); 427e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 428e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB; 4294165d29822fc7caf81e435995ff6189608fc0323Dean De Leo llvm::Value *Cond, *IVNext, *IV, *IVVar; 430e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 431e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser CondBB = Builder.GetInsertBlock(); 432f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar AfterBB = llvm::SplitBlock(CondBB, &*Builder.GetInsertPoint(), nullptr, nullptr); 433bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent()); 434e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 4354165d29822fc7caf81e435995ff6189608fc0323Dean De Leo CondBB->getTerminator()->eraseFromParent(); 4364165d29822fc7caf81e435995ff6189608fc0323Dean De Leo Builder.SetInsertPoint(CondBB); 4374165d29822fc7caf81e435995ff6189608fc0323Dean De Leo 4384165d29822fc7caf81e435995ff6189608fc0323Dean De 
Leo // decltype(LowerBound) *ivvar = alloca(sizeof(int)) 4394165d29822fc7caf81e435995ff6189608fc0323Dean De Leo // *ivvar = LowerBound 4404165d29822fc7caf81e435995ff6189608fc0323Dean De Leo IVVar = Builder.CreateAlloca(LowerBound->getType(), nullptr, BCC_INDEX_VAR_NAME); 4414165d29822fc7caf81e435995ff6189608fc0323Dean De Leo Builder.CreateStore(LowerBound, IVVar); 4424165d29822fc7caf81e435995ff6189608fc0323Dean De Leo 443e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // if (LowerBound < Upperbound) 444e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto LoopHeader 445e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // else 446e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser // goto AfterBB 447e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(LowerBound, UpperBound); 448e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 449e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 4504165d29822fc7caf81e435995ff6189608fc0323Dean De Leo // LoopHeader: 4514165d29822fc7caf81e435995ff6189608fc0323Dean De Leo // iv = *ivvar 4524165d29822fc7caf81e435995ff6189608fc0323Dean De Leo // <insertion point here> 4534165d29822fc7caf81e435995ff6189608fc0323Dean De Leo // iv.next = iv + 1 4544165d29822fc7caf81e435995ff6189608fc0323Dean De Leo // *ivvar = iv.next 4554165d29822fc7caf81e435995ff6189608fc0323Dean De Leo // if (iv.next < Upperbound) 4564165d29822fc7caf81e435995ff6189608fc0323Dean De Leo // goto LoopHeader 4574165d29822fc7caf81e435995ff6189608fc0323Dean De Leo // else 4584165d29822fc7caf81e435995ff6189608fc0323Dean De Leo // goto AfterBB 4594165d29822fc7caf81e435995ff6189608fc0323Dean De Leo // AfterBB: 460e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.SetInsertPoint(HeaderBB); 4614165d29822fc7caf81e435995ff6189608fc0323Dean De Leo IV = Builder.CreateLoad(IVVar, "X"); 462e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser IVNext = 
Builder.CreateNUWAdd(IV, Builder.getInt32(1)); 4634165d29822fc7caf81e435995ff6189608fc0323Dean De Leo Builder.CreateStore(IVNext, IVVar); 464e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser Cond = Builder.CreateICmpULT(IVNext, UpperBound); 465e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser Builder.CreateCondBr(Cond, HeaderBB, AfterBB); 466e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser AfterBB->setName("Exit"); 4674165d29822fc7caf81e435995ff6189608fc0323Dean De Leo Builder.SetInsertPoint(llvm::cast<llvm::Instruction>(IVNext)); 4684165d29822fc7caf81e435995ff6189608fc0323Dean De Leo 4694165d29822fc7caf81e435995ff6189608fc0323Dean De Leo // Record information about this loop. 470e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser *LoopIV = IV; 471e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser return AfterBB; 472e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser } 473e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser 47428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Finish building the outgoing argument list for calling a ForEach-able function. 
47528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // 47628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // ArgVector - on input, the non-special arguments 47728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // on output, the non-special arguments combined with the special arguments 47828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // from SpecialArgVector 47928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // SpecialArgVector - special arguments (from ExpandSpecialArguments()) 48028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // SpecialArgContextIdx - return value of ExpandSpecialArguments() 48128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // (position of context argument in SpecialArgVector) 48228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // CalleeFunction - the ForEach-able function being called 48328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Builder - for inserting code into the caller function 48428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen> 48528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross void finishArgList( llvm::SmallVector<llvm::Value *, ArgVectorLen> &ArgVector, 48628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector, 48728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int SpecialArgContextIdx, 48828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const llvm::Function &CalleeFunction, 48928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::IRBuilder<> &CallerBuilder) { 49028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross /* The context argument (if any) is a pointer to an opaque user-visible type that differs from 49128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the 49228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * two types represent 
the same thing). Therefore, we must introduce a pointer cast when 49328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross * generating a call to the kernel function. 49428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross */ 49528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross const int ArgContextIdx = 49628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx; 49728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end()); 49828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross if (ArgContextIdx >= 0) { 49928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Type *ContextArgType = nullptr; 50028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int ArgIdx = ArgContextIdx; 50128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross for (const auto &Arg : CalleeFunction.getArgumentList()) { 50228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross if (!ArgIdx--) { 50328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ContextArgType = Arg.getType(); 50428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross break; 50528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 50628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 50728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross bccAssert(ContextArgType); 50828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType); 50928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 51028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross } 51128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 512083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // GEPHelper() returns a SmallVector of values suitable for passing 513083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for 514083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // the returned 
data type. It is sized so that the SmallVector 515083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // returned by GEPHelper() never needs to do a heap allocation for 516083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // any list of GEP indices it encounters in the code. 517083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices; 518083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 519083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Helper for turning a list of constant integer GEP indices into a 520083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // SmallVector of llvm::Value*. The return value is suitable for 521083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP(). 522083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // 523083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Inputs: 524083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // I32Args should be integers which represent the index arguments 525083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // to a GEP instruction. 526083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // 527083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Returns: 528083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Returns a SmallVector of ConstantInts. 
5294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala SmallGEPIndices GEPHelper(const std::initializer_list<int32_t> I32Args) { 530083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices Out(I32Args.size()); 531083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context); 532083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala std::transform(I32Args.begin(), I32Args.end(), Out.begin(), 533083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); }); 534083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala return Out; 535083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala } 536083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 5378ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic: 5387e920a716693033edf32a6fedd03798bbfbd85ebChih-Hung Hsieh explicit RSKernelExpandPass(bool pEnableStepOpt = true) 539900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes : ModulePass(ID), Module(nullptr), Context(nullptr), 540bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes mEnableStepOpt(pEnableStepOpt) { 541bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 5428ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser } 5438ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser 544c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { 545c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines // This pass does not use any other analysis passes, but it does 546c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines // add/wrap the existing functions in the module (thus altering the CFG). 
547c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines } 548c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines 54933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Build contribution to outgoing argument list for calling a 550e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // ForEach-able function or a general reduction accumulator 551e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // function, based on the special parameters of that function. 55233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // 553e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Signature - metadata bits for the signature of the callee 55433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // X, Arg_p - values derived directly from expanded function, 555e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // suitable for computing arguments for the callee 55633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // CalleeArgs - contribution is accumulated here 55733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross // Bump - invoked once for each contributed outgoing argument 558083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // LoopHeaderInsertionPoint - an Instruction in the loop header, before which 559083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // this function can insert loop-invariant loads 56028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // 56128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // Return value is the (zero-based) position of the context (Arg_p) 56228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // argument in the CalleeArgs vector, or a negative value if the 56328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross // context argument is not placed in the CalleeArgs vector. 
56428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int ExpandSpecialArguments(uint32_t Signature, 56528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Value *X, 56628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::Value *Arg_p, 56728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::IRBuilder<> &Builder, 56828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross llvm::SmallVector<llvm::Value*, 8> &CalleeArgs, 569083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala std::function<void ()> Bump, 570083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Instruction *LoopHeaderInsertionPoint) { 57128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 57228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross bccAssert(CalleeArgs.empty()); 57328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 57428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross int Return = -1; 57533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) { 57633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross CalleeArgs.push_back(Arg_p); 57733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross Bump(); 57828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross Return = CalleeArgs.size() - 1; 57933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 58033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 58133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) { 58233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross CalleeArgs.push_back(X); 58333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross Bump(); 58433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 58533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 586e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) || 587e44a3525b9703739534c3b62d7d1af4c95649a38David Gross bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 
588083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala bccAssert(LoopHeaderInsertionPoint); 58933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 590083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // Y and Z are loop invariant, so they can be hoisted out of the 591083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala // loop. Set the IRBuilder insertion point to the loop header. 592083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala auto OldInsertionPoint = Builder.saveIP(); 593083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.SetInsertPoint(LoopHeaderInsertionPoint); 594e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 595e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) { 596083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent, 597083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala RsLaunchDimensionsFieldY})); 598083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep"); 599083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y")); 600e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Bump(); 601e44a3525b9703739534c3b62d7d1af4c95649a38David Gross } 602e44a3525b9703739534c3b62d7d1af4c95649a38David Gross 603e44a3525b9703739534c3b62d7d1af4c95649a38David Gross if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) { 604083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent, 605083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala RsLaunchDimensionsFieldZ})); 606083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep"); 607083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z")); 
608e44a3525b9703739534c3b62d7d1af4c95649a38David Gross Bump(); 609e44a3525b9703739534c3b62d7d1af4c95649a38David Gross } 610083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala 611083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala Builder.restoreIP(OldInsertionPoint); 61233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 61328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross 61428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross return Return; 61533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross } 61633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross 617e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Generate loop-invariant input processing setup code for an expanded 618e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // ForEach-able function or an expanded general reduction accumulator 619e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // function. 620e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 621e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // LoopHeader - block at the end of which the setup code will be inserted 622e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Arg_p - RSKernelDriverInfo pointer passed to the expanded function 623e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // TBAAPointer - metadata for marking loads of pointer values out of RSKernelDriverInfo 624e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // ArgIter - iterator pointing to first input of the UNexpanded function 625e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // NumInputs - number of inputs (NOT number of ARGUMENTS) 626e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 627f039d98d346006442b61255a2889b8513a8cd56fYong Chen // InTypes[] - this function saves input type, they will be used in ExpandInputsBody(). 
628f039d98d346006442b61255a2889b8513a8cd56fYong Chen // InBufPtrs[] - this function sets each array element to point to the first cell / byte 629f039d98d346006442b61255a2889b8513a8cd56fYong Chen // (byte for x86, cell for other platforms) of the corresponding input allocation 630e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // InStructTempSlots[] - this function sets each array element either to nullptr 631e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // or to the result of an alloca (for the case where the 632e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // calling convention dictates that a value must be passed 633e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // by reference, and so we need a stacked temporary to hold 634e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // a copy of that value) 635e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross void ExpandInputsLoopInvariant(llvm::IRBuilder<> &Builder, llvm::BasicBlock *LoopHeader, 636e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_p, 637e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::MDNode *TBAAPointer, 638e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function::arg_iterator ArgIter, 639e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const size_t NumInputs, 640f039d98d346006442b61255a2889b8513a8cd56fYong Chen llvm::SmallVectorImpl<llvm::Type *> &InTypes, 641e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs, 642e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots) { 643e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT); 644e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 645e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Extract information about input slots. 
The work done 646e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // here is loop-invariant, so we can hoist the operations out of the loop. 647e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross auto OldInsertionPoint = Builder.saveIP(); 648e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.SetInsertPoint(LoopHeader->getTerminator()); 649e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 650e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross for (size_t InputIndex = 0; InputIndex < NumInputs; ++InputIndex, ArgIter++) { 651e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *InType = ArgIter->getType(); 652e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 653e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross /* 654e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * AArch64 calling conventions dictate that structs of sufficient size 655e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * get passed by pointer instead of passed by value. This, combined 656e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * with the fact that we don't allow kernels to operate on pointer 657e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * data means that if we see a kernel with a pointer parameter we know 658e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * that it is a struct input that has been promoted. As such we don't 659e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * need to convert its type to a pointer. Later we will need to know 660e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * to create a temporary copy on the stack, so we save this information 661e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross * in InStructTempSlots. 
662e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross */ 663e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) { 664e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Type *ElementType = PtrType->getElementType(); 665e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr, 666e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross "input_struct_slot")); 667e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } else { 668e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InType = InType->getPointerTo(); 669e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InStructTempSlots.push_back(nullptr); 670e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 671e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 672e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 673e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross static_cast<int32_t>(InputIndex)})); 674e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep"); 675e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf"); 676f039d98d346006442b61255a2889b8513a8cd56fYong Chen 677f039d98d346006442b61255a2889b8513a8cd56fYong Chen llvm::Value *CastInBufPtr = nullptr; 678f039d98d346006442b61255a2889b8513a8cd56fYong Chen if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) { 679f039d98d346006442b61255a2889b8513a8cd56fYong Chen CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in"); 680f039d98d346006442b61255a2889b8513a8cd56fYong Chen } else { 681f039d98d346006442b61255a2889b8513a8cd56fYong Chen // The disagreement between module and x86 target machine datalayout 682f039d98d346006442b61255a2889b8513a8cd56fYong Chen // causes mismatched 
input/output data offset between slang reflected 683f039d98d346006442b61255a2889b8513a8cd56fYong Chen // code and bcc codegen for GetElementPtr. To solve this issue, skip the 684f039d98d346006442b61255a2889b8513a8cd56fYong Chen // cast to InType and leave CastInBufPtr as an int8_t*. The buffer is 685f039d98d346006442b61255a2889b8513a8cd56fYong Chen // later indexed with an explicit byte offset computed based on 686f039d98d346006442b61255a2889b8513a8cd56fYong Chen // X86_CUSTOM_DL_STRING and then bitcast it to actual input type. 687f039d98d346006442b61255a2889b8513a8cd56fYong Chen CastInBufPtr = InBufPtr; 688f039d98d346006442b61255a2889b8513a8cd56fYong Chen } 689e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 690e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (gEnableRsTbaa) { 691e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InBufPtr->setMetadata("tbaa", TBAAPointer); 692e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 693e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 694f039d98d346006442b61255a2889b8513a8cd56fYong Chen InTypes.push_back(InType); 695e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InBufPtrs.push_back(CastInBufPtr); 696e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 697e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 698e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.restoreIP(OldInsertionPoint); 699e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 700e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 701e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Generate loop-varying input processing code for an expanded ForEach-able function 702e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // or an expanded general reduction accumulator function. Also, for the call to the 703e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // UNexpanded function, collect the portion of the argument list corresponding to the 704e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // inputs. 
705e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 706e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Arg_x1 - first X coordinate to be processed by the expanded function 707e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // TBAAAllocation - metadata for marking loads of input values out of allocations 708e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // NumInputs -- number of inputs (NOT number of ARGUMENTS) 709f039d98d346006442b61255a2889b8513a8cd56fYong Chen // InTypes[] - this function uses the saved input types in ExpandInputsLoopInvariant() 710f039d98d346006442b61255a2889b8513a8cd56fYong Chen // to convert the pointer of byte InPtr to its real type. 711e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // InBufPtrs[] - this function consumes the information produced by ExpandInputsLoopInvariant() 712e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // InStructTempSlots[] - this function consumes the information produced by ExpandInputsLoopInvariant() 713e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // IndVar - value of loop induction variable (X coordinate) for a given loop iteration 714e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 715e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // RootArgs - this function sets this to the list of outgoing argument values corresponding 716e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // to the inputs 717e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross void ExpandInputsBody(llvm::IRBuilder<> &Builder, 718e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_x1, 719e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::MDNode *TBAAAllocation, 720e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const size_t NumInputs, 721f039d98d346006442b61255a2889b8513a8cd56fYong Chen const llvm::SmallVectorImpl<llvm::Type *> &InTypes, 722e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs, 
723e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots, 724e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *IndVar, 725e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVectorImpl<llvm::Value *> &RootArgs) { 726e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Offset = Builder.CreateSub(IndVar, Arg_x1); 727f039d98d346006442b61255a2889b8513a8cd56fYong Chen llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); 728e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 729e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross for (size_t Index = 0; Index < NumInputs; ++Index) { 730e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 731f039d98d346006442b61255a2889b8513a8cd56fYong Chen llvm::Value *InPtr = nullptr; 732f039d98d346006442b61255a2889b8513a8cd56fYong Chen if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) { 733f039d98d346006442b61255a2889b8513a8cd56fYong Chen InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset); 734f039d98d346006442b61255a2889b8513a8cd56fYong Chen } else { 735f039d98d346006442b61255a2889b8513a8cd56fYong Chen // Treat x86 input buffer as byte[], get indexed pointer with explicit 736f039d98d346006442b61255a2889b8513a8cd56fYong Chen // byte offset computed using a datalayout based on 737f039d98d346006442b61255a2889b8513a8cd56fYong Chen // X86_CUSTOM_DL_STRING, then bitcast it to actual input type. 
738f039d98d346006442b61255a2889b8513a8cd56fYong Chen llvm::DataLayout DL(X86_CUSTOM_DL_STRING); 739f039d98d346006442b61255a2889b8513a8cd56fYong Chen llvm::Type *InTy = InTypes[Index]; 740f039d98d346006442b61255a2889b8513a8cd56fYong Chen uint64_t InStep = DL.getTypeAllocSize(InTy->getPointerElementType()); 741f039d98d346006442b61255a2889b8513a8cd56fYong Chen llvm::Value *OffsetInBytes = Builder.CreateMul(Offset, llvm::ConstantInt::get(Int32Ty, InStep)); 742f039d98d346006442b61255a2889b8513a8cd56fYong Chen InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], OffsetInBytes); 743f039d98d346006442b61255a2889b8513a8cd56fYong Chen InPtr = Builder.CreatePointerCast(InPtr, InTy); 744f039d98d346006442b61255a2889b8513a8cd56fYong Chen } 745f039d98d346006442b61255a2889b8513a8cd56fYong Chen 746f039d98d346006442b61255a2889b8513a8cd56fYong Chen llvm::Value *Input; 747e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input"); 748e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 749e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (gEnableRsTbaa) { 750e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross InputLoad->setMetadata("tbaa", TBAAAllocation); 751e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 752e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 753e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) { 754e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Pass a pointer to a temporary on the stack, rather than 755e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // passing a pointer to the original value. We do not want 756e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // the kernel to potentially modify the input data. 
757e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 758e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Note: don't annotate with TBAA, since the kernel might 759e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // have its own TBAA annotations for the pointer argument. 760e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.CreateStore(InputLoad, TemporarySlot); 761e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Input = TemporarySlot; 762e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } else { 763e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Input = InputLoad; 764e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 765e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 766e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross RootArgs.push_back(Input); 767e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 768e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 769e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 7708ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser /* Performs the actual optimization on a selected function. On success, the 7718ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * Module will contain a new function of the name "<NAME>.expand" that 7728ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser * invokes <NAME>() in a loop with the appropriate parameters. 
   *
   * Function  - the ForEach-able function to expand.
   * Signature - ForEach signature bits for Function. When 0, the signature is
   *             obtained from getRootSignature(Function) instead.
   *
   * Returns false when no non-zero signature could be determined (nothing is
   * created in that case); returns true otherwise.
   */
  bool ExpandOldStyleForEach(llvm::Function *Function, uint32_t Signature) {
    ALOGV("Expanding ForEach-able Function %s",
          Function->getName().str().c_str());

    if (!Signature) {
      Signature = getRootSignature(Function);
      if (!Signature) {
        // We couldn't determine how to expand this function based on its
        // function signature.
        return false;
      }
    }

    // Step sizes are computed with the module's data layout, except on the
    // default x86 triple where the custom layout string is used instead.
    llvm::DataLayout DL(Module);
    if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) {
      DL.reset(X86_CUSTOM_DL_STRING);
    }

    llvm::Function *ExpandedFunction =
      createEmptyExpandedForEachKernel(Function->getName());

    /*
     * Extract the expanded function's parameters. It is guaranteed by
     * createEmptyExpandedForEachKernel that there will be four parameters.
     */

    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);

    llvm::Function::arg_iterator ExpandedFunctionArgIter =
      ExpandedFunction->arg_begin();

    // Parameters are taken in declaration order: p, x1, x2, outstep.
    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);

    llvm::Value *InStep  = nullptr;
    llvm::Value *OutStep = nullptr;

    // Construct the actual function body.
    llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());

    // Collect and construct the arguments for the kernel().
    // Note that we load any loop-invariant arguments before entering the Loop.
    // FunctionArgIter walks the UNexpanded function's parameters; each
    // signature component that is present consumes one of them, in the order
    // in, out, usrData, followed by the special arguments.
    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();

    llvm::Type  *InTy      = nullptr;
    llvm::Value *InBufPtr = nullptr;
    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
      // Load the input stride from element 0 of the InStride field reached
      // through Arg_p.
      SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0}));
      llvm::LoadInst *InStepArg = Builder.CreateLoad(
        Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr");

      InTy = (FunctionArgIter++)->getType();
      InStep = getStepValue(&DL, InTy, InStepArg);

      InStep->setName("instep");

      // Load the input buffer base pointer from element 0 of the InPtr field.
      SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0}));
      InBufPtr = Builder.CreateLoad(
        Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf");
    }

    llvm::Type *OutTy = nullptr;
    llvm::Value *OutBasePtr = nullptr;
    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
      OutTy = (FunctionArgIter++)->getType();
      // Unlike the input stride, the output stride comes from the expanded
      // function's own outstep parameter.
      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
      OutStep->setName("outstep");
      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
    }

    llvm::Value *UsrData = nullptr;
    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
      // Load the user-data pointer from the Usr field and cast it to the
      // parameter type the function declares for it.
      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
      llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldUsr);
      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy);
      UsrData->setName("UsrData");
    }

    // Remember the block that precedes the loop; its terminator is handed to
    // ExpandSpecialArguments() below.
    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
    // IV is an out-parameter filled in by createLoop().
    llvm::Value *IV;
    createLoop(Builder, Arg_x1, Arg_x2, &IV);

    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
    // The callback advances FunctionArgIter each time ExpandSpecialArguments()
    // invokes it (presumably once per special argument -- confirm against
    // ExpandSpecialArguments()).
    const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
                                                            [&FunctionArgIter]() { FunctionArgIter++; },
                                                            LoopHeader->getTerminator());

    // Every parameter of the UNexpanded function must now be accounted for.
    bccAssert(FunctionArgIter == Function->arg_end());

    // Populate the actual call to kernel().
    llvm::SmallVector<llvm::Value*, 8> RootArgs;

    llvm::Value *InPtr  = nullptr;
    llvm::Value *OutPtr = nullptr;

    // Calculate the current input and output pointers
    //
    // We always calculate the input/output pointers with a GEP operating on i8
    // values and only cast at the very end to OutTy. This is because the step
    // between two values is given in bytes.
    //
    // TODO: We could further optimize the output by using a GEP operation of
    // type 'OutTy' in cases where the element type of the allocation allows.
    if (OutBasePtr) {
      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
      OutOffset = Builder.CreateMul(OutOffset, OutStep);
      OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset);
      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
    }

    if (InBufPtr) {
      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
      InOffset = Builder.CreateMul(InOffset, InStep);
      InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset);
      InPtr = Builder.CreatePointerCast(InPtr, InTy);
    }

    // Outgoing argument order: in, out, usrData, then the special arguments
    // added by finishArgList().
    if (InPtr) {
      RootArgs.push_back(InPtr);
    }

    if (OutPtr) {
      RootArgs.push_back(OutPtr);
    }

    if (UsrData) {
      RootArgs.push_back(UsrData);
    }

    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);

    Builder.CreateCall(Function, RootArgs);

    return true;
  }

  /* Expand a pass-by-value foreach kernel.
   *
   * Function  - the kernel to expand.
   * Signature - ForEach signature bits for Function; must have the kernel bit
   *             set and must not have the usrData bit set (both asserted).
   *
   * Fills in the function returned by
   * createEmptyExpandedForEachKernel(Function->getName()) with code that
   * calls Function. If the signature has an output and Function returns
   * void, the output pointer is passed as the first argument; if Function
   * returns a value, that value is stored through the output pointer after
   * the call.
   *
   * Always returns true.
   */
  bool ExpandForEach(llvm::Function *Function, uint32_t Signature) {
    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());

    // TODO: Refactor this to share functionality with ExpandOldStyleForEach.
    llvm::DataLayout DL(Module);
    if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) {
      DL.reset(X86_CUSTOM_DL_STRING);
    }
    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);

    llvm::Function *ExpandedFunction =
      createEmptyExpandedForEachKernel(Function->getName());

    /*
     * Extract the expanded function's parameters. It is guaranteed by
     * createEmptyExpandedForEachKernel that there will be four parameters.
     */

    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);

    llvm::Function::arg_iterator ExpandedFunctionArgIter =
      ExpandedFunction->arg_begin();

    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
    // Arg_outstep is not used by expanded new-style forEach kernels.

    // Construct the actual function body.
    llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());

    // Create TBAA meta-data.
    // Two tags are built under a common RenderScript node: "allocation"
    // (attached to stores of the kernel result and, via ExpandInputsBody(),
    // to loads of inputs) and "pointer" (attached to the load of the output
    // base pointer). They are only attached when gEnableRsTbaa is set.
    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
                 *TBAAAllocation, *TBAAPointer;
    llvm::MDBuilder MDHelper(*Context);

    TBAARenderScriptDistinct =
      MDHelper.createTBAARoot(kRenderScriptTBAARootName);
    TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
        TBAARenderScriptDistinct);
    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
                                                       TBAARenderScript);
    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
                                                      TBAAAllocation, 0);
    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
                                                    TBAARenderScript);
    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);

    /*
     * Collect and construct the arguments for the kernel().
     *
     * Note that we load any loop-invariant arguments before entering the Loop.
     */
    // Starts as the total parameter count and is decremented for every
    // parameter that turns out not to be an input (by-pointer output, special
    // arguments).
    size_t NumRemainingInputs = Function->arg_size();

    // No usrData parameter on kernels.
    bccAssert(
        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));

    llvm::Function::arg_iterator ArgIter = Function->arg_begin();

    // Check the return type
    llvm::Type     *OutTy            = nullptr;
    llvm::LoadInst *OutBasePtr       = nullptr;
    llvm::Value    *CastedOutBasePtr = nullptr;

    bool PassOutByPointer = false;

    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
      llvm::Type *OutBaseTy = Function->getReturnType();

      if (OutBaseTy->isVoidTy()) {
        // The kernel writes its output through its first parameter, whose
        // type is therefore the output pointer type.
        PassOutByPointer = true;
        OutTy = ArgIter->getType();

        ArgIter++;
        --NumRemainingInputs;
      } else {
        // We don't increment ArgIter, since we are using the actual return
        // type.
        OutTy = OutBaseTy->getPointerTo();
      }

      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));

      if (gEnableRsTbaa) {
        OutBasePtr->setMetadata("tbaa", TBAAPointer);
      }

      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
        CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
      } else {
        // The disagreement between module and x86 target machine datalayout
        // causes mismatched input/output data offset between slang reflected
        // code and bcc codegen for GetElementPtr. To solve this issue, skip the
        // cast to OutTy and leave CastedOutBasePtr as an int8_t*. The buffer
        // is later indexed with an explicit byte offset computed based on
        // X86_CUSTOM_DL_STRING and then bitcast it to actual output type.
        CastedOutBasePtr = OutBasePtr;
      }
    }

    // Filled in by ExpandInputsLoopInvariant() and consumed by
    // ExpandInputsBody().
    llvm::SmallVector<llvm::Type*, 8> InTypes;
    llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
    llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;

    bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT);

    // Create the loop structure.
    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
    // IV is an out-parameter filled in by createLoop().
    llvm::Value *IV;
    createLoop(Builder, Arg_x1, Arg_x2, &IV);

    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
    const int CalleeArgsContextIdx =
      ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
                             [&NumRemainingInputs]() { --NumRemainingInputs; },
                             LoopHeader->getTerminator());

    // After ExpandSpecialArguments() gets called, NumRemainingInputs
    // counts the number of arguments to the kernel that correspond to
    // an array entry from the InPtr field of the DriverInfo
    // structure.
    const size_t NumInPtrArguments = NumRemainingInputs;

    if (NumInPtrArguments > 0) {
      ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, ArgIter, NumInPtrArguments,
                                InTypes, InBufPtrs, InStructTempSlots);
    }

    // Populate the actual call to kernel().
    llvm::SmallVector<llvm::Value*, 8> RootArgs;

    // Calculate the current input and output pointers.

    // Output

    llvm::Value *OutPtr = nullptr;
    if (CastedOutBasePtr) {
      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);

      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
        OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset);
      } else {
        // Treat x86 output buffer as byte[], get indexed pointer with explicit
        // byte offset computed using a datalayout based on
        // X86_CUSTOM_DL_STRING, then bitcast it to actual output type.
        uint64_t OutStep = DL.getTypeAllocSize(OutTy->getPointerElementType());
        llvm::Value *OutOffsetInBytes = Builder.CreateMul(OutOffset, llvm::ConstantInt::get(Int32Ty, OutStep));
        OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffsetInBytes);
        OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
      }

      // A by-pointer output is the kernel's first argument, so it must be
      // pushed before the inputs.
      if (PassOutByPointer) {
        RootArgs.push_back(OutPtr);
      }
    }

    // Inputs

    if (NumInPtrArguments > 0) {
      ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInPtrArguments,
                       InTypes, InBufPtrs, InStructTempSlots, IV, RootArgs);
    }

    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);

    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);

    // A kernel that returns its result by value has that value stored to the
    // current output element here.
    if (OutPtr && !PassOutByPointer) {
      RetVal->setName("call.result");
      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
      if (gEnableRsTbaa) {
        Store->setMetadata("tbaa", TBAAAllocation);
      }
    }

    return true;
  }

  // Certain categories of functions that make up a general
  // reduce-style kernel are called directly from the driver with no
  // expansion needed.  For a function in such a category, we need to
  // promote linkage from static to external, to ensure that the
  // function is visible to the driver in the dynamic symbol table.
  // This promotion is safe because we don't have any kind of cross
  // translation unit linkage model (except for linking against
  // RenderScript libraries), so we do not risk name clashes.
11049fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross bool PromoteReduceFunction(const char *Name, FunctionSet &PromotedFunctions) { 1105e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (!Name) // a presumably-optional function that is not present 1106e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return false; 1107e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1108e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function *Fn = Module->getFunction(Name); 1109e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(Fn != nullptr); 1110e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if (PromotedFunctions.insert(Fn).second) { 1111e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(Fn->getLinkage() == llvm::GlobalValue::InternalLinkage); 1112e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Fn->setLinkage(llvm::GlobalValue::ExternalLinkage); 1113e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return true; 1114e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 1115e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1116e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return false; 1117e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 1118e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1119e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Expand the accumulator function for a general reduce-style kernel. 1120e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1121e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // The input is a function of the form 1122e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1123e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // define void @func(accumType* %accum, foo1 in1[, ... fooN inN] [, special arguments]) 1124e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1125e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // where all arguments except the first are the same as for a foreach kernel. 
1126e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1127e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // The input accumulator function gets expanded into a function of the form 1128e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1129e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // define void @func.expand(%RsExpandKernelDriverInfoPfx* %p, i32 %x1, i32 %x2, accumType* %accum) 1130e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1131e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // which performs a serial accumulaion of elements [x1, x2) into *%accum. 1132e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1133e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // In pseudocode, @func.expand does: 1134e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1135e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // for (i = %x1; i < %x2; ++i) { 1136e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // func(%accum, 1137e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // *((foo1 *)p->inPtr[0] + i)[, ... *((fooN *)p->inPtr[N-1] + i) 1138e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // [, p] [, i] [, p->current.y] [, p->current.z]); 1139e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // } 1140e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // 1141e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // This is very similar to foreach kernel expansion with no output. 11429fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross bool ExpandReduceAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) { 1143e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ALOGV("Expanding accumulator %s for general reduce kernel", 1144e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross FnAccumulator->getName().str().c_str()); 1145e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1146e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Create TBAA meta-data. 
1147e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, 1148e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross *TBAAAllocation, *TBAAPointer; 1149e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::MDBuilder MDHelper(*Context); 1150e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScriptDistinct = 1151e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross MDHelper.createTBAARoot(kRenderScriptTBAARootName); 1152e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName, 1153e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScriptDistinct); 1154e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", 1155e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScript); 1156e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, 1157e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAAllocation, 0); 1158e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", 1159e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAARenderScript); 1160e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0); 1161e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1162e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross auto AccumulatorArgIter = FnAccumulator->arg_begin(); 1163e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1164e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Create empty accumulator function. 
1165e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Function *FnExpandedAccumulator = 11669fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross createEmptyExpandedReduceAccumulator(FnAccumulator->getName(), 11679fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross (AccumulatorArgIter++)->getType()); 1168e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1169e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Extract the expanded accumulator's parameters. It is 11709fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross // guaranteed by createEmptyExpandedReduceAccumulator that 1171e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // there will be 4 parameters. 11729fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams); 1173e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin(); 1174e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_p = &*(ExpandedAccumulatorArgIter++); 1175e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_x1 = &*(ExpandedAccumulatorArgIter++); 1176e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_x2 = &*(ExpandedAccumulatorArgIter++); 1177e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::Value *Arg_accum = &*(ExpandedAccumulatorArgIter++); 1178e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1179e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Construct the actual function body. 1180f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar llvm::IRBuilder<> Builder(&*FnExpandedAccumulator->getEntryBlock().begin()); 1181e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1182e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Create the loop structure. 
1183e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock(); 11844165d29822fc7caf81e435995ff6189608fc0323Dean De Leo llvm::Value *IndVar; 1185e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross createLoop(Builder, Arg_x1, Arg_x2, &IndVar); 1186e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1187e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVector<llvm::Value*, 8> CalleeArgs; 1188e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross const int CalleeArgsContextIdx = 1189e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExpandSpecialArguments(Signature, IndVar, Arg_p, Builder, CalleeArgs, 1190e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross [](){}, LoopHeader->getTerminator()); 1191e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1192f039d98d346006442b61255a2889b8513a8cd56fYong Chen llvm::SmallVector<llvm::Type*, 8> InTypes; 1193e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVector<llvm::Value*, 8> InBufPtrs; 1194e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVector<llvm::Value*, 8> InStructTempSlots; 1195e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, AccumulatorArgIter, NumInputs, 1196f039d98d346006442b61255a2889b8513a8cd56fYong Chen InTypes, InBufPtrs, InStructTempSlots); 1197e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1198e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Populate the actual call to the original accumulator. 
1199e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross llvm::SmallVector<llvm::Value*, 8> RootArgs; 1200e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross RootArgs.push_back(Arg_accum); 1201f039d98d346006442b61255a2889b8513a8cd56fYong Chen ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InTypes, InBufPtrs, InStructTempSlots, 1202e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross IndVar, RootArgs); 1203e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *FnAccumulator, Builder); 1204e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross Builder.CreateCall(FnAccumulator, RootArgs); 1205e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1206e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross return true; 1207e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 1208e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1209dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // Create a combiner function for a general reduce-style kernel that lacks one, 1210dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // by calling the accumulator function. 
1211dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // 1212dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // The accumulator function must be of the form 1213dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // 1214dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // define void @accumFn(accumType* %accum, accumType %in) 1215dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // 1216dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // A combiner function will be generated of the form 1217dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // 1218dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // define void @accumFn.combiner(accumType* %accum, accumType* %other) { 1219dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // %1 = load accumType, accumType* %other 1220dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // call void @accumFn(accumType* %accum, accumType %1); 1221dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // } 12229fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross bool CreateReduceCombinerFromAccumulator(llvm::Function *FnAccumulator) { 1223dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross ALOGV("Creating combiner from accumulator %s for general reduce kernel", 1224dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross FnAccumulator->getName().str().c_str()); 1225dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross 1226dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross using llvm::Attribute; 1227dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross 1228dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross bccAssert(FnAccumulator->arg_size() == 2); 1229dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross auto AccumulatorArgIter = FnAccumulator->arg_begin(); 1230dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::Value *AccumulatorArg_accum = &*(AccumulatorArgIter++); 1231dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::Value *AccumulatorArg_in = &*(AccumulatorArgIter++); 
1232dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::Type *AccumulatorArgType = AccumulatorArg_accum->getType(); 1233dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross bccAssert(AccumulatorArgType->isPointerTy()); 1234dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross 1235dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context); 1236dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::FunctionType *CombinerType = 1237dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::FunctionType::get(VoidTy, { AccumulatorArgType, AccumulatorArgType }, false); 1238dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::Function *FnCombiner = 1239dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::Function::Create(CombinerType, llvm::GlobalValue::ExternalLinkage, 12409fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross nameReduceCombinerFromAccumulator(FnAccumulator->getName()), 1241dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross Module); 1242dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross 1243dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross auto CombinerArgIter = FnCombiner->arg_begin(); 1244dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross 1245dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::Argument *CombinerArg_accum = &(*CombinerArgIter++); 1246dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross CombinerArg_accum->setName("accum"); 1247dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross CombinerArg_accum->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_accum->getArgNo() + 1, 1248dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::makeArrayRef(Attribute::NoCapture))); 1249dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross 1250dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::Argument *CombinerArg_other = &(*CombinerArgIter++); 1251dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross CombinerArg_other->setName("other"); 
1252dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross CombinerArg_other->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_other->getArgNo() + 1, 1253dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::makeArrayRef(Attribute::NoCapture))); 1254dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross 1255dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::BasicBlock *BB = llvm::BasicBlock::Create(*Context, "BB", FnCombiner); 1256dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::IRBuilder<> Builder(BB); 1257dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross 1258dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross if (AccumulatorArg_in->getType()->isPointerTy()) { 1259dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // Types of sufficient size get passed by pointer-to-copy rather 1260dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // than passed by value. An accumulator cannot take a pointer 1261dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // at the user level; so if we see a pointer here, we know that 1262dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // we have a pass-by-pointer-to-copy case. 
1263dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::Type *ElementType = AccumulatorArg_in->getType()->getPointerElementType(); 1264dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::Value *TempMem = Builder.CreateAlloca(ElementType, nullptr, "caller_copy"); 1265dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross Builder.CreateStore(Builder.CreateLoad(CombinerArg_other), TempMem); 1266dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross Builder.CreateCall(FnAccumulator, { CombinerArg_accum, TempMem }); 1267dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross } else { 1268dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::Value *TypeAdjustedOther = CombinerArg_other; 1269dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross if (AccumulatorArgType->getPointerElementType() != AccumulatorArg_in->getType()) { 1270dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross // Call lowering by frontend has done some type coercion 1271dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross TypeAdjustedOther = Builder.CreatePointerCast(CombinerArg_other, 1272dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross AccumulatorArg_in->getType()->getPointerTo(), 1273dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross "cast"); 1274dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross } 1275dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross llvm::Value *DerefOther = Builder.CreateLoad(TypeAdjustedOther); 1276dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross Builder.CreateCall(FnAccumulator, { CombinerArg_accum, DerefOther }); 1277dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross } 1278dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross Builder.CreateRetVoid(); 1279dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross 1280dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross return true; 1281dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross } 1282dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross 128318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// @brief Checks if 
pointers to allocation internals are exposed 128418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// 128518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// This function verifies if through the parameters passed to the kernel 128618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// or through calls to the runtime library the script gains access to 128718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// pointers pointing to data within a RenderScript Allocation. 128818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// If we know we control all loads from and stores to data within 128918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// RenderScript allocations and if we know the run-time internal accesses 129018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// are all annotated with RenderScript TBAA metadata, only then we 129118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// can safely use TBAA to distinguish between generic and from-allocation 129218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// pointers. 1293bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool allocPointersExposed(llvm::Module &Module) { 129418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Old style kernel function can expose pointers to elements within 129518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // allocations. 129618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // TODO: Extend analysis to allow simple cases of old-style kernels. 
129725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines for (size_t i = 0; i < mExportForEachCount; ++i) { 129825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char *Name = mExportForEachNameList[i]; 129925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t Signature = mExportForEachSignatureList[i]; 1300bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (Module.getFunction(Name) && 1301d88177580db4ddedf680854c51db333c97eabc59Stephen Hines !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) { 130218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 130318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 130418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 130518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 130618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // Check for library functions that expose a pointer to an Allocation or 130718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser // that are not yet annotated with RenderScript-specific tbaa information. 1308e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala static const std::vector<const char *> Funcs{ 1309e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAt(...) 
1310e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationj", 1311e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationjj", 1312e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsGetElementAt13rs_allocationjjj", 1313e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1314e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsSetElementAt() 1315e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvj", 1316e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvjj", 1317e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z14rsSetElementAt13rs_allocationPvjjj", 1318e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1319e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_Y() 1320e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj", 1321e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1322e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_U() 1323e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj", 1324e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1325e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala // rsGetElementAtYuv_uchar_V() 1326e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj", 1327e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala }; 1328e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala 1329e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala for (auto FI : Funcs) { 1330e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala llvm::Function *Function = Module.getFunction(FI); 133118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1332bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (!Function) { 1333e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala ALOGE("Missing run-time function '%s'", FI); 133418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias 
Grosser return true; 133518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 133618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1337bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes if (Function->getNumUses() > 0) { 133818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return true; 133918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 134018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 134118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 134218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser return false; 134318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 134418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 134518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// @brief Connect RenderScript TBAA metadata to C/C++ metadata 134618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// 134718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser /// The TBAA metadata used to annotate loads/stores from RenderScript 1348e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// Allocations is generated in a separate TBAA tree with a 1349354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for 1350354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// all nodes in unrelated alias analysis trees. This function makes the 1351354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root), 1352e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With 1353e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// the connected trees every access to an Allocation is resolved to 1354e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes /// must-alias if compared to a normal C/C++ access. 
1355bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes void connectRenderScriptTBAAMetadata(llvm::Module &Module) { 1356bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDBuilder MDHelper(*Context); 1357354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScriptDistinct = 1358354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines MDHelper.createTBAARoot("RenderScript Distinct TBAA"); 1359354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode( 1360354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines "RenderScript TBAA", TBAARenderScriptDistinct); 1361bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA"); 1362354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines TBAARenderScript->replaceOperandWith(1, TBAARoot); 136318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 136418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1365bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes virtual bool runOnModule(llvm::Module &Module) { 1366bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bool Changed = false; 1367bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes this->Module = &Module; 13684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Context = &Module.getContext(); 1369bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 13704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala buildTypes(); 1371bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes 1372bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes bcinfo::MetadataExtractor me(&Module); 137325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines if (!me.extract()) { 137425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines ALOGE("Could not extract metadata from module!"); 137525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines return false; 137625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines } 13774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt 
Wala 13784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala // Expand forEach_* style kernels. 137925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachCount = me.getExportForEachSignatureCount(); 138025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachNameList = me.getExportForEachNameList(); 138125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines mExportForEachSignatureList = me.getExportForEachSignatureList(); 1382db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 138325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines for (size_t i = 0; i < mExportForEachCount; ++i) { 138425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines const char *name = mExportForEachNameList[i]; 138525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines uint32_t signature = mExportForEachSignatureList[i]; 1386bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes llvm::Function *kernel = Module.getFunction(name); 1387cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser if (kernel) { 1388d88177580db4ddedf680854c51db333c97eabc59Stephen Hines if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) { 13894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Changed |= ExpandForEach(kernel, signature); 1390acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 1391acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } else if (kernel->getReturnType()->isVoidTy()) { 13924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala Changed |= ExpandOldStyleForEach(kernel, signature); 1393acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser kernel->setLinkage(llvm::GlobalValue::InternalLinkage); 1394acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } else { 1395acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // There are some graphics root functions that are not 1396acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // expanded, but that will be called directly. 
For those 1397acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser // functions, we can not set the linkage to internal. 1398acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser } 1399cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines } 1400db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1401db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1402e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Process general reduce_* style functions. 14039fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross const size_t ExportReduceCount = me.getExportReduceCount(); 14049fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross const bcinfo::MetadataExtractor::Reduce *ExportReduceList = me.getExportReduceList(); 1405e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Note that functions can be shared between kernels 1406dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross FunctionSet PromotedFunctions, ExpandedAccumulators, AccumulatorsForCombiners; 1407e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 14089fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross for (size_t i = 0; i < ExportReduceCount; ++i) { 14099fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross Changed |= PromoteReduceFunction(ExportReduceList[i].mInitializerName, PromotedFunctions); 14109fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross Changed |= PromoteReduceFunction(ExportReduceList[i].mCombinerName, PromotedFunctions); 14119fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross Changed |= PromoteReduceFunction(ExportReduceList[i].mOutConverterName, PromotedFunctions); 1412e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 1413e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross // Accumulator 14149fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross llvm::Function *accumulator = Module.getFunction(ExportReduceList[i].mAccumulatorName); 1415e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross bccAssert(accumulator != nullptr); 1416e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross if 
(ExpandedAccumulators.insert(accumulator).second) 14179fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross Changed |= ExpandReduceAccumulator(accumulator, 14189fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross ExportReduceList[i].mSignature, 14199fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross ExportReduceList[i].mInputCount); 14209fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross if (!ExportReduceList[i].mCombinerName) { 1421dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross if (AccumulatorsForCombiners.insert(accumulator).second) 14229fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross Changed |= CreateReduceCombinerFromAccumulator(accumulator); 1423dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross } 1424e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross } 1425e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross 14264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala if (gEnableRsTbaa && !allocPointersExposed(Module)) { 1427bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes connectRenderScriptTBAAMetadata(Module); 142818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser } 142918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser 1430cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines return Changed; 1431db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1432db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 1433db169187dea4602e4ad32058762d23d474753fd0Stephen Hines virtual const char *getPassName() const { 14344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala return "forEach_* and reduce_* function expansion"; 1435db169187dea4602e4ad32058762d23d474753fd0Stephen Hines } 1436db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 14374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala}; // end RSKernelExpandPass 1438db169187dea4602e4ad32058762d23d474753fd0Stephen Hines 14397a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace 14407a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao 
// Out-of-class definition of the pass's static ID member.
char RSKernelExpandPass::ID = 0;

// Registers RSKernelExpandPass with LLVM under the argument name "kernelexp"
// and the description "Kernel Expand Pass".
static llvm::RegisterPass<RSKernelExpandPass> X("kernelexp", "Kernel Expand Pass");

namespace bcc {

// NOTE(review): not referenced in this part of the file; presumably the name
// given to the expanded loop's index variable -- confirm against its users.
const char BCC_INDEX_VAR_NAME[] = "rsIndex";

// Factory for the kernel expansion pass.
//
// @param pEnableStepOpt  Forwarded unchanged to the RSKernelExpandPass
//                        constructor.
// @return A newly heap-allocated pass; presumably owned by whichever pass
//         manager it is added to -- confirm at the call site.
llvm::ModulePass *
createRSKernelExpandPass(bool pEnableStepOpt) {
  return new RSKernelExpandPass(pEnableStepOpt);
}

} // end namespace bcc