1a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni/* 2a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * Copyright 2015, The Android Open Source Project 3a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * 4a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * Licensed under the Apache License, Version 2.0 (the "License"); 5a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * you may not use this file except in compliance with the License. 6a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * You may obtain a copy of the License at 7a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * 8a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * http://www.apache.org/licenses/LICENSE-2.0 9a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * 10a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * Unless required by applicable law or agreed to in writing, software 11a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * distributed under the License is distributed on an "AS IS" BASIS, 12a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * See the License for the specific language governing permissions and 14a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * limitations under the License. 15a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni */ 16a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 17a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "RSScriptGroupFusion.h" 18a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 19a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "Assert.h" 20a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "Log.h" 21a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni#include "bcc/BCCContext.h" 22a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni#include "bcc/Source.h" 23a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni#include "bcinfo/MetadataExtractor.h" 240ab50b835805c75ad164466767c2c212f48954ccYang Ni#include "llvm/ADT/StringExtras.h" 25a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni#include "llvm/IR/DataLayout.h" 26a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni#include "llvm/IR/IRBuilder.h" 27a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni#include "llvm/IR/Module.h" 28531d08c85971e47f58aedc093fbe83f1b909703eYang Ni#include "llvm/Support/raw_ostream.h" 29a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 30a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Niusing llvm::Function; 310ab50b835805c75ad164466767c2c212f48954ccYang Niusing llvm::Module; 32a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 33a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Niusing std::string; 34a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 35a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ninamespace bcc { 36a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 37a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ninamespace { 38a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 390ab50b835805c75ad164466767c2c212f48954ccYang Niconst Function* getInvokeFunction(const Source& source, const int slot, 400ab50b835805c75ad164466767c2c212f48954ccYang Ni Module* newModule) { 41f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar 42f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar bcinfo::MetadataExtractor &metadata = *source.getMetadata(); 430ab50b835805c75ad164466767c2c212f48954ccYang Ni const char* functionName = metadata.getExportFuncNameList()[slot]; 440ab50b835805c75ad164466767c2c212f48954ccYang Ni Function* func = newModule->getFunction(functionName); 450ab50b835805c75ad164466767c2c212f48954ccYang Ni // Materialize the function so that later the caller can inspect its argument 460ab50b835805c75ad164466767c2c212f48954ccYang Ni // and return types. 470ab50b835805c75ad164466767c2c212f48954ccYang Ni newModule->materialize(func); 480ab50b835805c75ad164466767c2c212f48954ccYang Ni return func; 49a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni} 50a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 510ab50b835805c75ad164466767c2c212f48954ccYang Niconst Function* 520ab50b835805c75ad164466767c2c212f48954ccYang NigetFunction(Module* mergedModule, const Source* source, const int slot, 530ab50b835805c75ad164466767c2c212f48954ccYang Ni uint32_t* signature) { 540ab50b835805c75ad164466767c2c212f48954ccYang Ni 55f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar bcinfo::MetadataExtractor &metadata = *source->getMetadata(); 560ab50b835805c75ad164466767c2c212f48954ccYang Ni const char* functionName = metadata.getExportForEachNameList()[slot]; 57a108bc5ec0ca0cb48c72492d54a71126bccfa7d6Stephen Hines if (functionName == nullptr || !functionName[0]) { 58531d08c85971e47f58aedc093fbe83f1b909703eYang Ni ALOGE("Kernel fusion (module %s slot %d): failed to find kernel function", 59531d08c85971e47f58aedc093fbe83f1b909703eYang Ni source->getName().c_str(), slot); 60a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni return nullptr; 61a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 62a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 630ab50b835805c75ad164466767c2c212f48954ccYang Ni if (metadata.getExportForEachInputCountList()[slot] > 1) { 64531d08c85971e47f58aedc093fbe83f1b909703eYang Ni ALOGE("Kernel fusion (module %s function %s): cannot handle multiple inputs", 65531d08c85971e47f58aedc093fbe83f1b909703eYang Ni source->getName().c_str(), functionName); 66a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni return nullptr; 67a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 68a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 690ab50b835805c75ad164466767c2c212f48954ccYang Ni if (signature != nullptr) { 700ab50b835805c75ad164466767c2c212f48954ccYang Ni *signature = metadata.getExportForEachSignatureList()[slot]; 71a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 72a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 730ab50b835805c75ad164466767c2c212f48954ccYang Ni const Function* function = mergedModule->getFunction(functionName); 740ab50b835805c75ad164466767c2c212f48954ccYang Ni 750ab50b835805c75ad164466767c2c212f48954ccYang Ni return function; 760ab50b835805c75ad164466767c2c212f48954ccYang Ni} 770ab50b835805c75ad164466767c2c212f48954ccYang Ni 788c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni// The whitelist of supported signature bits. Context or user data arguments are 798c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni// not currently supported in kernel fusion. To support them or any new kinds of 808c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni// arguments in the future, it requires not only listing the signature bits here, 818c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni// but also implementing additional necessary fusion logic in the getFusedFuncSig(), 828c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni// getFusedFuncType(), and fuseKernels() functions below. 830ab50b835805c75ad164466767c2c212f48954ccYang Niconstexpr uint32_t ExpectedSignatureBits = 840ab50b835805c75ad164466767c2c212f48954ccYang Ni bcinfo::MD_SIG_In | 850ab50b835805c75ad164466767c2c212f48954ccYang Ni bcinfo::MD_SIG_Out | 860ab50b835805c75ad164466767c2c212f48954ccYang Ni bcinfo::MD_SIG_X | 870ab50b835805c75ad164466767c2c212f48954ccYang Ni bcinfo::MD_SIG_Y | 880ab50b835805c75ad164466767c2c212f48954ccYang Ni bcinfo::MD_SIG_Z | 890ab50b835805c75ad164466767c2c212f48954ccYang Ni bcinfo::MD_SIG_Kernel; 900ab50b835805c75ad164466767c2c212f48954ccYang Ni 910ab50b835805c75ad164466767c2c212f48954ccYang Niint getFusedFuncSig(const std::vector<Source*>& sources, 920ab50b835805c75ad164466767c2c212f48954ccYang Ni const std::vector<int>& slots, 930ab50b835805c75ad164466767c2c212f48954ccYang Ni uint32_t* retSig) { 940ab50b835805c75ad164466767c2c212f48954ccYang Ni *retSig = 0; 950ab50b835805c75ad164466767c2c212f48954ccYang Ni uint32_t firstSignature = 0; 960ab50b835805c75ad164466767c2c212f48954ccYang Ni uint32_t signature = 0; 970ab50b835805c75ad164466767c2c212f48954ccYang Ni auto slotIter = slots.begin(); 980ab50b835805c75ad164466767c2c212f48954ccYang Ni for (const Source* source : sources) { 990ab50b835805c75ad164466767c2c212f48954ccYang Ni const int slot = *slotIter++; 100f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar bcinfo::MetadataExtractor &metadata = *source->getMetadata(); 101a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 102a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni if (metadata.getExportForEachInputCountList()[slot] > 1) { 103531d08c85971e47f58aedc093fbe83f1b909703eYang Ni ALOGE("Kernel fusion (module %s slot %d): cannot handle multiple inputs", 104531d08c85971e47f58aedc093fbe83f1b909703eYang Ni source->getName().c_str(), slot); 1050ab50b835805c75ad164466767c2c212f48954ccYang Ni return -1; 106a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 107a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 1080ab50b835805c75ad164466767c2c212f48954ccYang Ni signature = metadata.getExportForEachSignatureList()[slot]; 1090ab50b835805c75ad164466767c2c212f48954ccYang Ni if (signature & ~ExpectedSignatureBits) { 110531d08c85971e47f58aedc093fbe83f1b909703eYang Ni ALOGE("Kernel fusion (module %s slot %d): Unexpected signature %x", 111531d08c85971e47f58aedc093fbe83f1b909703eYang Ni source->getName().c_str(), slot, signature); 1120ab50b835805c75ad164466767c2c212f48954ccYang Ni return -1; 113a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 1140ab50b835805c75ad164466767c2c212f48954ccYang Ni 1150ab50b835805c75ad164466767c2c212f48954ccYang Ni if (firstSignature == 0) { 1160ab50b835805c75ad164466767c2c212f48954ccYang Ni firstSignature = signature; 117a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 118a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 1190ab50b835805c75ad164466767c2c212f48954ccYang Ni *retSig |= signature; 120a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 121a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 1220ab50b835805c75ad164466767c2c212f48954ccYang Ni if (!bcinfo::MetadataExtractor::hasForEachSignatureIn(firstSignature)) { 1230ab50b835805c75ad164466767c2c212f48954ccYang Ni *retSig &= ~bcinfo::MD_SIG_In; 1240ab50b835805c75ad164466767c2c212f48954ccYang Ni } 125a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 1260ab50b835805c75ad164466767c2c212f48954ccYang Ni if (!bcinfo::MetadataExtractor::hasForEachSignatureOut(signature)) { 1270ab50b835805c75ad164466767c2c212f48954ccYang Ni *retSig &= ~bcinfo::MD_SIG_Out; 1280ab50b835805c75ad164466767c2c212f48954ccYang Ni } 1290ab50b835805c75ad164466767c2c212f48954ccYang Ni 1300ab50b835805c75ad164466767c2c212f48954ccYang Ni return 0; 1310ab50b835805c75ad164466767c2c212f48954ccYang Ni} 132a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 1330ab50b835805c75ad164466767c2c212f48954ccYang Nillvm::FunctionType* getFusedFuncType(bcc::BCCContext& Context, 1340ab50b835805c75ad164466767c2c212f48954ccYang Ni const std::vector<Source*>& sources, 1350ab50b835805c75ad164466767c2c212f48954ccYang Ni const std::vector<int>& slots, 1360ab50b835805c75ad164466767c2c212f48954ccYang Ni Module* M, 1370ab50b835805c75ad164466767c2c212f48954ccYang Ni uint32_t* signature) { 1380ab50b835805c75ad164466767c2c212f48954ccYang Ni int error = getFusedFuncSig(sources, slots, signature); 1390ab50b835805c75ad164466767c2c212f48954ccYang Ni 1400ab50b835805c75ad164466767c2c212f48954ccYang Ni if (error < 0) { 141a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni return nullptr; 142a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 143a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 1440ab50b835805c75ad164466767c2c212f48954ccYang Ni const Function* firstF = getFunction(M, sources.front(), slots.front(), nullptr); 1450ab50b835805c75ad164466767c2c212f48954ccYang Ni 1460ab50b835805c75ad164466767c2c212f48954ccYang Ni bccAssert (firstF != nullptr); 1470ab50b835805c75ad164466767c2c212f48954ccYang Ni 1480ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::SmallVector<llvm::Type*, 8> ArgTys; 1490ab50b835805c75ad164466767c2c212f48954ccYang Ni 1500ab50b835805c75ad164466767c2c212f48954ccYang Ni if (bcinfo::MetadataExtractor::hasForEachSignatureIn(*signature)) { 1510ab50b835805c75ad164466767c2c212f48954ccYang Ni ArgTys.push_back(firstF->arg_begin()->getType()); 152a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 1530ab50b835805c75ad164466767c2c212f48954ccYang Ni 1540ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::Type* I32Ty = llvm::IntegerType::get(Context.getLLVMContext(), 32); 1550ab50b835805c75ad164466767c2c212f48954ccYang Ni if (bcinfo::MetadataExtractor::hasForEachSignatureX(*signature)) { 1560ab50b835805c75ad164466767c2c212f48954ccYang Ni ArgTys.push_back(I32Ty); 1570ab50b835805c75ad164466767c2c212f48954ccYang Ni } 1580ab50b835805c75ad164466767c2c212f48954ccYang Ni if (bcinfo::MetadataExtractor::hasForEachSignatureY(*signature)) { 1590ab50b835805c75ad164466767c2c212f48954ccYang Ni ArgTys.push_back(I32Ty); 160a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 1610ab50b835805c75ad164466767c2c212f48954ccYang Ni if (bcinfo::MetadataExtractor::hasForEachSignatureZ(*signature)) { 1620ab50b835805c75ad164466767c2c212f48954ccYang Ni ArgTys.push_back(I32Ty); 1630ab50b835805c75ad164466767c2c212f48954ccYang Ni } 1640ab50b835805c75ad164466767c2c212f48954ccYang Ni 1650ab50b835805c75ad164466767c2c212f48954ccYang Ni const Function* lastF = getFunction(M, sources.back(), slots.back(), nullptr); 1660ab50b835805c75ad164466767c2c212f48954ccYang Ni 1670ab50b835805c75ad164466767c2c212f48954ccYang Ni bccAssert (lastF != nullptr); 1680ab50b835805c75ad164466767c2c212f48954ccYang Ni 1690ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::Type* retTy = lastF->getReturnType(); 1700ab50b835805c75ad164466767c2c212f48954ccYang Ni 1710ab50b835805c75ad164466767c2c212f48954ccYang Ni return llvm::FunctionType::get(retTy, ArgTys, false); 1720ab50b835805c75ad164466767c2c212f48954ccYang Ni} 1730ab50b835805c75ad164466767c2c212f48954ccYang Ni 1740ab50b835805c75ad164466767c2c212f48954ccYang Ni} // anonymous namespace 1750ab50b835805c75ad164466767c2c212f48954ccYang Ni 1760ab50b835805c75ad164466767c2c212f48954ccYang Nibool fuseKernels(bcc::BCCContext& Context, 1770ab50b835805c75ad164466767c2c212f48954ccYang Ni const std::vector<Source *>& sources, 1780ab50b835805c75ad164466767c2c212f48954ccYang Ni const std::vector<int>& slots, 1790ab50b835805c75ad164466767c2c212f48954ccYang Ni const std::string& fusedName, 1800ab50b835805c75ad164466767c2c212f48954ccYang Ni Module* mergedModule) { 1810ab50b835805c75ad164466767c2c212f48954ccYang Ni bccAssert(sources.size() == slots.size() && "sources and slots differ in size"); 1820ab50b835805c75ad164466767c2c212f48954ccYang Ni 1838c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni uint32_t fusedFunctionSignature; 1840ab50b835805c75ad164466767c2c212f48954ccYang Ni 1850ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::FunctionType* fusedType = 1868c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni getFusedFuncType(Context, sources, slots, mergedModule, &fusedFunctionSignature); 1870ab50b835805c75ad164466767c2c212f48954ccYang Ni 1880ab50b835805c75ad164466767c2c212f48954ccYang Ni if (fusedType == nullptr) { 1890ab50b835805c75ad164466767c2c212f48954ccYang Ni return false; 1900ab50b835805c75ad164466767c2c212f48954ccYang Ni } 1910ab50b835805c75ad164466767c2c212f48954ccYang Ni 192a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni Function* fusedKernel = 1930ab50b835805c75ad164466767c2c212f48954ccYang Ni (Function*)(mergedModule->getOrInsertFunction(fusedName, fusedType)); 194a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 1950ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::LLVMContext& ctxt = Context.getLLVMContext(); 1960ab50b835805c75ad164466767c2c212f48954ccYang Ni 1970ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::BasicBlock* block = llvm::BasicBlock::Create(ctxt, "entry", fusedKernel); 198a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni llvm::IRBuilder<> builder(block); 199a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 200a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni Function::arg_iterator argIter = fusedKernel->arg_begin(); 2010ab50b835805c75ad164466767c2c212f48954ccYang Ni 2020ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::Value* dataElement = nullptr; 2038c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni if (bcinfo::MetadataExtractor::hasForEachSignatureIn(fusedFunctionSignature)) { 204f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar dataElement = &*(argIter++); 2050ab50b835805c75ad164466767c2c212f48954ccYang Ni dataElement->setName("DataIn"); 2060ab50b835805c75ad164466767c2c212f48954ccYang Ni } 2070ab50b835805c75ad164466767c2c212f48954ccYang Ni 2080ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::Value* X = nullptr; 2098c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni if (bcinfo::MetadataExtractor::hasForEachSignatureX(fusedFunctionSignature)) { 210f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar X = &*(argIter++); 2118c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni X->setName("x"); 2120ab50b835805c75ad164466767c2c212f48954ccYang Ni } 2130ab50b835805c75ad164466767c2c212f48954ccYang Ni 2140ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::Value* Y = nullptr; 2158c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni if (bcinfo::MetadataExtractor::hasForEachSignatureY(fusedFunctionSignature)) { 216f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar Y = &*(argIter++); 2178c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni Y->setName("y"); 2180ab50b835805c75ad164466767c2c212f48954ccYang Ni } 2190ab50b835805c75ad164466767c2c212f48954ccYang Ni 2200ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::Value* Z = nullptr; 2218c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni if (bcinfo::MetadataExtractor::hasForEachSignatureZ(fusedFunctionSignature)) { 222f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar Z = &*(argIter++); 2238c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni Z->setName("z"); 2240ab50b835805c75ad164466767c2c212f48954ccYang Ni } 225a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 226a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni auto slotIter = slots.begin(); 227a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni for (const Source* source : sources) { 228531d08c85971e47f58aedc093fbe83f1b909703eYang Ni int slot = *slotIter; 229a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 2308c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni uint32_t inputFunctionSignature; 2318c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni const Function* inputFunction = 2328c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni getFunction(mergedModule, source, slot, &inputFunctionSignature); 2338c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni if (inputFunction == nullptr) { 234531d08c85971e47f58aedc093fbe83f1b909703eYang Ni // Either failed to find the kernel function, or the function has multiple inputs. 2358c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni return false; 2368c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni } 2370ab50b835805c75ad164466767c2c212f48954ccYang Ni 2388c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni // Don't try to fuse a non-kernel 2398c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni if (!bcinfo::MetadataExtractor::hasForEachSignatureKernel(inputFunctionSignature)) { 240531d08c85971e47f58aedc093fbe83f1b909703eYang Ni ALOGE("Kernel fusion (module %s function %s): not a kernel", 241531d08c85971e47f58aedc093fbe83f1b909703eYang Ni source->getName().c_str(), inputFunction->getName().str().c_str()); 2420ab50b835805c75ad164466767c2c212f48954ccYang Ni return false; 243a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 244a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 245a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni std::vector<llvm::Value*> args; 2468c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni 2478c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni if (bcinfo::MetadataExtractor::hasForEachSignatureIn(inputFunctionSignature)) { 2488c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni if (dataElement == nullptr) { 249531d08c85971e47f58aedc093fbe83f1b909703eYang Ni ALOGE("Kernel fusion (module %s function %s): expected input, but got null", 250531d08c85971e47f58aedc093fbe83f1b909703eYang Ni source->getName().c_str(), inputFunction->getName().str().c_str()); 2518c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni return false; 2528c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni } 2538c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni 2548c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni const llvm::FunctionType* funcTy = inputFunction->getFunctionType(); 2558c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni llvm::Type* firstArgType = funcTy->getParamType(0); 2568c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni 257531d08c85971e47f58aedc093fbe83f1b909703eYang Ni if (dataElement->getType() != firstArgType) { 258531d08c85971e47f58aedc093fbe83f1b909703eYang Ni std::string msg; 259531d08c85971e47f58aedc093fbe83f1b909703eYang Ni llvm::raw_string_ostream rso(msg); 260531d08c85971e47f58aedc093fbe83f1b909703eYang Ni rso << "Mismatching argument type, expected "; 261531d08c85971e47f58aedc093fbe83f1b909703eYang Ni firstArgType->print(rso); 262531d08c85971e47f58aedc093fbe83f1b909703eYang Ni rso << ", received "; 263531d08c85971e47f58aedc093fbe83f1b909703eYang Ni dataElement->getType()->print(rso); 264531d08c85971e47f58aedc093fbe83f1b909703eYang Ni ALOGE("Kernel fusion (module %s function %s): %s", source->getName().c_str(), 265531d08c85971e47f58aedc093fbe83f1b909703eYang Ni inputFunction->getName().str().c_str(), rso.str().c_str()); 2668c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni return false; 2678c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni } 2688c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni 2690ab50b835805c75ad164466767c2c212f48954ccYang Ni args.push_back(dataElement); 2708c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni } else { 2718c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni // Only the first kernel in a batch is allowed to have no input 2728c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni if (slotIter != slots.begin()) { 273531d08c85971e47f58aedc093fbe83f1b909703eYang Ni ALOGE("Kernel fusion (module %s function %s): function not first in batch takes no input", 274531d08c85971e47f58aedc093fbe83f1b909703eYang Ni source->getName().c_str(), inputFunction->getName().str().c_str()); 2758c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni return false; 2768c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni } 2770ab50b835805c75ad164466767c2c212f48954ccYang Ni } 2780ab50b835805c75ad164466767c2c212f48954ccYang Ni 2798c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni if (bcinfo::MetadataExtractor::hasForEachSignatureX(inputFunctionSignature)) { 280a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni args.push_back(X); 2810ab50b835805c75ad164466767c2c212f48954ccYang Ni } 2820ab50b835805c75ad164466767c2c212f48954ccYang Ni 2838c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni if (bcinfo::MetadataExtractor::hasForEachSignatureY(inputFunctionSignature)) { 2840ab50b835805c75ad164466767c2c212f48954ccYang Ni args.push_back(Y); 2850ab50b835805c75ad164466767c2c212f48954ccYang Ni } 2860ab50b835805c75ad164466767c2c212f48954ccYang Ni 2878c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni if (bcinfo::MetadataExtractor::hasForEachSignatureZ(inputFunctionSignature)) { 2880ab50b835805c75ad164466767c2c212f48954ccYang Ni args.push_back(Z); 289a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 290a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 2918c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni dataElement = builder.CreateCall((llvm::Value*)inputFunction, args); 292531d08c85971e47f58aedc093fbe83f1b909703eYang Ni 293531d08c85971e47f58aedc093fbe83f1b909703eYang Ni slotIter++; 294a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni } 295a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 2960ab50b835805c75ad164466767c2c212f48954ccYang Ni if (fusedKernel->getReturnType()->isVoidTy()) { 2970ab50b835805c75ad164466767c2c212f48954ccYang Ni builder.CreateRetVoid(); 2980ab50b835805c75ad164466767c2c212f48954ccYang Ni } else { 2990ab50b835805c75ad164466767c2c212f48954ccYang Ni builder.CreateRet(dataElement); 3000ab50b835805c75ad164466767c2c212f48954ccYang Ni } 3010ab50b835805c75ad164466767c2c212f48954ccYang Ni 3020ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::NamedMDNode* ExportForEachNameMD = 3030ab50b835805c75ad164466767c2c212f48954ccYang Ni mergedModule->getOrInsertNamedMetadata("#rs_export_foreach_name"); 3040ab50b835805c75ad164466767c2c212f48954ccYang Ni 3050ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::MDString* nameMDStr = llvm::MDString::get(ctxt, fusedName); 3060ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::MDNode* nameMDNode = llvm::MDNode::get(ctxt, nameMDStr); 3070ab50b835805c75ad164466767c2c212f48954ccYang Ni ExportForEachNameMD->addOperand(nameMDNode); 3080ab50b835805c75ad164466767c2c212f48954ccYang Ni 3090ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::NamedMDNode* ExportForEachMD = 3100ab50b835805c75ad164466767c2c212f48954ccYang Ni mergedModule->getOrInsertNamedMetadata("#rs_export_foreach"); 3110ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::MDString* sigMDStr = llvm::MDString::get(ctxt, 312f5b49a0ca149b09c8306b86db9f3aca703c4acd5Pirama Arumuga Nainar llvm::utostr(fusedFunctionSignature)); 3130ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::MDNode* sigMDNode = llvm::MDNode::get(ctxt, sigMDStr); 3140ab50b835805c75ad164466767c2c212f48954ccYang Ni ExportForEachMD->addOperand(sigMDNode); 3150ab50b835805c75ad164466767c2c212f48954ccYang Ni 3160ab50b835805c75ad164466767c2c212f48954ccYang Ni return true; 3170ab50b835805c75ad164466767c2c212f48954ccYang Ni} 3180ab50b835805c75ad164466767c2c212f48954ccYang Ni 3190ab50b835805c75ad164466767c2c212f48954ccYang Nibool renameInvoke(BCCContext& Context, const Source* source, const int slot, 3200ab50b835805c75ad164466767c2c212f48954ccYang Ni const std::string& newName, Module* module) { 3210ab50b835805c75ad164466767c2c212f48954ccYang Ni const llvm::Function* F = getInvokeFunction(*source, slot, module); 3220ab50b835805c75ad164466767c2c212f48954ccYang Ni std::vector<llvm::Type*> params; 3230ab50b835805c75ad164466767c2c212f48954ccYang Ni for (auto I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { 3240ab50b835805c75ad164466767c2c212f48954ccYang Ni params.push_back(I->getType()); 3250ab50b835805c75ad164466767c2c212f48954ccYang Ni } 3260ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::Type* returnTy = F->getReturnType(); 3270ab50b835805c75ad164466767c2c212f48954ccYang Ni 3280ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::FunctionType* batchFuncTy = 3290ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::FunctionType::get(returnTy, params, false); 3300ab50b835805c75ad164466767c2c212f48954ccYang Ni 3310ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::Function* newF = 3320ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::Function::Create(batchFuncTy, 3330ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::GlobalValue::ExternalLinkage, newName, 3340ab50b835805c75ad164466767c2c212f48954ccYang Ni module); 3350ab50b835805c75ad164466767c2c212f48954ccYang Ni 3360ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::BasicBlock* block = llvm::BasicBlock::Create(Context.getLLVMContext(), 3370ab50b835805c75ad164466767c2c212f48954ccYang Ni "entry", newF); 3380ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::IRBuilder<> builder(block); 3390ab50b835805c75ad164466767c2c212f48954ccYang Ni 3400ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::Function::arg_iterator argIter = newF->arg_begin(); 341f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar llvm::Value* arg1 = &*(argIter++); 3420ab50b835805c75ad164466767c2c212f48954ccYang Ni builder.CreateCall((llvm::Value*)F, arg1); 3430ab50b835805c75ad164466767c2c212f48954ccYang Ni 3440ab50b835805c75ad164466767c2c212f48954ccYang Ni builder.CreateRetVoid(); 345a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 3460ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::NamedMDNode* ExportFuncNameMD = 3470ab50b835805c75ad164466767c2c212f48954ccYang Ni module->getOrInsertNamedMetadata("#rs_export_func"); 3480ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::MDString* strMD = llvm::MDString::get(module->getContext(), newName); 3490ab50b835805c75ad164466767c2c212f48954ccYang Ni llvm::MDNode* nodeMD = llvm::MDNode::get(module->getContext(), strMD); 3500ab50b835805c75ad164466767c2c212f48954ccYang Ni ExportFuncNameMD->addOperand(nodeMD); 351a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 3520ab50b835805c75ad164466767c2c212f48954ccYang Ni return true; 353a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni} 354a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni 355a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni} // namespace bcc 356