1a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni/*
2a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * Copyright 2015, The Android Open Source Project
3a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni *
4a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * Licensed under the Apache License, Version 2.0 (the "License");
5a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * you may not use this file except in compliance with the License.
6a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * You may obtain a copy of the License at
7a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni *
8a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni *     http://www.apache.org/licenses/LICENSE-2.0
9a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni *
10a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * Unless required by applicable law or agreed to in writing, software
11a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * distributed under the License is distributed on an "AS IS" BASIS,
12a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * See the License for the specific language governing permissions and
14a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni * limitations under the License.
15a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni */
16a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
17a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "RSScriptGroupFusion.h"
18a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
19a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "Assert.h"
20a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "Log.h"
21a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni#include "bcc/BCCContext.h"
22a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni#include "bcc/Source.h"
23a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni#include "bcinfo/MetadataExtractor.h"
240ab50b835805c75ad164466767c2c212f48954ccYang Ni#include "llvm/ADT/StringExtras.h"
25a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni#include "llvm/IR/DataLayout.h"
26a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni#include "llvm/IR/IRBuilder.h"
27a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni#include "llvm/IR/Module.h"
28531d08c85971e47f58aedc093fbe83f1b909703eYang Ni#include "llvm/Support/raw_ostream.h"
29a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
30a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Niusing llvm::Function;
310ab50b835805c75ad164466767c2c212f48954ccYang Niusing llvm::Module;
32a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
33a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Niusing std::string;
34a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
35a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ninamespace bcc {
36a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
37a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ninamespace {
38a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
390ab50b835805c75ad164466767c2c212f48954ccYang Niconst Function* getInvokeFunction(const Source& source, const int slot,
400ab50b835805c75ad164466767c2c212f48954ccYang Ni                                  Module* newModule) {
41f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar
42f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar  bcinfo::MetadataExtractor &metadata = *source.getMetadata();
430ab50b835805c75ad164466767c2c212f48954ccYang Ni  const char* functionName = metadata.getExportFuncNameList()[slot];
440ab50b835805c75ad164466767c2c212f48954ccYang Ni  Function* func = newModule->getFunction(functionName);
450ab50b835805c75ad164466767c2c212f48954ccYang Ni  // Materialize the function so that later the caller can inspect its argument
460ab50b835805c75ad164466767c2c212f48954ccYang Ni  // and return types.
470ab50b835805c75ad164466767c2c212f48954ccYang Ni  newModule->materialize(func);
480ab50b835805c75ad164466767c2c212f48954ccYang Ni  return func;
49a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni}
50a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
510ab50b835805c75ad164466767c2c212f48954ccYang Niconst Function*
520ab50b835805c75ad164466767c2c212f48954ccYang NigetFunction(Module* mergedModule, const Source* source, const int slot,
530ab50b835805c75ad164466767c2c212f48954ccYang Ni            uint32_t* signature) {
540ab50b835805c75ad164466767c2c212f48954ccYang Ni
55f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar  bcinfo::MetadataExtractor &metadata = *source->getMetadata();
560ab50b835805c75ad164466767c2c212f48954ccYang Ni  const char* functionName = metadata.getExportForEachNameList()[slot];
57a108bc5ec0ca0cb48c72492d54a71126bccfa7d6Stephen Hines  if (functionName == nullptr || !functionName[0]) {
58531d08c85971e47f58aedc093fbe83f1b909703eYang Ni    ALOGE("Kernel fusion (module %s slot %d): failed to find kernel function",
59531d08c85971e47f58aedc093fbe83f1b909703eYang Ni          source->getName().c_str(), slot);
60a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni    return nullptr;
61a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  }
62a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
630ab50b835805c75ad164466767c2c212f48954ccYang Ni  if (metadata.getExportForEachInputCountList()[slot] > 1) {
64531d08c85971e47f58aedc093fbe83f1b909703eYang Ni    ALOGE("Kernel fusion (module %s function %s): cannot handle multiple inputs",
65531d08c85971e47f58aedc093fbe83f1b909703eYang Ni          source->getName().c_str(), functionName);
66a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni    return nullptr;
67a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  }
68a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
690ab50b835805c75ad164466767c2c212f48954ccYang Ni  if (signature != nullptr) {
700ab50b835805c75ad164466767c2c212f48954ccYang Ni    *signature = metadata.getExportForEachSignatureList()[slot];
71a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  }
72a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
730ab50b835805c75ad164466767c2c212f48954ccYang Ni  const Function* function = mergedModule->getFunction(functionName);
740ab50b835805c75ad164466767c2c212f48954ccYang Ni
750ab50b835805c75ad164466767c2c212f48954ccYang Ni  return function;
760ab50b835805c75ad164466767c2c212f48954ccYang Ni}
770ab50b835805c75ad164466767c2c212f48954ccYang Ni
788c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni// The whitelist of supported signature bits. Context or user data arguments are
798c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni// not currently supported in kernel fusion. To support them or any new kinds of
808c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni// arguments in the future, it requires not only listing the signature bits here,
818c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni// but also implementing additional necessary fusion logic in the getFusedFuncSig(),
828c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni// getFusedFuncType(), and fuseKernels() functions below.
830ab50b835805c75ad164466767c2c212f48954ccYang Niconstexpr uint32_t ExpectedSignatureBits =
840ab50b835805c75ad164466767c2c212f48954ccYang Ni        bcinfo::MD_SIG_In |
850ab50b835805c75ad164466767c2c212f48954ccYang Ni        bcinfo::MD_SIG_Out |
860ab50b835805c75ad164466767c2c212f48954ccYang Ni        bcinfo::MD_SIG_X |
870ab50b835805c75ad164466767c2c212f48954ccYang Ni        bcinfo::MD_SIG_Y |
880ab50b835805c75ad164466767c2c212f48954ccYang Ni        bcinfo::MD_SIG_Z |
890ab50b835805c75ad164466767c2c212f48954ccYang Ni        bcinfo::MD_SIG_Kernel;
900ab50b835805c75ad164466767c2c212f48954ccYang Ni
910ab50b835805c75ad164466767c2c212f48954ccYang Niint getFusedFuncSig(const std::vector<Source*>& sources,
920ab50b835805c75ad164466767c2c212f48954ccYang Ni                    const std::vector<int>& slots,
930ab50b835805c75ad164466767c2c212f48954ccYang Ni                    uint32_t* retSig) {
940ab50b835805c75ad164466767c2c212f48954ccYang Ni  *retSig = 0;
950ab50b835805c75ad164466767c2c212f48954ccYang Ni  uint32_t firstSignature = 0;
960ab50b835805c75ad164466767c2c212f48954ccYang Ni  uint32_t signature = 0;
970ab50b835805c75ad164466767c2c212f48954ccYang Ni  auto slotIter = slots.begin();
980ab50b835805c75ad164466767c2c212f48954ccYang Ni  for (const Source* source : sources) {
990ab50b835805c75ad164466767c2c212f48954ccYang Ni    const int slot = *slotIter++;
100f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    bcinfo::MetadataExtractor &metadata = *source->getMetadata();
101a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
102a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni    if (metadata.getExportForEachInputCountList()[slot] > 1) {
103531d08c85971e47f58aedc093fbe83f1b909703eYang Ni      ALOGE("Kernel fusion (module %s slot %d): cannot handle multiple inputs",
104531d08c85971e47f58aedc093fbe83f1b909703eYang Ni            source->getName().c_str(), slot);
1050ab50b835805c75ad164466767c2c212f48954ccYang Ni      return -1;
106a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni    }
107a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
1080ab50b835805c75ad164466767c2c212f48954ccYang Ni    signature = metadata.getExportForEachSignatureList()[slot];
1090ab50b835805c75ad164466767c2c212f48954ccYang Ni    if (signature & ~ExpectedSignatureBits) {
110531d08c85971e47f58aedc093fbe83f1b909703eYang Ni      ALOGE("Kernel fusion (module %s slot %d): Unexpected signature %x",
111531d08c85971e47f58aedc093fbe83f1b909703eYang Ni            source->getName().c_str(), slot, signature);
1120ab50b835805c75ad164466767c2c212f48954ccYang Ni      return -1;
113a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni    }
1140ab50b835805c75ad164466767c2c212f48954ccYang Ni
1150ab50b835805c75ad164466767c2c212f48954ccYang Ni    if (firstSignature == 0) {
1160ab50b835805c75ad164466767c2c212f48954ccYang Ni      firstSignature = signature;
117a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni    }
118a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
1190ab50b835805c75ad164466767c2c212f48954ccYang Ni    *retSig |= signature;
120a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  }
121a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
1220ab50b835805c75ad164466767c2c212f48954ccYang Ni  if (!bcinfo::MetadataExtractor::hasForEachSignatureIn(firstSignature)) {
1230ab50b835805c75ad164466767c2c212f48954ccYang Ni    *retSig &= ~bcinfo::MD_SIG_In;
1240ab50b835805c75ad164466767c2c212f48954ccYang Ni  }
125a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
1260ab50b835805c75ad164466767c2c212f48954ccYang Ni  if (!bcinfo::MetadataExtractor::hasForEachSignatureOut(signature)) {
1270ab50b835805c75ad164466767c2c212f48954ccYang Ni    *retSig &= ~bcinfo::MD_SIG_Out;
1280ab50b835805c75ad164466767c2c212f48954ccYang Ni  }
1290ab50b835805c75ad164466767c2c212f48954ccYang Ni
1300ab50b835805c75ad164466767c2c212f48954ccYang Ni  return 0;
1310ab50b835805c75ad164466767c2c212f48954ccYang Ni}
132a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
1330ab50b835805c75ad164466767c2c212f48954ccYang Nillvm::FunctionType* getFusedFuncType(bcc::BCCContext& Context,
1340ab50b835805c75ad164466767c2c212f48954ccYang Ni                                     const std::vector<Source*>& sources,
1350ab50b835805c75ad164466767c2c212f48954ccYang Ni                                     const std::vector<int>& slots,
1360ab50b835805c75ad164466767c2c212f48954ccYang Ni                                     Module* M,
1370ab50b835805c75ad164466767c2c212f48954ccYang Ni                                     uint32_t* signature) {
1380ab50b835805c75ad164466767c2c212f48954ccYang Ni  int error = getFusedFuncSig(sources, slots, signature);
1390ab50b835805c75ad164466767c2c212f48954ccYang Ni
1400ab50b835805c75ad164466767c2c212f48954ccYang Ni  if (error < 0) {
141a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni    return nullptr;
142a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  }
143a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
1440ab50b835805c75ad164466767c2c212f48954ccYang Ni  const Function* firstF = getFunction(M, sources.front(), slots.front(), nullptr);
1450ab50b835805c75ad164466767c2c212f48954ccYang Ni
1460ab50b835805c75ad164466767c2c212f48954ccYang Ni  bccAssert (firstF != nullptr);
1470ab50b835805c75ad164466767c2c212f48954ccYang Ni
1480ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::SmallVector<llvm::Type*, 8> ArgTys;
1490ab50b835805c75ad164466767c2c212f48954ccYang Ni
1500ab50b835805c75ad164466767c2c212f48954ccYang Ni  if (bcinfo::MetadataExtractor::hasForEachSignatureIn(*signature)) {
1510ab50b835805c75ad164466767c2c212f48954ccYang Ni    ArgTys.push_back(firstF->arg_begin()->getType());
152a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  }
1530ab50b835805c75ad164466767c2c212f48954ccYang Ni
1540ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::Type* I32Ty = llvm::IntegerType::get(Context.getLLVMContext(), 32);
1550ab50b835805c75ad164466767c2c212f48954ccYang Ni  if (bcinfo::MetadataExtractor::hasForEachSignatureX(*signature)) {
1560ab50b835805c75ad164466767c2c212f48954ccYang Ni    ArgTys.push_back(I32Ty);
1570ab50b835805c75ad164466767c2c212f48954ccYang Ni  }
1580ab50b835805c75ad164466767c2c212f48954ccYang Ni  if (bcinfo::MetadataExtractor::hasForEachSignatureY(*signature)) {
1590ab50b835805c75ad164466767c2c212f48954ccYang Ni    ArgTys.push_back(I32Ty);
160a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  }
1610ab50b835805c75ad164466767c2c212f48954ccYang Ni  if (bcinfo::MetadataExtractor::hasForEachSignatureZ(*signature)) {
1620ab50b835805c75ad164466767c2c212f48954ccYang Ni    ArgTys.push_back(I32Ty);
1630ab50b835805c75ad164466767c2c212f48954ccYang Ni  }
1640ab50b835805c75ad164466767c2c212f48954ccYang Ni
1650ab50b835805c75ad164466767c2c212f48954ccYang Ni  const Function* lastF = getFunction(M, sources.back(), slots.back(), nullptr);
1660ab50b835805c75ad164466767c2c212f48954ccYang Ni
1670ab50b835805c75ad164466767c2c212f48954ccYang Ni  bccAssert (lastF != nullptr);
1680ab50b835805c75ad164466767c2c212f48954ccYang Ni
1690ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::Type* retTy = lastF->getReturnType();
1700ab50b835805c75ad164466767c2c212f48954ccYang Ni
1710ab50b835805c75ad164466767c2c212f48954ccYang Ni  return llvm::FunctionType::get(retTy, ArgTys, false);
1720ab50b835805c75ad164466767c2c212f48954ccYang Ni}
1730ab50b835805c75ad164466767c2c212f48954ccYang Ni
1740ab50b835805c75ad164466767c2c212f48954ccYang Ni}  // anonymous namespace
1750ab50b835805c75ad164466767c2c212f48954ccYang Ni
1760ab50b835805c75ad164466767c2c212f48954ccYang Nibool fuseKernels(bcc::BCCContext& Context,
1770ab50b835805c75ad164466767c2c212f48954ccYang Ni                 const std::vector<Source *>& sources,
1780ab50b835805c75ad164466767c2c212f48954ccYang Ni                 const std::vector<int>& slots,
1790ab50b835805c75ad164466767c2c212f48954ccYang Ni                 const std::string& fusedName,
1800ab50b835805c75ad164466767c2c212f48954ccYang Ni                 Module* mergedModule) {
1810ab50b835805c75ad164466767c2c212f48954ccYang Ni  bccAssert(sources.size() == slots.size() && "sources and slots differ in size");
1820ab50b835805c75ad164466767c2c212f48954ccYang Ni
1838c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni  uint32_t fusedFunctionSignature;
1840ab50b835805c75ad164466767c2c212f48954ccYang Ni
1850ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::FunctionType* fusedType =
1868c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni          getFusedFuncType(Context, sources, slots, mergedModule, &fusedFunctionSignature);
1870ab50b835805c75ad164466767c2c212f48954ccYang Ni
1880ab50b835805c75ad164466767c2c212f48954ccYang Ni  if (fusedType == nullptr) {
1890ab50b835805c75ad164466767c2c212f48954ccYang Ni    return false;
1900ab50b835805c75ad164466767c2c212f48954ccYang Ni  }
1910ab50b835805c75ad164466767c2c212f48954ccYang Ni
192a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  Function* fusedKernel =
1930ab50b835805c75ad164466767c2c212f48954ccYang Ni          (Function*)(mergedModule->getOrInsertFunction(fusedName, fusedType));
194a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
1950ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::LLVMContext& ctxt = Context.getLLVMContext();
1960ab50b835805c75ad164466767c2c212f48954ccYang Ni
1970ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::BasicBlock* block = llvm::BasicBlock::Create(ctxt, "entry", fusedKernel);
198a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  llvm::IRBuilder<> builder(block);
199a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
200a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  Function::arg_iterator argIter = fusedKernel->arg_begin();
2010ab50b835805c75ad164466767c2c212f48954ccYang Ni
2020ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::Value* dataElement = nullptr;
2038c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni  if (bcinfo::MetadataExtractor::hasForEachSignatureIn(fusedFunctionSignature)) {
204f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    dataElement = &*(argIter++);
2050ab50b835805c75ad164466767c2c212f48954ccYang Ni    dataElement->setName("DataIn");
2060ab50b835805c75ad164466767c2c212f48954ccYang Ni  }
2070ab50b835805c75ad164466767c2c212f48954ccYang Ni
2080ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::Value* X = nullptr;
2098c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni  if (bcinfo::MetadataExtractor::hasForEachSignatureX(fusedFunctionSignature)) {
210f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    X = &*(argIter++);
2118c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    X->setName("x");
2120ab50b835805c75ad164466767c2c212f48954ccYang Ni  }
2130ab50b835805c75ad164466767c2c212f48954ccYang Ni
2140ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::Value* Y = nullptr;
2158c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni  if (bcinfo::MetadataExtractor::hasForEachSignatureY(fusedFunctionSignature)) {
216f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    Y = &*(argIter++);
2178c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    Y->setName("y");
2180ab50b835805c75ad164466767c2c212f48954ccYang Ni  }
2190ab50b835805c75ad164466767c2c212f48954ccYang Ni
2200ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::Value* Z = nullptr;
2218c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni  if (bcinfo::MetadataExtractor::hasForEachSignatureZ(fusedFunctionSignature)) {
222f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    Z = &*(argIter++);
2238c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    Z->setName("z");
2240ab50b835805c75ad164466767c2c212f48954ccYang Ni  }
225a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
226a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  auto slotIter = slots.begin();
227a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  for (const Source* source : sources) {
228531d08c85971e47f58aedc093fbe83f1b909703eYang Ni    int slot = *slotIter;
229a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
2308c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    uint32_t inputFunctionSignature;
2318c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    const Function* inputFunction =
2328c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni            getFunction(mergedModule, source, slot, &inputFunctionSignature);
2338c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    if (inputFunction == nullptr) {
234531d08c85971e47f58aedc093fbe83f1b909703eYang Ni      // Either failed to find the kernel function, or the function has multiple inputs.
2358c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni      return false;
2368c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    }
2370ab50b835805c75ad164466767c2c212f48954ccYang Ni
2388c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    // Don't try to fuse a non-kernel
2398c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    if (!bcinfo::MetadataExtractor::hasForEachSignatureKernel(inputFunctionSignature)) {
240531d08c85971e47f58aedc093fbe83f1b909703eYang Ni      ALOGE("Kernel fusion (module %s function %s): not a kernel",
241531d08c85971e47f58aedc093fbe83f1b909703eYang Ni            source->getName().c_str(), inputFunction->getName().str().c_str());
2420ab50b835805c75ad164466767c2c212f48954ccYang Ni      return false;
243a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni    }
244a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
245a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni    std::vector<llvm::Value*> args;
2468c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni
2478c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(inputFunctionSignature)) {
2488c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni      if (dataElement == nullptr) {
249531d08c85971e47f58aedc093fbe83f1b909703eYang Ni        ALOGE("Kernel fusion (module %s function %s): expected input, but got null",
250531d08c85971e47f58aedc093fbe83f1b909703eYang Ni              source->getName().c_str(), inputFunction->getName().str().c_str());
2518c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni        return false;
2528c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni      }
2538c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni
2548c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni      const llvm::FunctionType* funcTy = inputFunction->getFunctionType();
2558c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni      llvm::Type* firstArgType = funcTy->getParamType(0);
2568c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni
257531d08c85971e47f58aedc093fbe83f1b909703eYang Ni      if (dataElement->getType() != firstArgType) {
258531d08c85971e47f58aedc093fbe83f1b909703eYang Ni        std::string msg;
259531d08c85971e47f58aedc093fbe83f1b909703eYang Ni        llvm::raw_string_ostream rso(msg);
260531d08c85971e47f58aedc093fbe83f1b909703eYang Ni        rso << "Mismatching argument type, expected ";
261531d08c85971e47f58aedc093fbe83f1b909703eYang Ni        firstArgType->print(rso);
262531d08c85971e47f58aedc093fbe83f1b909703eYang Ni        rso << ", received ";
263531d08c85971e47f58aedc093fbe83f1b909703eYang Ni        dataElement->getType()->print(rso);
264531d08c85971e47f58aedc093fbe83f1b909703eYang Ni        ALOGE("Kernel fusion (module %s function %s): %s", source->getName().c_str(),
265531d08c85971e47f58aedc093fbe83f1b909703eYang Ni              inputFunction->getName().str().c_str(), rso.str().c_str());
2668c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni        return false;
2678c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni      }
2688c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni
2690ab50b835805c75ad164466767c2c212f48954ccYang Ni      args.push_back(dataElement);
2708c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    } else {
2718c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni      // Only the first kernel in a batch is allowed to have no input
2728c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni      if (slotIter != slots.begin()) {
273531d08c85971e47f58aedc093fbe83f1b909703eYang Ni        ALOGE("Kernel fusion (module %s function %s): function not first in batch takes no input",
274531d08c85971e47f58aedc093fbe83f1b909703eYang Ni              source->getName().c_str(), inputFunction->getName().str().c_str());
2758c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni        return false;
2768c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni      }
2770ab50b835805c75ad164466767c2c212f48954ccYang Ni    }
2780ab50b835805c75ad164466767c2c212f48954ccYang Ni
2798c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    if (bcinfo::MetadataExtractor::hasForEachSignatureX(inputFunctionSignature)) {
280a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni      args.push_back(X);
2810ab50b835805c75ad164466767c2c212f48954ccYang Ni    }
2820ab50b835805c75ad164466767c2c212f48954ccYang Ni
2838c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    if (bcinfo::MetadataExtractor::hasForEachSignatureY(inputFunctionSignature)) {
2840ab50b835805c75ad164466767c2c212f48954ccYang Ni      args.push_back(Y);
2850ab50b835805c75ad164466767c2c212f48954ccYang Ni    }
2860ab50b835805c75ad164466767c2c212f48954ccYang Ni
2878c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    if (bcinfo::MetadataExtractor::hasForEachSignatureZ(inputFunctionSignature)) {
2880ab50b835805c75ad164466767c2c212f48954ccYang Ni      args.push_back(Z);
289a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni    }
290a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
2918c12d615b4ed4b1d782722a125dd1d43bc44a71bYang Ni    dataElement = builder.CreateCall((llvm::Value*)inputFunction, args);
292531d08c85971e47f58aedc093fbe83f1b909703eYang Ni
293531d08c85971e47f58aedc093fbe83f1b909703eYang Ni    slotIter++;
294a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni  }
295a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
2960ab50b835805c75ad164466767c2c212f48954ccYang Ni  if (fusedKernel->getReturnType()->isVoidTy()) {
2970ab50b835805c75ad164466767c2c212f48954ccYang Ni    builder.CreateRetVoid();
2980ab50b835805c75ad164466767c2c212f48954ccYang Ni  } else {
2990ab50b835805c75ad164466767c2c212f48954ccYang Ni    builder.CreateRet(dataElement);
3000ab50b835805c75ad164466767c2c212f48954ccYang Ni  }
3010ab50b835805c75ad164466767c2c212f48954ccYang Ni
3020ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::NamedMDNode* ExportForEachNameMD =
3030ab50b835805c75ad164466767c2c212f48954ccYang Ni    mergedModule->getOrInsertNamedMetadata("#rs_export_foreach_name");
3040ab50b835805c75ad164466767c2c212f48954ccYang Ni
3050ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::MDString* nameMDStr = llvm::MDString::get(ctxt, fusedName);
3060ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::MDNode* nameMDNode = llvm::MDNode::get(ctxt, nameMDStr);
3070ab50b835805c75ad164466767c2c212f48954ccYang Ni  ExportForEachNameMD->addOperand(nameMDNode);
3080ab50b835805c75ad164466767c2c212f48954ccYang Ni
3090ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::NamedMDNode* ExportForEachMD =
3100ab50b835805c75ad164466767c2c212f48954ccYang Ni    mergedModule->getOrInsertNamedMetadata("#rs_export_foreach");
3110ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::MDString* sigMDStr = llvm::MDString::get(ctxt,
312f5b49a0ca149b09c8306b86db9f3aca703c4acd5Pirama Arumuga Nainar                                                 llvm::utostr(fusedFunctionSignature));
3130ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::MDNode* sigMDNode = llvm::MDNode::get(ctxt, sigMDStr);
3140ab50b835805c75ad164466767c2c212f48954ccYang Ni  ExportForEachMD->addOperand(sigMDNode);
3150ab50b835805c75ad164466767c2c212f48954ccYang Ni
3160ab50b835805c75ad164466767c2c212f48954ccYang Ni  return true;
3170ab50b835805c75ad164466767c2c212f48954ccYang Ni}
3180ab50b835805c75ad164466767c2c212f48954ccYang Ni
3190ab50b835805c75ad164466767c2c212f48954ccYang Nibool renameInvoke(BCCContext& Context, const Source* source, const int slot,
3200ab50b835805c75ad164466767c2c212f48954ccYang Ni                  const std::string& newName, Module* module) {
3210ab50b835805c75ad164466767c2c212f48954ccYang Ni  const llvm::Function* F = getInvokeFunction(*source, slot, module);
3220ab50b835805c75ad164466767c2c212f48954ccYang Ni  std::vector<llvm::Type*> params;
3230ab50b835805c75ad164466767c2c212f48954ccYang Ni  for (auto I = F->arg_begin(), E = F->arg_end(); I != E; ++I) {
3240ab50b835805c75ad164466767c2c212f48954ccYang Ni    params.push_back(I->getType());
3250ab50b835805c75ad164466767c2c212f48954ccYang Ni  }
3260ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::Type* returnTy = F->getReturnType();
3270ab50b835805c75ad164466767c2c212f48954ccYang Ni
3280ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::FunctionType* batchFuncTy =
3290ab50b835805c75ad164466767c2c212f48954ccYang Ni          llvm::FunctionType::get(returnTy, params, false);
3300ab50b835805c75ad164466767c2c212f48954ccYang Ni
3310ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::Function* newF =
3320ab50b835805c75ad164466767c2c212f48954ccYang Ni          llvm::Function::Create(batchFuncTy,
3330ab50b835805c75ad164466767c2c212f48954ccYang Ni                                 llvm::GlobalValue::ExternalLinkage, newName,
3340ab50b835805c75ad164466767c2c212f48954ccYang Ni                                 module);
3350ab50b835805c75ad164466767c2c212f48954ccYang Ni
3360ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::BasicBlock* block = llvm::BasicBlock::Create(Context.getLLVMContext(),
3370ab50b835805c75ad164466767c2c212f48954ccYang Ni                                                     "entry", newF);
3380ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::IRBuilder<> builder(block);
3390ab50b835805c75ad164466767c2c212f48954ccYang Ni
3400ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::Function::arg_iterator argIter = newF->arg_begin();
341f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar  llvm::Value* arg1 = &*(argIter++);
3420ab50b835805c75ad164466767c2c212f48954ccYang Ni  builder.CreateCall((llvm::Value*)F, arg1);
3430ab50b835805c75ad164466767c2c212f48954ccYang Ni
3440ab50b835805c75ad164466767c2c212f48954ccYang Ni  builder.CreateRetVoid();
345a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
3460ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::NamedMDNode* ExportFuncNameMD =
3470ab50b835805c75ad164466767c2c212f48954ccYang Ni          module->getOrInsertNamedMetadata("#rs_export_func");
3480ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::MDString* strMD = llvm::MDString::get(module->getContext(), newName);
3490ab50b835805c75ad164466767c2c212f48954ccYang Ni  llvm::MDNode* nodeMD = llvm::MDNode::get(module->getContext(), strMD);
3500ab50b835805c75ad164466767c2c212f48954ccYang Ni  ExportFuncNameMD->addOperand(nodeMD);
351a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
3520ab50b835805c75ad164466767c2c212f48954ccYang Ni  return true;
353a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni}
354a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni
355a4ded1373d7ad3e503f186e65bccf94126a0f020Yang Ni}  // namespace bcc
356