rsCpuScriptGroup2.cpp revision da0f069871343119251d6b0586be356dc2146a62
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h"
21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h>
4da0f069871343119251d6b0586be356dc2146a62Yang Ni
5da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string>
6da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector>
7da0f069871343119251d6b0586be356dc2146a62Yang Ni
8da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
9da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h"
10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <sys/wait.h>
11da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
12da0f069871343119251d6b0586be356dc2146a62Yang Ni
131ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h"
141ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h"
151ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h"
161ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h"
171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h"
181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h"
191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h"
20da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h"
21da0f069871343119251d6b0586be356dc2146a62Yang Ni
22da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string;
23da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector;
241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
251ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android {
261ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript {
271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace {
291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
30da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2;
311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni               uint32_t xend, uint32_t outstep) {
341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr;
351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  const void **oldIns  = kparams->ins;
371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  uint32_t *oldStrides = kparams->inEStrides;
381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
391ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  std::vector<const void*> ins(DefaultKernelArgCount);
401ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  std::vector<uint32_t> strides(DefaultKernelArgCount);
411ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
421ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  for (CPUClosure* cpuClosure : closures) {
431ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const Closure* closure = cpuClosure->mClosure;
441ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
451ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    auto in_iter = ins.begin();
461ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    auto stride_iter = strides.begin();
471ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
481ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    for (const auto& arg : closure->mArgs) {
491ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      const Allocation* a = (const Allocation*)arg;
501ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      const uint32_t eStride = a->mHal.state.elementSizeBytes;
511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
521ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni          eStride * xstart;
531ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      if (kparams->dimY > 1) {
541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        ptr += a->mHal.drvState.lod[0].stride * kparams->y;
551ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      }
561ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      *in_iter++ = ptr;
571ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      *stride_iter++ = eStride;
581ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
591ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
601ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    mutable_kparams->ins = &ins[0];
611ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    mutable_kparams->inEStrides = &strides[0];
621ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
631ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const Allocation* out = closure->mReturnValue;
641ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const uint32_t ostep = out->mHal.state.elementSizeBytes;
651ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni           ostep * xstart;
671ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    if (kparams->dimY > 1) {
681ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      ptr += out->mHal.drvState.lod[0].stride * kparams->y;
691ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
701ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
711ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    mutable_kparams->out = (void*)ptr;
721ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
731ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    mutable_kparams->usr = cpuClosure->mUsrPtr;
741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
751ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    cpuClosure->mFunc(kparams, xstart, xend, ostep);
761ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  }
771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
781ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  mutable_kparams->ins        = oldIns;
791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  mutable_kparams->inEStrides = oldStrides;
801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  mutable_kparams->usr        = &closures;
811ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
821ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
83da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // namespace
84da0f069871343119251d6b0586be356dc2146a62Yang Ni
85da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() {
86da0f069871343119251d6b0586be356dc2146a62Yang Ni  for (CPUClosure* c : mClosures) {
87da0f069871343119251d6b0586be356dc2146a62Yang Ni    delete c;
88da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
89da0f069871343119251d6b0586be356dc2146a62Yang Ni  if (mScriptObj) {
90da0f069871343119251d6b0586be356dc2146a62Yang Ni    dlclose(mScriptObj);
91da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
92da0f069871343119251d6b0586be356dc2146a62Yang Ni}
93da0f069871343119251d6b0586be356dc2146a62Yang Ni
94da0f069871343119251d6b0586be356dc2146a62Yang Nibool Batch::conflict(CPUClosure* closure) const {
95da0f069871343119251d6b0586be356dc2146a62Yang Ni  if (mClosures.empty()) {
96da0f069871343119251d6b0586be356dc2146a62Yang Ni    return false;
97da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
98da0f069871343119251d6b0586be356dc2146a62Yang Ni
99da0f069871343119251d6b0586be356dc2146a62Yang Ni  if (closure->mClosure->mKernelID.get() == nullptr ||
100da0f069871343119251d6b0586be356dc2146a62Yang Ni      mClosures.front()->mClosure->mKernelID.get() == nullptr) {
101da0f069871343119251d6b0586be356dc2146a62Yang Ni    // An invoke should be in a batch by itself, so it conflicts with any other
102da0f069871343119251d6b0586be356dc2146a62Yang Ni    // closure.
103da0f069871343119251d6b0586be356dc2146a62Yang Ni    return true;
104da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
105da0f069871343119251d6b0586be356dc2146a62Yang Ni
1061ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  for (const auto &p : closure->mClosure->mGlobalDeps) {
1071ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const Closure* dep = p.first;
108da0f069871343119251d6b0586be356dc2146a62Yang Ni    for (CPUClosure* c : mClosures) {
1091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      if (c->mClosure == dep) {
110da0f069871343119251d6b0586be356dc2146a62Yang Ni        ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its global", closure, dep);
1111ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        return true;
1121ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      }
1131ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
1141ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  }
1151ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  for (const auto &p : closure->mClosure->mArgDeps) {
1161ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const Closure* dep = p.first;
117da0f069871343119251d6b0586be356dc2146a62Yang Ni    for (CPUClosure* c : mClosures) {
1181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      if (c->mClosure == dep) {
1191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        for (const auto &p1 : *p.second) {
120da0f069871343119251d6b0586be356dc2146a62Yang Ni          if (p1.second->get() != nullptr) {
121da0f069871343119251d6b0586be356dc2146a62Yang Ni            ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its arg", closure, dep);
1221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni            return true;
1231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni          }
1241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        }
1251ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      }
1261ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
1271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  }
1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  return false;
1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1311ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
1321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                         const ScriptGroupBase *sg) :
1331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) {
134da0f069871343119251d6b0586be356dc2146a62Yang Ni  Batch* batch = new Batch(this);
1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  for (Closure* closure: mGroup->mClosures) {
1361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const ScriptKernelID* kernelID = closure->mKernelID.get();
1371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    RsdCpuScriptImpl* si =
1381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript);
1391ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1401ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    MTLaunchStruct mtls;
1411ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    si->forEachKernelSetup(kernelID->mSlot, &mtls);
1421ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    // TODO: Is mtls.fep.usrLen ever used?
1431ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    CPUClosure* cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel,
1441ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                    mtls.fep.usr, mtls.fep.usrLen);
145da0f069871343119251d6b0586be356dc2146a62Yang Ni    if (batch->conflict(cc)) {
1461ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      mBatches.push_back(batch);
147da0f069871343119251d6b0586be356dc2146a62Yang Ni      batch = new Batch(this);
1481ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
149da0f069871343119251d6b0586be356dc2146a62Yang Ni
150da0f069871343119251d6b0586be356dc2146a62Yang Ni    batch->mClosures.push_back(cc);
1511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  }
152da0f069871343119251d6b0586be356dc2146a62Yang Ni
1531ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  mBatches.push_back(batch);
154da0f069871343119251d6b0586be356dc2146a62Yang Ni
155da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
156da0f069871343119251d6b0586be356dc2146a62Yang Ni  for (Batch* batch : mBatches) {
157da0f069871343119251d6b0586be356dc2146a62Yang Ni    batch->tryToCreateFusedKernel(mGroup->mCacheDir.c_str());
158da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
159da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
1601ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1611ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1621ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
163da0f069871343119251d6b0586be356dc2146a62Yang Ni  for (Batch* batch : mBatches) {
164da0f069871343119251d6b0586be356dc2146a62Yang Ni    delete batch;
165da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
166da0f069871343119251d6b0586be356dc2146a62Yang Ni}
167da0f069871343119251d6b0586be356dc2146a62Yang Ni
168da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace {
169da0f069871343119251d6b0586be356dc2146a62Yang Ni
170da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
171da0f069871343119251d6b0586be356dc2146a62Yang Ni
// Returns the part of `path` that follows the last '/' or '\\' separator.
// A path without any separator is returned unchanged; a path that ends in a
// separator yields an empty string.
std::string getFileName(std::string path) {
  // Keep the index in the string's own size type so that npos is not
  // truncated, and handle "no separator" explicitly instead of relying on
  // npos + 1 wrapping around to 0.
  const std::string::size_type lastSeparator = path.find_last_of("/\\");
  if (lastSeparator == std::string::npos) {
    return path;
  }
  return path.substr(lastSeparator + 1);
}
176da0f069871343119251d6b0586be356dc2146a62Yang Ni
177da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments(
178da0f069871343119251d6b0586be356dc2146a62Yang Ni    const vector<string>& inputs, const vector<int>& kernels,
179da0f069871343119251d6b0586be356dc2146a62Yang Ni    const string& output_dir, const string& output_filename,
180da0f069871343119251d6b0586be356dc2146a62Yang Ni    const string& rsLib, vector<const char*>* args) {
181da0f069871343119251d6b0586be356dc2146a62Yang Ni  args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
182da0f069871343119251d6b0586be356dc2146a62Yang Ni  args->push_back("-fPIC");
183da0f069871343119251d6b0586be356dc2146a62Yang Ni  args->push_back("-embedRSInfo");
184da0f069871343119251d6b0586be356dc2146a62Yang Ni  args->push_back("-mtriple");
185da0f069871343119251d6b0586be356dc2146a62Yang Ni  args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
186da0f069871343119251d6b0586be356dc2146a62Yang Ni  args->push_back("-bclib");
187da0f069871343119251d6b0586be356dc2146a62Yang Ni  args->push_back(rsLib.c_str());
188da0f069871343119251d6b0586be356dc2146a62Yang Ni  for (const string& input : inputs) {
189da0f069871343119251d6b0586be356dc2146a62Yang Ni    args->push_back(input.c_str());
190da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
191da0f069871343119251d6b0586be356dc2146a62Yang Ni  for (int kernel : kernels) {
192da0f069871343119251d6b0586be356dc2146a62Yang Ni    args->push_back("-k");
193da0f069871343119251d6b0586be356dc2146a62Yang Ni    string strKernel = std::to_string(kernel);
194da0f069871343119251d6b0586be356dc2146a62Yang Ni    args->push_back(strKernel.c_str());
195da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
196da0f069871343119251d6b0586be356dc2146a62Yang Ni  args->push_back("-output_path");
197da0f069871343119251d6b0586be356dc2146a62Yang Ni  args->push_back(output_dir.c_str());
198da0f069871343119251d6b0586be356dc2146a62Yang Ni  args->push_back("-o");
199da0f069871343119251d6b0586be356dc2146a62Yang Ni  args->push_back(output_filename.c_str());
200da0f069871343119251d6b0586be356dc2146a62Yang Ni  args->push_back(nullptr);
201da0f069871343119251d6b0586be356dc2146a62Yang Ni}
202da0f069871343119251d6b0586be356dc2146a62Yang Ni
// Joins the first `n` C strings of `strs` with single spaces and returns the
// result. Returns an empty string when `n` is zero or negative, in which case
// `strs` is not read at all.
std::string convertListToString(int n, const char* const* strs) {
  std::string ret;
  if (n <= 0) {
    // Nothing to join; in particular strs[0] may be a nullptr terminator.
    return ret;
  }
  ret.append(strs[0]);
  for (int i = 1; i < n; i++) {
    ret.append(" ");
    ret.append(strs[i]);
  }
  return ret;
}
212da0f069871343119251d6b0586be356dc2146a62Yang Ni
213da0f069871343119251d6b0586be356dc2146a62Yang Nibool fuseAndCompile(const char** arguments,
214da0f069871343119251d6b0586be356dc2146a62Yang Ni                    const string& commandLine) {
215da0f069871343119251d6b0586be356dc2146a62Yang Ni  const pid_t pid = fork();
216da0f069871343119251d6b0586be356dc2146a62Yang Ni
217da0f069871343119251d6b0586be356dc2146a62Yang Ni  if (pid == -1) {
218da0f069871343119251d6b0586be356dc2146a62Yang Ni    ALOGE("Couldn't fork for bcc execution");
219da0f069871343119251d6b0586be356dc2146a62Yang Ni    return false;
220da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
221da0f069871343119251d6b0586be356dc2146a62Yang Ni
222da0f069871343119251d6b0586be356dc2146a62Yang Ni  if (pid == 0) {
223da0f069871343119251d6b0586be356dc2146a62Yang Ni    // Child process
224da0f069871343119251d6b0586be356dc2146a62Yang Ni    ALOGV("Invoking BCC with: %s", commandLine.c_str());
225da0f069871343119251d6b0586be356dc2146a62Yang Ni    execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments);
226da0f069871343119251d6b0586be356dc2146a62Yang Ni
227da0f069871343119251d6b0586be356dc2146a62Yang Ni    ALOGE("execv() failed: %s", strerror(errno));
228da0f069871343119251d6b0586be356dc2146a62Yang Ni    abort();
229da0f069871343119251d6b0586be356dc2146a62Yang Ni    return false;
230da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
231da0f069871343119251d6b0586be356dc2146a62Yang Ni
232da0f069871343119251d6b0586be356dc2146a62Yang Ni  // Parent process
233da0f069871343119251d6b0586be356dc2146a62Yang Ni  int status = 0;
234da0f069871343119251d6b0586be356dc2146a62Yang Ni  const pid_t w = waitpid(pid, &status, 0);
235da0f069871343119251d6b0586be356dc2146a62Yang Ni  if (w == -1) {
236da0f069871343119251d6b0586be356dc2146a62Yang Ni    return false;
237da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
238da0f069871343119251d6b0586be356dc2146a62Yang Ni
239da0f069871343119251d6b0586be356dc2146a62Yang Ni  if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) {
240da0f069871343119251d6b0586be356dc2146a62Yang Ni    ALOGE("bcc terminated unexpectedly");
241da0f069871343119251d6b0586be356dc2146a62Yang Ni    return false;
242da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
243da0f069871343119251d6b0586be356dc2146a62Yang Ni
244da0f069871343119251d6b0586be356dc2146a62Yang Ni  return true;
245da0f069871343119251d6b0586be356dc2146a62Yang Ni}
246da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
247da0f069871343119251d6b0586be356dc2146a62Yang Ni
248da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // anonymous namespace
249da0f069871343119251d6b0586be356dc2146a62Yang Ni
250da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::tryToCreateFusedKernel(const char *cacheDir) {
251da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
252da0f069871343119251d6b0586be356dc2146a62Yang Ni  if (mClosures.size() < 2) {
253da0f069871343119251d6b0586be356dc2146a62Yang Ni    ALOGV("Compiler kernel fusion skipped due to only one or zero kernel in"
254da0f069871343119251d6b0586be356dc2146a62Yang Ni          " a script group batch.");
255da0f069871343119251d6b0586be356dc2146a62Yang Ni    return;
256da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
257da0f069871343119251d6b0586be356dc2146a62Yang Ni
258da0f069871343119251d6b0586be356dc2146a62Yang Ni  //===--------------------------------------------------------------------===//
259da0f069871343119251d6b0586be356dc2146a62Yang Ni  // Fuse the input kernels and generate native code in an object file
260da0f069871343119251d6b0586be356dc2146a62Yang Ni  //===--------------------------------------------------------------------===//
261da0f069871343119251d6b0586be356dc2146a62Yang Ni
262da0f069871343119251d6b0586be356dc2146a62Yang Ni  std::vector<string> inputFiles;
263da0f069871343119251d6b0586be356dc2146a62Yang Ni  std::vector<int> slots;
264da0f069871343119251d6b0586be356dc2146a62Yang Ni
265da0f069871343119251d6b0586be356dc2146a62Yang Ni  for (CPUClosure* cpuClosure : mClosures) {
266da0f069871343119251d6b0586be356dc2146a62Yang Ni    const Closure* closure = cpuClosure->mClosure;
267da0f069871343119251d6b0586be356dc2146a62Yang Ni    const ScriptKernelID* kernelID = closure->mKernelID.get();
268da0f069871343119251d6b0586be356dc2146a62Yang Ni    const Script* script = kernelID->mScript;
269da0f069871343119251d6b0586be356dc2146a62Yang Ni
270da0f069871343119251d6b0586be356dc2146a62Yang Ni    if (script->isIntrinsic()) {
271da0f069871343119251d6b0586be356dc2146a62Yang Ni      return;
2721ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
273da0f069871343119251d6b0586be356dc2146a62Yang Ni
274da0f069871343119251d6b0586be356dc2146a62Yang Ni    const RsdCpuScriptImpl *cpuScript =
275da0f069871343119251d6b0586be356dc2146a62Yang Ni        (const RsdCpuScriptImpl*)script->mHal.drv;
276da0f069871343119251d6b0586be356dc2146a62Yang Ni
277da0f069871343119251d6b0586be356dc2146a62Yang Ni    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
278da0f069871343119251d6b0586be356dc2146a62Yang Ni
279da0f069871343119251d6b0586be356dc2146a62Yang Ni    inputFiles.push_back(bitcodeFilename);
280da0f069871343119251d6b0586be356dc2146a62Yang Ni    slots.push_back(kernelID->mSlot);
281da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
282da0f069871343119251d6b0586be356dc2146a62Yang Ni
283da0f069871343119251d6b0586be356dc2146a62Yang Ni  string outputPath(tempnam(cacheDir, "fused"));
284da0f069871343119251d6b0586be356dc2146a62Yang Ni  string outputFileName = getFileName(outputPath);
285da0f069871343119251d6b0586be356dc2146a62Yang Ni  string objFilePath(outputPath);
286da0f069871343119251d6b0586be356dc2146a62Yang Ni  objFilePath.append(".o");
287da0f069871343119251d6b0586be356dc2146a62Yang Ni  string rsLibPath(SYSLIBPATH"/libclcore.bc");
288da0f069871343119251d6b0586be356dc2146a62Yang Ni  vector<const char*> arguments;
289da0f069871343119251d6b0586be356dc2146a62Yang Ni  setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath,
290da0f069871343119251d6b0586be356dc2146a62Yang Ni                        &arguments);
291da0f069871343119251d6b0586be356dc2146a62Yang Ni  string commandLine =
292da0f069871343119251d6b0586be356dc2146a62Yang Ni      convertListToString(arguments.size() - 1, arguments.data());
293da0f069871343119251d6b0586be356dc2146a62Yang Ni
294da0f069871343119251d6b0586be356dc2146a62Yang Ni  if (!fuseAndCompile(arguments.data(), commandLine)) {
295da0f069871343119251d6b0586be356dc2146a62Yang Ni    return;
296da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
297da0f069871343119251d6b0586be356dc2146a62Yang Ni
298da0f069871343119251d6b0586be356dc2146a62Yang Ni  //===--------------------------------------------------------------------===//
299da0f069871343119251d6b0586be356dc2146a62Yang Ni  // Create and load the shared lib
300da0f069871343119251d6b0586be356dc2146a62Yang Ni  //===--------------------------------------------------------------------===//
301da0f069871343119251d6b0586be356dc2146a62Yang Ni
302da0f069871343119251d6b0586be356dc2146a62Yang Ni  const char* resName = outputFileName.c_str();
303da0f069871343119251d6b0586be356dc2146a62Yang Ni
304da0f069871343119251d6b0586be356dc2146a62Yang Ni  if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
305da0f069871343119251d6b0586be356dc2146a62Yang Ni    ALOGE("Failed to link object file '%s'", resName);
306da0f069871343119251d6b0586be356dc2146a62Yang Ni    return;
307da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
308da0f069871343119251d6b0586be356dc2146a62Yang Ni
309da0f069871343119251d6b0586be356dc2146a62Yang Ni  void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
310da0f069871343119251d6b0586be356dc2146a62Yang Ni  if (mSharedObj == nullptr) {
311da0f069871343119251d6b0586be356dc2146a62Yang Ni    ALOGE("Unable to load '%s'", resName);
312da0f069871343119251d6b0586be356dc2146a62Yang Ni    return;
3131ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  }
314da0f069871343119251d6b0586be356dc2146a62Yang Ni
315da0f069871343119251d6b0586be356dc2146a62Yang Ni  mExecutable = ScriptExecutable::createFromSharedObject(
316da0f069871343119251d6b0586be356dc2146a62Yang Ni      nullptr,  // RS context. Unused.
317da0f069871343119251d6b0586be356dc2146a62Yang Ni      mSharedObj);
318da0f069871343119251d6b0586be356dc2146a62Yang Ni
319da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif  // RS_COMPATIBILITY_LIB
3201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3211ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() {
323da0f069871343119251d6b0586be356dc2146a62Yang Ni  for (auto batch : mBatches) {
324da0f069871343119251d6b0586be356dc2146a62Yang Ni    batch->setGlobalsForBatch();
325da0f069871343119251d6b0586be356dc2146a62Yang Ni    batch->run();
3261ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  }
3271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
329da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() {
330da0f069871343119251d6b0586be356dc2146a62Yang Ni  for (CPUClosure* cpuClosure : mClosures) {
3311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const Closure* closure = cpuClosure->mClosure;
3321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const ScriptKernelID* kernelID = closure->mKernelID.get();
3331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    Script* s = kernelID->mScript;
3341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    for (const auto& p : closure->mGlobals) {
3351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      const void* value = p.second.first;
3361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      int size = p.second.second;
3371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      // We use -1 size to indicate an ObjectBase rather than a primitive type
3381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      if (size < 0) {
3391ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        s->setVarObj(p.first->mSlot, (ObjectBase*)value);
3401ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      } else {
3411ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        s->setVar(p.first->mSlot, (const void*)&value, size);
3421ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni      }
3431ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
3441ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  }
3451ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3461ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
347da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() {
348da0f069871343119251d6b0586be356dc2146a62Yang Ni  if (mExecutable != nullptr) {
349da0f069871343119251d6b0586be356dc2146a62Yang Ni    MTLaunchStruct mtls;
350da0f069871343119251d6b0586be356dc2146a62Yang Ni    const CPUClosure* firstCpuClosure = mClosures.front();
351da0f069871343119251d6b0586be356dc2146a62Yang Ni    const CPUClosure* lastCpuClosure = mClosures.back();
352da0f069871343119251d6b0586be356dc2146a62Yang Ni
353da0f069871343119251d6b0586be356dc2146a62Yang Ni    firstCpuClosure->mSi->forEachMtlsSetup(
354da0f069871343119251d6b0586be356dc2146a62Yang Ni        (const Allocation**)&firstCpuClosure->mClosure->mArgs[0],
355da0f069871343119251d6b0586be356dc2146a62Yang Ni        firstCpuClosure->mClosure->mArgs.size(),
356da0f069871343119251d6b0586be356dc2146a62Yang Ni        lastCpuClosure->mClosure->mReturnValue,
357da0f069871343119251d6b0586be356dc2146a62Yang Ni        nullptr, 0, nullptr, &mtls);
358da0f069871343119251d6b0586be356dc2146a62Yang Ni
359da0f069871343119251d6b0586be356dc2146a62Yang Ni    mtls.script = nullptr;
360da0f069871343119251d6b0586be356dc2146a62Yang Ni    mtls.fep.usr = nullptr;
361da0f069871343119251d6b0586be356dc2146a62Yang Ni    mtls.kernel = mExecutable->getForEachFunction(0);
362da0f069871343119251d6b0586be356dc2146a62Yang Ni
363da0f069871343119251d6b0586be356dc2146a62Yang Ni    mGroup->getCpuRefImpl()->launchThreads(
364da0f069871343119251d6b0586be356dc2146a62Yang Ni        (const Allocation**)&firstCpuClosure->mClosure->mArgs[0],
365da0f069871343119251d6b0586be356dc2146a62Yang Ni        firstCpuClosure->mClosure->mArgs.size(),
366da0f069871343119251d6b0586be356dc2146a62Yang Ni        lastCpuClosure->mClosure->mReturnValue,
367da0f069871343119251d6b0586be356dc2146a62Yang Ni        nullptr, &mtls);
368da0f069871343119251d6b0586be356dc2146a62Yang Ni
369da0f069871343119251d6b0586be356dc2146a62Yang Ni    return;
370da0f069871343119251d6b0586be356dc2146a62Yang Ni  }
371da0f069871343119251d6b0586be356dc2146a62Yang Ni
372da0f069871343119251d6b0586be356dc2146a62Yang Ni  for (CPUClosure* cpuClosure : mClosures) {
3731ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const Closure* closure = cpuClosure->mClosure;
3741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const ScriptKernelID* kernelID = closure->mKernelID.get();
3751ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    cpuClosure->mSi->preLaunch(kernelID->mSlot,
3761ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                               (const Allocation**)&closure->mArgs[0],
3771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                               closure->mArgs.size(), closure->mReturnValue,
3781ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                               cpuClosure->mUsrPtr, cpuClosure->mUsrSize,
3791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                               nullptr);
3801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  }
3811ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
382da0f069871343119251d6b0586be356dc2146a62Yang Ni  const CPUClosure* cpuClosure = mClosures.front();
3831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  const Closure* closure = cpuClosure->mClosure;
3841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  MTLaunchStruct mtls;
3851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
386bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams  if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0],
387bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams                                        closure->mArgs.size(),
388bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams                                        closure->mReturnValue,
389bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams                                        nullptr, 0, nullptr, &mtls)) {
3901ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
391bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams      mtls.script = nullptr;
392bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams      mtls.kernel = (void (*)())&groupRoot;
393da0f069871343119251d6b0586be356dc2146a62Yang Ni      mtls.fep.usr = &mClosures;
3941ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
395da0f069871343119251d6b0586be356dc2146a62Yang Ni      mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
396bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams  }
3971ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
398da0f069871343119251d6b0586be356dc2146a62Yang Ni  for (CPUClosure* cpuClosure : mClosures) {
3991ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const Closure* closure = cpuClosure->mClosure;
4001ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    const ScriptKernelID* kernelID = closure->mKernelID.get();
4011ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    cpuClosure->mSi->postLaunch(kernelID->mSlot,
4021ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                (const Allocation**)&closure->mArgs[0],
4031ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                closure->mArgs.size(), closure->mReturnValue,
4041ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                nullptr, 0, nullptr);
4051ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni  }
4061ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4071ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
4081ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace renderscript
4091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace android
410