rsCpuScriptGroup2.cpp revision 2abfcc6d129fe3defddef4540aa95cc445c03a7a
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h"
21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h>
4da0f069871343119251d6b0586be356dc2146a62Yang Ni
5da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string>
6da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector>
7da0f069871343119251d6b0586be356dc2146a62Yang Ni
8da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
9da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h"
10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <sys/wait.h>
11da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
12da0f069871343119251d6b0586be356dc2146a62Yang Ni
131ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h"
141ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h"
151ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h"
161ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h"
172abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h"
181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h"
191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h"
201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h"
21da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h"
22da0f069871343119251d6b0586be356dc2146a62Yang Ni
23da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string;
24da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector;
251ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
261ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android {
271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript {
281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace {
301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
31da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2;
321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni               uint32_t xend, uint32_t outstep) {
35ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kparams->usr;
36eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
37eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const void **oldIns  = kparams->ins;
38eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    uint32_t *oldStrides = kparams->inEStrides;
39eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
40eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    std::vector<const void*> ins(DefaultKernelArgCount);
41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    std::vector<uint32_t> strides(DefaultKernelArgCount);
42eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
43eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : closures) {
44eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
45eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        auto in_iter = ins.begin();
47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        auto stride_iter = strides.begin();
48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
49ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        for (size_t i = 0; i < closure->mNumArg; i++) {
50ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const void* arg = closure->mArgs[i];
51eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const Allocation* a = (const Allocation*)arg;
52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint32_t eStride = a->mHal.state.elementSizeBytes;
53eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
54eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    eStride * xstart;
55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (kparams->dimY > 1) {
56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ptr += a->mHal.drvState.lod[0].stride * kparams->y;
57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            *in_iter++ = ptr;
59eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            *stride_iter++ = eStride;
60eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
611ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
62eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->ins = &ins[0];
63eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->inEStrides = &strides[0];
641ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
65eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Allocation* out = closure->mReturnValue;
66eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint32_t ostep = out->mHal.state.elementSizeBytes;
67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ostep * xstart;
69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (kparams->dimY > 1) {
70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            ptr += out->mHal.drvState.lod[0].stride * kparams->y;
71eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
721ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->out = (void*)ptr;
741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
75eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->usr = cpuClosure->mUsrPtr;
761ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
77eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mFunc(kparams, xstart, xend, ostep);
781ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
80eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mutable_kparams->ins        = oldIns;
81eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mutable_kparams->inEStrides = oldStrides;
82eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mutable_kparams->usr        = &closures;
831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
85da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // namespace
86da0f069871343119251d6b0586be356dc2146a62Yang Ni
87da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() {
88eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* c : mClosures) {
89eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete c;
90eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
91eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mScriptObj) {
92eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        dlclose(mScriptObj);
93eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
94da0f069871343119251d6b0586be356dc2146a62Yang Ni}
95da0f069871343119251d6b0586be356dc2146a62Yang Ni
96ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const {
97eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.empty()) {
98eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
99eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
100da0f069871343119251d6b0586be356dc2146a62Yang Ni
101ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const Closure* closure = cpuClosure->mClosure;
102ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
103ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    if (closure->mKernelID.get() == nullptr ||
104eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mClosures.front()->mClosure->mKernelID.get() == nullptr) {
105eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // An invoke should be in a batch by itself, so it conflicts with any other
106eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // closure.
1071ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        return true;
1081ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
109eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
110ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& globalDeps = closure->mGlobalDeps;
111ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& argDeps = closure->mArgDeps;
112ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
113ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    for (CPUClosure* c : mClosures) {
114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const Closure* batched = c->mClosure;
115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (globalDeps.find(batched) != globalDeps.end()) {
116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            return true;
117eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const auto& it = argDeps.find(batched);
119ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (it != argDeps.end()) {
120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const auto& args = (*it).second;
121ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            for (const auto &p1 : *args) {
122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                if (p1.second->get() != nullptr) {
123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                    return true;
124eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                }
125eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
1261ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        }
1271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
128ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
129eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    return false;
1301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1321ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
1331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                         const ScriptGroupBase *sg) :
1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) {
135eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!mGroup->mClosures.empty());
136eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
137eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    Batch* batch = new Batch(this);
138eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Closure* closure: mGroup->mClosures) {
139eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const ScriptKernelID* kernelID = closure->mKernelID.get();
140eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        RsdCpuScriptImpl* si;
141eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        CPUClosure* cc;
142eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (kernelID != nullptr) {
143eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript);
144eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            MTLaunchStruct mtls;
145eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            si->forEachKernelSetup(kernelID->mSlot, &mtls);
146eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            // TODO: Is mtls.fep.usrLen ever used?
147eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel,
148eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                mtls.fep.usr, mtls.fep.usrLen);
149eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        } else {
150eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(
151eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    closure->mInvokeID->mScript);
152eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            cc = new CPUClosure(closure, si);
153eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
1541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
155eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (batch->conflict(cc)) {
156eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            mBatches.push_back(batch);
157eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            batch = new Batch(this);
158eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
159da0f069871343119251d6b0586be356dc2146a62Yang Ni
160eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->mClosures.push_back(cc);
161eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
162da0f069871343119251d6b0586be356dc2146a62Yang Ni
163eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!batch->mClosures.empty());
164eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mBatches.push_back(batch);
165da0f069871343119251d6b0586be356dc2146a62Yang Ni
166da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
167eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Batch* batch : mBatches) {
168ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        batch->tryToCreateFusedKernel(mGroup->mCacheDir);
169eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
170da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
1711ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1721ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1731ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
174eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Batch* batch : mBatches) {
175eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete batch;
176eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
177da0f069871343119251d6b0586be356dc2146a62Yang Ni}
178da0f069871343119251d6b0586be356dc2146a62Yang Ni
179da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace {
180da0f069871343119251d6b0586be356dc2146a62Yang Ni
181da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
182da0f069871343119251d6b0586be356dc2146a62Yang Ni
// Returns the last component of `path`: everything after the final '/' or
// '\\'. A path without any separator is returned unchanged, and a path that
// ends in a separator yields an empty string.
std::string getFileName(const std::string& path) {
    // Keep the position in the string's own size type (not `unsigned`) and
    // test for npos explicitly instead of relying on npos + 1 wrapping to 0.
    const std::string::size_type lastSeparator = path.find_last_of("/\\");
    if (lastSeparator == std::string::npos) {
        return path;
    }
    return path.substr(lastSeparator + 1);
}
187da0f069871343119251d6b0586be356dc2146a62Yang Ni
188da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments(
189eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const vector<string>& inputs, const vector<int>& kernels,
190eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& output_dir, const string& output_filename,
191eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& rsLib, vector<const char*>* args) {
192eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
193eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-fPIC");
194eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-embedRSInfo");
195eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-mtriple");
196eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
197eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-bclib");
198eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(rsLib.c_str());
199eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (const string& input : inputs) {
200eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        args->push_back(input.c_str());
201eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
202eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (int kernel : kernels) {
203eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        args->push_back("-k");
204eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        string strKernel = std::to_string(kernel);
205eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        args->push_back(strKernel.c_str());
206eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
207eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-output_path");
208eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(output_dir.c_str());
209eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-o");
210eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(output_filename.c_str());
211eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(nullptr);
212da0f069871343119251d6b0586be356dc2146a62Yang Ni}
213da0f069871343119251d6b0586be356dc2146a62Yang Ni
214da0f069871343119251d6b0586be356dc2146a62Yang Nibool fuseAndCompile(const char** arguments,
215da0f069871343119251d6b0586be356dc2146a62Yang Ni                    const string& commandLine) {
216eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const pid_t pid = fork();
217da0f069871343119251d6b0586be356dc2146a62Yang Ni
218eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (pid == -1) {
219eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Couldn't fork for bcc execution");
220eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
221eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
222da0f069871343119251d6b0586be356dc2146a62Yang Ni
223eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (pid == 0) {
224eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // Child process
225eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGV("Invoking BCC with: %s", commandLine.c_str());
226eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments);
227da0f069871343119251d6b0586be356dc2146a62Yang Ni
228eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("execv() failed: %s", strerror(errno));
229eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        abort();
230eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
231eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
232da0f069871343119251d6b0586be356dc2146a62Yang Ni
233eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Parent process
234eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    int status = 0;
235eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const pid_t w = waitpid(pid, &status, 0);
236eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (w == -1) {
237eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
238eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
239da0f069871343119251d6b0586be356dc2146a62Yang Ni
240eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) {
241eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("bcc terminated unexpectedly");
242eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
243eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
244da0f069871343119251d6b0586be356dc2146a62Yang Ni
245eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    return true;
246da0f069871343119251d6b0586be356dc2146a62Yang Ni}
247da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
248da0f069871343119251d6b0586be356dc2146a62Yang Ni
249da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // anonymous namespace
250da0f069871343119251d6b0586be356dc2146a62Yang Ni
251da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::tryToCreateFusedKernel(const char *cacheDir) {
252da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
253eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.size() < 2) {
254eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
255eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
256da0f069871343119251d6b0586be356dc2146a62Yang Ni
257eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
258eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Fuse the input kernels and generate native code in an object file
259eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
260da0f069871343119251d6b0586be356dc2146a62Yang Ni
261eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    std::vector<string> inputFiles;
262eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    std::vector<int> slots;
263da0f069871343119251d6b0586be356dc2146a62Yang Ni
264eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
265eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
266eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const ScriptKernelID* kernelID = closure->mKernelID.get();
267eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Script* script = kernelID->mScript;
268da0f069871343119251d6b0586be356dc2146a62Yang Ni
269eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (script->isIntrinsic()) {
270eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            return;
271eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
272da0f069871343119251d6b0586be356dc2146a62Yang Ni
273eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const RsdCpuScriptImpl *cpuScript =
274eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                (const RsdCpuScriptImpl*)script->mHal.drv;
275da0f069871343119251d6b0586be356dc2146a62Yang Ni
276eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
277da0f069871343119251d6b0586be356dc2146a62Yang Ni
278eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        inputFiles.push_back(bitcodeFilename);
279eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        slots.push_back(kernelID->mSlot);
280eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
281da0f069871343119251d6b0586be356dc2146a62Yang Ni
282eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    string outputPath(tempnam(cacheDir, "fused"));
283eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    string outputFileName = getFileName(outputPath);
284eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    string objFilePath(outputPath);
285eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    objFilePath.append(".o");
286eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    string rsLibPath(SYSLIBPATH"/libclcore.bc");
287eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    vector<const char*> arguments;
288eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath,
289eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                          &arguments);
2902abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni    std::unique_ptr<const char> joined(
2912abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni        rsuJoinStrings(arguments.size() - 1, arguments.data()));
2922abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni    string commandLine (joined.get());
293eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
294eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!fuseAndCompile(arguments.data(), commandLine)) {
295eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
296eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
297da0f069871343119251d6b0586be356dc2146a62Yang Ni
298eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
299eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Create and load the shared lib
300eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
301da0f069871343119251d6b0586be356dc2146a62Yang Ni
302eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const char* resName = outputFileName.c_str();
303da0f069871343119251d6b0586be356dc2146a62Yang Ni
304eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
305eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Failed to link object file '%s'", resName);
306eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
307eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
308da0f069871343119251d6b0586be356dc2146a62Yang Ni
309eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
310eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mSharedObj == nullptr) {
311eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Unable to load '%s'", resName);
312eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
313eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
314da0f069871343119251d6b0586be356dc2146a62Yang Ni
315eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mExecutable = ScriptExecutable::createFromSharedObject(
316eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                                           nullptr,  // RS context. Unused.
317eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                                           mSharedObj);
318da0f069871343119251d6b0586be356dc2146a62Yang Ni
319da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif  // RS_COMPATIBILITY_LIB
3201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3211ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() {
323eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (auto batch : mBatches) {
324eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->setGlobalsForBatch();
325eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->run();
326eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
3271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
329da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() {
330eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
331eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
332eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const ScriptKernelID* kernelID = closure->mKernelID.get();
333eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        Script* s;
334eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (kernelID != nullptr) {
335eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            s = kernelID->mScript;
336eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        } else {
337eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            s = cpuClosure->mClosure->mInvokeID->mScript;
338eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
339eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        for (const auto& p : closure->mGlobals) {
340eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const void* value = p.second.first;
341eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            int size = p.second.second;
342eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (value == nullptr && size == 0) {
343eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // This indicates the current closure depends on another closure for a
344eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // global in their shared module (script). In this case we don't need to
345eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // copy the value. For example, an invoke intializes a global variable
346eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // which a kernel later reads.
347eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                continue;
348eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
349ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            rsAssert(p.first != nullptr);
350ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)",
351ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                  closure, p.first, p.first->mScript, p.first->mSlot);
352eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            // We use -1 size to indicate an ObjectBase rather than a primitive type
353eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (size < 0) {
354eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                s->setVarObj(p.first->mSlot, (ObjectBase*)value);
355eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            } else {
356eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                s->setVar(p.first->mSlot, (const void*)&value, size);
357eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
358eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
3591ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
3601ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3611ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
362da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() {
363eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mExecutable != nullptr) {
364eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        MTLaunchStruct mtls;
365eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* firstCpuClosure = mClosures.front();
366eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* lastCpuClosure = mClosures.back();
367eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
368eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        firstCpuClosure->mSi->forEachMtlsSetup(
369ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
370ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
371eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
372eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, 0, nullptr, &mtls);
373eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
374eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
375eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = nullptr;
376eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.kernel = mExecutable->getForEachFunction(0);
377eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
378eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(
379ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
380ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
381eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
382eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, &mtls);
383eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
384eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
385eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
386eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
387eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.size() == 1 &&
388eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mClosures.front()->mClosure->mKernelID.get() == nullptr) {
389eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // This closure is for an invoke function
390eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        CPUClosure* cc = mClosures.front();
391eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* c = cc->mClosure;
392eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const ScriptInvokeID* invokeID = c->mInvokeID;
393eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        rsAssert(invokeID != nullptr);
394eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
395eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
396eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
397da0f069871343119251d6b0586be356dc2146a62Yang Ni
398eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
399eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
400eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const ScriptKernelID* kernelID = closure->mKernelID.get();
401eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->preLaunch(kernelID->mSlot,
402ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   (const Allocation**)closure->mArgs,
403ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   closure->mNumArg, closure->mReturnValue,
404eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                   cpuClosure->mUsrPtr, cpuClosure->mUsrSize,
405eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                   nullptr);
406eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
407da0f069871343119251d6b0586be356dc2146a62Yang Ni
408eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const CPUClosure* cpuClosure = mClosures.front();
409eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const Closure* closure = cpuClosure->mClosure;
410eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    MTLaunchStruct mtls;
411da0f069871343119251d6b0586be356dc2146a62Yang Ni
412ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
413ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                          closure->mNumArg,
414eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          closure->mReturnValue,
415eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          nullptr, 0, nullptr, &mtls)) {
416da0f069871343119251d6b0586be356dc2146a62Yang Ni
417eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
418eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.kernel = (void (*)())&groupRoot;
419eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = &mClosures;
420da0f069871343119251d6b0586be356dc2146a62Yang Ni
421eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
422eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
423eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
424eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
425eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
426eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const ScriptKernelID* kernelID = closure->mKernelID.get();
427eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->postLaunch(kernelID->mSlot,
428ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    (const Allocation**)closure->mArgs,
429ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    closure->mNumArg, closure->mReturnValue,
430eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                    nullptr, 0, nullptr);
431eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
4321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
4341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace renderscript
4351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace android
436