rsCpuScriptGroup2.cpp revision eb9aa675754c49f613c6ad71d41472b30f38b007
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h"
21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h>
4da0f069871343119251d6b0586be356dc2146a62Yang Ni
5da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string>
6da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector>
7da0f069871343119251d6b0586be356dc2146a62Yang Ni
8da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
9da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h"
10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <sys/wait.h>
11da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
12da0f069871343119251d6b0586be356dc2146a62Yang Ni
131ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h"
141ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h"
151ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h"
161ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h"
171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h"
181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h"
191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h"
20da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h"
21da0f069871343119251d6b0586be356dc2146a62Yang Ni
22da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string;
23da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector;
241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
251ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android {
261ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript {
271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace {
291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
30da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2;
311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni               uint32_t xend, uint32_t outstep) {
34eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr;
35eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
36eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const void **oldIns  = kparams->ins;
37eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    uint32_t *oldStrides = kparams->inEStrides;
38eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
39eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    std::vector<const void*> ins(DefaultKernelArgCount);
40eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    std::vector<uint32_t> strides(DefaultKernelArgCount);
41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
42eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : closures) {
43eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
44eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
45eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        auto in_iter = ins.begin();
46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        auto stride_iter = strides.begin();
47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        for (const auto& arg : closure->mArgs) {
49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const Allocation* a = (const Allocation*)arg;
50eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint32_t eStride = a->mHal.state.elementSizeBytes;
51eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    eStride * xstart;
53eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (kparams->dimY > 1) {
54eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ptr += a->mHal.drvState.lod[0].stride * kparams->y;
55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            *in_iter++ = ptr;
57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            *stride_iter++ = eStride;
58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
591ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
60eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->ins = &ins[0];
61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->inEStrides = &strides[0];
621ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
63eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Allocation* out = closure->mReturnValue;
64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint32_t ostep = out->mHal.state.elementSizeBytes;
65eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
66eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ostep * xstart;
67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (kparams->dimY > 1) {
68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            ptr += out->mHal.drvState.lod[0].stride * kparams->y;
69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
701ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
71eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->out = (void*)ptr;
721ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->usr = cpuClosure->mUsrPtr;
741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
75eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mFunc(kparams, xstart, xend, ostep);
761ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
78eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mutable_kparams->ins        = oldIns;
79eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mutable_kparams->inEStrides = oldStrides;
80eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mutable_kparams->usr        = &closures;
811ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
821ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
83da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // namespace
84da0f069871343119251d6b0586be356dc2146a62Yang Ni
85da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() {
86eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* c : mClosures) {
87eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete c;
88eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
89eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mScriptObj) {
90eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        dlclose(mScriptObj);
91eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
92da0f069871343119251d6b0586be356dc2146a62Yang Ni}
93da0f069871343119251d6b0586be356dc2146a62Yang Ni
94da0f069871343119251d6b0586be356dc2146a62Yang Nibool Batch::conflict(CPUClosure* closure) const {
95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.empty()) {
96eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
97eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
98da0f069871343119251d6b0586be356dc2146a62Yang Ni
99eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (closure->mClosure->mKernelID.get() == nullptr ||
100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mClosures.front()->mClosure->mKernelID.get() == nullptr) {
101eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // An invoke should be in a batch by itself, so it conflicts with any other
102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // closure.
1031ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        return true;
1041ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
105eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
106eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (const auto &p : closure->mClosure->mGlobalDeps) {
107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* dep = p.first;
108eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        for (CPUClosure* c : mClosures) {
109eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (c->mClosure == dep) {
110eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its global",
111eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                      closure, dep);
112eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                return true;
113eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
114eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
115eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
116eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (const auto &p : closure->mClosure->mArgDeps) {
117eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* dep = p.first;
118eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        for (CPUClosure* c : mClosures) {
119eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (c->mClosure == dep) {
120eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                for (const auto &p1 : *p.second) {
121eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    if (p1.second->get() != nullptr) {
122eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                        ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its arg",
123eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                              closure, dep);
124eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                        return true;
125eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    }
126eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                }
127eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        }
1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
130eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    return false;
1311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1331ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                         const ScriptGroupBase *sg) :
1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) {
136eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!mGroup->mClosures.empty());
137eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
138eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    Batch* batch = new Batch(this);
139eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Closure* closure: mGroup->mClosures) {
140eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const ScriptKernelID* kernelID = closure->mKernelID.get();
141eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        RsdCpuScriptImpl* si;
142eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        CPUClosure* cc;
143eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (kernelID != nullptr) {
144eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript);
145eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            MTLaunchStruct mtls;
146eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            si->forEachKernelSetup(kernelID->mSlot, &mtls);
147eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            // TODO: Is mtls.fep.usrLen ever used?
148eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel,
149eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                mtls.fep.usr, mtls.fep.usrLen);
150eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        } else {
151eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(
152eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    closure->mInvokeID->mScript);
153eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            cc = new CPUClosure(closure, si);
154eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
1551ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
156eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (batch->conflict(cc)) {
157eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            mBatches.push_back(batch);
158eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            batch = new Batch(this);
159eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
160da0f069871343119251d6b0586be356dc2146a62Yang Ni
161eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->mClosures.push_back(cc);
162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
163da0f069871343119251d6b0586be356dc2146a62Yang Ni
164eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!batch->mClosures.empty());
165eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mBatches.push_back(batch);
166da0f069871343119251d6b0586be356dc2146a62Yang Ni
167da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
168eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Batch* batch : mBatches) {
169eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->tryToCreateFusedKernel(mGroup->mCacheDir.c_str());
170eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
171da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
1721ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1731ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1741ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
175eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Batch* batch : mBatches) {
176eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete batch;
177eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
178da0f069871343119251d6b0586be356dc2146a62Yang Ni}
179da0f069871343119251d6b0586be356dc2146a62Yang Ni
180da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace {
181da0f069871343119251d6b0586be356dc2146a62Yang Ni
182da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
183da0f069871343119251d6b0586be356dc2146a62Yang Ni
// Returns the part of |path| that follows the last '/' or '\\' separator.
// A path without any separator is returned unchanged; a path that ends in a
// separator yields an empty string.
std::string getFileName(std::string path) {
    // Keep the full width of size_type: narrowing the result to unsigned
    // would make the "not found" case depend on integer wraparound.
    const std::string::size_type lastSeparator = path.find_last_of("/\\");
    if (lastSeparator == std::string::npos) {
        return path;
    }
    return path.substr(lastSeparator + 1);
}
188da0f069871343119251d6b0586be356dc2146a62Yang Ni
189da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments(
190eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const vector<string>& inputs, const vector<int>& kernels,
191eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& output_dir, const string& output_filename,
192eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& rsLib, vector<const char*>* args) {
193eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
194eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-fPIC");
195eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-embedRSInfo");
196eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-mtriple");
197eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
198eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-bclib");
199eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(rsLib.c_str());
200eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (const string& input : inputs) {
201eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        args->push_back(input.c_str());
202eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
203eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (int kernel : kernels) {
204eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        args->push_back("-k");
205eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        string strKernel = std::to_string(kernel);
206eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        args->push_back(strKernel.c_str());
207eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
208eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-output_path");
209eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(output_dir.c_str());
210eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-o");
211eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(output_filename.c_str());
212eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(nullptr);
213da0f069871343119251d6b0586be356dc2146a62Yang Ni}
214da0f069871343119251d6b0586be356dc2146a62Yang Ni
// Joins the first |n| C strings of |strs| with single spaces.
// Returns an empty string when |n| is not positive or |strs| is null.
std::string convertListToString(int n, const char* const* strs) {
    std::string ret;
    if (n <= 0 || strs == nullptr) {
        // Nothing to join; in particular strs[0] must not be touched.
        return ret;
    }

    ret.append(strs[0]);
    for (int i = 1; i < n; i++) {
        ret.append(" ");
        ret.append(strs[i]);
    }
    return ret;
}
224da0f069871343119251d6b0586be356dc2146a62Yang Ni
225da0f069871343119251d6b0586be356dc2146a62Yang Nibool fuseAndCompile(const char** arguments,
226da0f069871343119251d6b0586be356dc2146a62Yang Ni                    const string& commandLine) {
227eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const pid_t pid = fork();
228da0f069871343119251d6b0586be356dc2146a62Yang Ni
229eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (pid == -1) {
230eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Couldn't fork for bcc execution");
231eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
232eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
233da0f069871343119251d6b0586be356dc2146a62Yang Ni
234eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (pid == 0) {
235eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // Child process
236eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGV("Invoking BCC with: %s", commandLine.c_str());
237eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments);
238da0f069871343119251d6b0586be356dc2146a62Yang Ni
239eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("execv() failed: %s", strerror(errno));
240eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        abort();
241eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
242eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
243da0f069871343119251d6b0586be356dc2146a62Yang Ni
244eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Parent process
245eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    int status = 0;
246eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const pid_t w = waitpid(pid, &status, 0);
247eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (w == -1) {
248eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
249eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
250da0f069871343119251d6b0586be356dc2146a62Yang Ni
251eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) {
252eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("bcc terminated unexpectedly");
253eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
254eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
255da0f069871343119251d6b0586be356dc2146a62Yang Ni
256eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    return true;
257da0f069871343119251d6b0586be356dc2146a62Yang Ni}
258da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
259da0f069871343119251d6b0586be356dc2146a62Yang Ni
260da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // anonymous namespace
261da0f069871343119251d6b0586be356dc2146a62Yang Ni
262da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::tryToCreateFusedKernel(const char *cacheDir) {
263da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
264eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.size() < 2) {
265eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
266eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
267da0f069871343119251d6b0586be356dc2146a62Yang Ni
268eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
269eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Fuse the input kernels and generate native code in an object file
270eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
271da0f069871343119251d6b0586be356dc2146a62Yang Ni
272eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    std::vector<string> inputFiles;
273eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    std::vector<int> slots;
274da0f069871343119251d6b0586be356dc2146a62Yang Ni
275eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
276eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
277eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const ScriptKernelID* kernelID = closure->mKernelID.get();
278eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Script* script = kernelID->mScript;
279da0f069871343119251d6b0586be356dc2146a62Yang Ni
280eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (script->isIntrinsic()) {
281eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            return;
282eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
283da0f069871343119251d6b0586be356dc2146a62Yang Ni
284eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const RsdCpuScriptImpl *cpuScript =
285eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                (const RsdCpuScriptImpl*)script->mHal.drv;
286da0f069871343119251d6b0586be356dc2146a62Yang Ni
287eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
288da0f069871343119251d6b0586be356dc2146a62Yang Ni
289eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        inputFiles.push_back(bitcodeFilename);
290eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        slots.push_back(kernelID->mSlot);
291eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
292da0f069871343119251d6b0586be356dc2146a62Yang Ni
293eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    string outputPath(tempnam(cacheDir, "fused"));
294eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    string outputFileName = getFileName(outputPath);
295eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    string objFilePath(outputPath);
296eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    objFilePath.append(".o");
297eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    string rsLibPath(SYSLIBPATH"/libclcore.bc");
298eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    vector<const char*> arguments;
299eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath,
300eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                          &arguments);
301eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    string commandLine =
302eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            convertListToString(arguments.size() - 1, arguments.data());
303eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
304eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!fuseAndCompile(arguments.data(), commandLine)) {
305eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
306eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
307da0f069871343119251d6b0586be356dc2146a62Yang Ni
308eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
309eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Create and load the shared lib
310eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
311da0f069871343119251d6b0586be356dc2146a62Yang Ni
312eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const char* resName = outputFileName.c_str();
313da0f069871343119251d6b0586be356dc2146a62Yang Ni
314eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
315eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Failed to link object file '%s'", resName);
316eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
317eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
318da0f069871343119251d6b0586be356dc2146a62Yang Ni
319eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
320eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mSharedObj == nullptr) {
321eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Unable to load '%s'", resName);
322eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
323eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
324da0f069871343119251d6b0586be356dc2146a62Yang Ni
325eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mExecutable = ScriptExecutable::createFromSharedObject(
326eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                                           nullptr,  // RS context. Unused.
327eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                                           mSharedObj);
328da0f069871343119251d6b0586be356dc2146a62Yang Ni
329da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif  // RS_COMPATIBILITY_LIB
3301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() {
333eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (auto batch : mBatches) {
334eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->setGlobalsForBatch();
335eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->run();
336eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
3371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
339da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() {
340eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
341eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
342eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const ScriptKernelID* kernelID = closure->mKernelID.get();
343eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        Script* s;
344eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (kernelID != nullptr) {
345eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            s = kernelID->mScript;
346eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        } else {
347eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            s = cpuClosure->mClosure->mInvokeID->mScript;
348eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
349eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        for (const auto& p : closure->mGlobals) {
350eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const void* value = p.second.first;
351eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            int size = p.second.second;
352eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (value == nullptr && size == 0) {
353eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // This indicates the current closure depends on another closure for a
354eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // global in their shared module (script). In this case we don't need to
355eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // copy the value. For example, an invoke intializes a global variable
356eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // which a kernel later reads.
357eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                continue;
358eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
359eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            // We use -1 size to indicate an ObjectBase rather than a primitive type
360eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (size < 0) {
361eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                s->setVarObj(p.first->mSlot, (ObjectBase*)value);
362eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            } else {
363eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                s->setVar(p.first->mSlot, (const void*)&value, size);
364eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
365eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
3661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
3671ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3681ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
369da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() {
370eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mExecutable != nullptr) {
371eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        MTLaunchStruct mtls;
372eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* firstCpuClosure = mClosures.front();
373eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* lastCpuClosure = mClosures.back();
374eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
375eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        firstCpuClosure->mSi->forEachMtlsSetup(
376eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                (const Allocation**)&firstCpuClosure->mClosure->mArgs[0],
377eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                firstCpuClosure->mClosure->mArgs.size(),
378eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
379eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, 0, nullptr, &mtls);
380eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
381eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
382eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = nullptr;
383eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.kernel = mExecutable->getForEachFunction(0);
384eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
385eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(
386eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                (const Allocation**)&firstCpuClosure->mClosure->mArgs[0],
387eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                firstCpuClosure->mClosure->mArgs.size(),
388eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
389eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, &mtls);
390eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
391eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
392eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
393eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
394eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.size() == 1 &&
395eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mClosures.front()->mClosure->mKernelID.get() == nullptr) {
396eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // This closure is for an invoke function
397eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        CPUClosure* cc = mClosures.front();
398eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* c = cc->mClosure;
399eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const ScriptInvokeID* invokeID = c->mInvokeID;
400eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        rsAssert(invokeID != nullptr);
401eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
402eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
403eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
404da0f069871343119251d6b0586be356dc2146a62Yang Ni
405eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
406eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
407eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const ScriptKernelID* kernelID = closure->mKernelID.get();
408eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->preLaunch(kernelID->mSlot,
409eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                   (const Allocation**)&closure->mArgs[0],
410eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                   closure->mArgs.size(), closure->mReturnValue,
411eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                   cpuClosure->mUsrPtr, cpuClosure->mUsrSize,
412eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                   nullptr);
413eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
414da0f069871343119251d6b0586be356dc2146a62Yang Ni
415eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const CPUClosure* cpuClosure = mClosures.front();
416eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const Closure* closure = cpuClosure->mClosure;
417eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    MTLaunchStruct mtls;
418da0f069871343119251d6b0586be356dc2146a62Yang Ni
419eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0],
420eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          closure->mArgs.size(),
421eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          closure->mReturnValue,
422eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          nullptr, 0, nullptr, &mtls)) {
423da0f069871343119251d6b0586be356dc2146a62Yang Ni
424eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
425eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.kernel = (void (*)())&groupRoot;
426eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = &mClosures;
427da0f069871343119251d6b0586be356dc2146a62Yang Ni
428eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
429eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
430eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
431eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
432eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
433eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const ScriptKernelID* kernelID = closure->mKernelID.get();
434eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->postLaunch(kernelID->mSlot,
435eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                    (const Allocation**)&closure->mArgs[0],
436eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                    closure->mArgs.size(), closure->mReturnValue,
437eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                    nullptr, 0, nullptr);
438eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
4391ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4401ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
4411ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace renderscript
4421ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace android
443