rsCpuScriptGroup2.cpp revision 062c287f573ecc06c38ee4295e5627e12c52ac3d
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h"
21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h>
4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h>
5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h>
6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h>
7da0f069871343119251d6b0586be356dc2146a62Yang Ni
8062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <set>
9062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <sstream>
10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string>
11da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector>
12da0f069871343119251d6b0586be356dc2146a62Yang Ni
13da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
14da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h"
15da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <sys/wait.h>
16da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
17da0f069871343119251d6b0586be356dc2146a62Yang Ni
181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h"
191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h"
201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h"
211ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h"
222abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h"
231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h"
241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h"
251ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h"
26da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h"
27da0f069871343119251d6b0586be356dc2146a62Yang Ni
28da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string;
29da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector;
301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android {
321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript {
331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace {
351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
36da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2;
371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
391ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni               uint32_t xend, uint32_t outstep) {
40ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kparams->usr;
41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
42eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const void **oldIns  = kparams->ins;
43eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    uint32_t *oldStrides = kparams->inEStrides;
44eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
45eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    std::vector<const void*> ins(DefaultKernelArgCount);
46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    std::vector<uint32_t> strides(DefaultKernelArgCount);
47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : closures) {
49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
50eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
51eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        auto in_iter = ins.begin();
52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        auto stride_iter = strides.begin();
53eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        for (size_t i = 0; i < closure->mNumArg; i++) {
55ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const void* arg = closure->mArgs[i];
56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const Allocation* a = (const Allocation*)arg;
57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint32_t eStride = a->mHal.state.elementSizeBytes;
58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
59eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    eStride * xstart;
60eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (kparams->dimY > 1) {
61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ptr += a->mHal.drvState.lod[0].stride * kparams->y;
62eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
63eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            *in_iter++ = ptr;
64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            *stride_iter++ = eStride;
65eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->ins = &ins[0];
68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->inEStrides = &strides[0];
691ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Allocation* out = closure->mReturnValue;
71eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint32_t ostep = out->mHal.state.elementSizeBytes;
72eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ostep * xstart;
74eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (kparams->dimY > 1) {
75eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            ptr += out->mHal.drvState.lod[0].stride * kparams->y;
76eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
78eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->out = (void*)ptr;
791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
80eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mFunc(kparams, xstart, xend, ostep);
811ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
821ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
83eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mutable_kparams->ins        = oldIns;
84eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mutable_kparams->inEStrides = oldStrides;
851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
87da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // namespace
88da0f069871343119251d6b0586be356dc2146a62Yang Ni
89062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) :
90062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mGroup(group), mFunc(nullptr) {
91062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mName = strndup(name, strlen(name));
92062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
93062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
94da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() {
95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* c : mClosures) {
96eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete c;
97eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
98062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    free(mName);
99da0f069871343119251d6b0586be356dc2146a62Yang Ni}
100da0f069871343119251d6b0586be356dc2146a62Yang Ni
101ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const {
102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.empty()) {
103eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
104eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
105da0f069871343119251d6b0586be356dc2146a62Yang Ni
106ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const Closure* closure = cpuClosure->mClosure;
107ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
108062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
109eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // An invoke should be in a batch by itself, so it conflicts with any other
110eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // closure.
1111ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        return true;
1121ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
113eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& globalDeps = closure->mGlobalDeps;
115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& argDeps = closure->mArgDeps;
116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    for (CPUClosure* c : mClosures) {
118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const Closure* batched = c->mClosure;
119ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (globalDeps.find(batched) != globalDeps.end()) {
120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            return true;
121eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const auto& it = argDeps.find(batched);
123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (it != argDeps.end()) {
124ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const auto& args = (*it).second;
125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            for (const auto &p1 : *args) {
126ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                if (p1.second->get() != nullptr) {
127ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                    return true;
128eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                }
129eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
1301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        }
1311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
132ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
133eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    return false;
1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1361ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
1371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                         const ScriptGroupBase *sg) :
138062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
139062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mExecutable(nullptr), mScriptObj(nullptr) {
140eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!mGroup->mClosures.empty());
141eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
142062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    Batch* batch = new Batch(this, "Batch0");
143062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
144eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Closure* closure: mGroup->mClosures) {
145eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        CPUClosure* cc;
146062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
147062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        RsdCpuScriptImpl* si =
148062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
149062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (closure->mIsKernel) {
150eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            MTLaunchStruct mtls;
151062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            si->forEachKernelSetup(funcID->mSlot, &mtls);
152062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
153eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        } else {
154eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            cc = new CPUClosure(closure, si);
155eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
1561ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
157eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (batch->conflict(cc)) {
158eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            mBatches.push_back(batch);
159062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            std::stringstream ss;
160062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ss << "Batch" << ++i;
161062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch = new Batch(this, ss.str().c_str());
162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
163da0f069871343119251d6b0586be356dc2146a62Yang Ni
164eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->mClosures.push_back(cc);
165eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
166da0f069871343119251d6b0586be356dc2146a62Yang Ni
167eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!batch->mClosures.empty());
168eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mBatches.push_back(batch);
169da0f069871343119251d6b0586be356dc2146a62Yang Ni
170da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
171062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    compile(mGroup->mCacheDir);
172062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr && mExecutable != nullptr) {
173062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        for (Batch* batch : mBatches) {
174062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch->resolveFuncPtr(mScriptObj);
175062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
176eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
177062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif  // RS_COMPATIBILITY_LIB
178062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
179062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
180062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) {
181062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::string funcName(mName);
182062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mClosures.front()->mClosure->mIsKernel) {
183062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        funcName.append(".expand");
184062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
185062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mFunc = dlsym(sharedObj, funcName.c_str());
186062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (mFunc != nullptr);
1871ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1881ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1891ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
190eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Batch* batch : mBatches) {
191eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete batch;
192eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
193062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    // TODO: move this dlclose into ~ScriptExecutable().
194062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr) {
195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        dlclose(mScriptObj);
196062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
197062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    delete mExecutable;
198da0f069871343119251d6b0586be356dc2146a62Yang Ni}
199da0f069871343119251d6b0586be356dc2146a62Yang Ni
200da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace {
201da0f069871343119251d6b0586be356dc2146a62Yang Ni
202da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
203da0f069871343119251d6b0586be356dc2146a62Yang Ni
// Returns the last component of |path|: everything after the final '/' or
// '\\'. A path without any separator is returned unchanged, and a path ending
// in a separator yields an empty string.
std::string getFileName(const std::string& path) {
    // Keep the position in the string's own size type; narrowing it to
    // `unsigned` truncates npos and only works through unsigned wraparound.
    const std::string::size_type lastSeparator = path.find_last_of("/\\");
    if (lastSeparator == std::string::npos) {
        return path;
    }
    return path.substr(lastSeparator + 1);
}
208da0f069871343119251d6b0586be356dc2146a62Yang Ni
209da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments(
210062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const vector<string>& inputs, const vector<string>& kernelBatches,
211062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const vector<string>& invokeBatches,
212eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& output_dir, const string& output_filename,
213eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& rsLib, vector<const char*>* args) {
214eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
215eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-fPIC");
216eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-embedRSInfo");
217eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-mtriple");
218eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
219eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-bclib");
220eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(rsLib.c_str());
221eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (const string& input : inputs) {
222eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        args->push_back(input.c_str());
223eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
224062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : kernelBatches) {
225062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-merge");
226062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
227062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
228062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : invokeBatches) {
229062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-invoke");
230062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
231eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
232eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-output_path");
233eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(output_dir.c_str());
234eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-o");
235eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(output_filename.c_str());
236eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(nullptr);
237da0f069871343119251d6b0586be356dc2146a62Yang Ni}
238da0f069871343119251d6b0586be356dc2146a62Yang Ni
239da0f069871343119251d6b0586be356dc2146a62Yang Nibool fuseAndCompile(const char** arguments,
240da0f069871343119251d6b0586be356dc2146a62Yang Ni                    const string& commandLine) {
241eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const pid_t pid = fork();
242da0f069871343119251d6b0586be356dc2146a62Yang Ni
243eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (pid == -1) {
244eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Couldn't fork for bcc execution");
245eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
246eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
247da0f069871343119251d6b0586be356dc2146a62Yang Ni
248eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (pid == 0) {
249eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // Child process
250eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGV("Invoking BCC with: %s", commandLine.c_str());
251eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments);
252da0f069871343119251d6b0586be356dc2146a62Yang Ni
253eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("execv() failed: %s", strerror(errno));
254eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        abort();
255eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
256eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
257da0f069871343119251d6b0586be356dc2146a62Yang Ni
258eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Parent process
259eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    int status = 0;
260eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const pid_t w = waitpid(pid, &status, 0);
261eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (w == -1) {
262eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
263eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
264da0f069871343119251d6b0586be356dc2146a62Yang Ni
265eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) {
266eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("bcc terminated unexpectedly");
267eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
268eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
269da0f069871343119251d6b0586be356dc2146a62Yang Ni
270eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    return true;
271da0f069871343119251d6b0586be356dc2146a62Yang Ni}
272062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
273062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid generateSourceSlot(const Closure& closure,
274062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                        const std::vector<std::string>& inputs,
275062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                        std::stringstream& ss) {
276062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
277062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const Script* script = funcID->mScript;
278062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
279062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (!script->isIntrinsic());
280062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
281062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const RsdCpuScriptImpl *cpuScript =
282062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            (const RsdCpuScriptImpl*)script->mHal.drv;
283062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
284062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
285062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
286062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            inputs.begin();
287062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
288062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    ss << index << "," << funcID->mSlot << ".";
289062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
290062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
#endif  // RS_COMPATIBILITY_LIB
292da0f069871343119251d6b0586be356dc2146a62Yang Ni
293da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // anonymous namespace
294da0f069871343119251d6b0586be356dc2146a62Yang Ni
295062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) {
296da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
297062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mGroup->mClosures.size() < 2) {
298eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
299eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
300da0f069871343119251d6b0586be356dc2146a62Yang Ni
301eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
302eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Fuse the input kernels and generate native code in an object file
303eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
304da0f069871343119251d6b0586be356dc2146a62Yang Ni
305062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::set<string> inputSet;
306062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (Closure* closure : mGroup->mClosures) {
307062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Script* script = closure->mFunctionID.get()->mScript;
308da0f069871343119251d6b0586be356dc2146a62Yang Ni
309062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // If any script is an intrinsic, give up trying fusing the kernels.
310eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (script->isIntrinsic()) {
311eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            return;
312eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
313da0f069871343119251d6b0586be356dc2146a62Yang Ni
314eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const RsdCpuScriptImpl *cpuScript =
315eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                (const RsdCpuScriptImpl*)script->mHal.drv;
316eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
317062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        inputSet.insert(bitcodeFilename);
318062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
319062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
320062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> inputs(inputSet.begin(), inputSet.end());
321062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
322062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> kernelBatches;
323062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> invokeBatches;
324062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
325062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
326062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const auto& batch : mBatches) {
327062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(batch->size() > 0);
328da0f069871343119251d6b0586be356dc2146a62Yang Ni
329062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        std::stringstream ss;
330062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        ss << batch->mName << ":";
331062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
332062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (!batch->mClosures.front()->mClosure->mIsKernel) {
333062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(batch->size() == 1);
334062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss);
335062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            invokeBatches.push_back(ss.str());
336062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
337062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            for (const auto& cpuClosure : batch->mClosures) {
338062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                generateSourceSlot(*cpuClosure->mClosure, inputs, ss);
339062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            }
340062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            kernelBatches.push_back(ss.str());
341062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
342eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
343da0f069871343119251d6b0586be356dc2146a62Yang Ni
344433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    rsAssert(cacheDir != nullptr);
345433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    string objFilePath(cacheDir);
346433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    objFilePath.append("/fusedXXXXXX.o");
347433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    // Find unique object file name, to make following file names unique.
348433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    int tempfd = mkstemps(&objFilePath[0], 2);
349433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    if (tempfd == -1) {
350433558f0f9abbf07770db288183a15fd261cace2Yabin Cui      return;
351433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    }
352433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    TEMP_FAILURE_RETRY(close(tempfd));
353433558f0f9abbf07770db288183a15fd261cace2Yabin Cui
354433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2));
355eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    string rsLibPath(SYSLIBPATH"/libclcore.bc");
356eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    vector<const char*> arguments;
357062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
358062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                          outputFileName, rsLibPath, &arguments);
3592abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni    std::unique_ptr<const char> joined(
3602abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni        rsuJoinStrings(arguments.size() - 1, arguments.data()));
3612abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni    string commandLine (joined.get());
362eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
363eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!fuseAndCompile(arguments.data(), commandLine)) {
364433558f0f9abbf07770db288183a15fd261cace2Yabin Cui        unlink(objFilePath.c_str());
365eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
366eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
367da0f069871343119251d6b0586be356dc2146a62Yang Ni
368eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
369eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Create and load the shared lib
370eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
371da0f069871343119251d6b0586be356dc2146a62Yang Ni
372eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const char* resName = outputFileName.c_str();
373da0f069871343119251d6b0586be356dc2146a62Yang Ni
374eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
375eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Failed to link object file '%s'", resName);
376eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
377eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
378da0f069871343119251d6b0586be356dc2146a62Yang Ni
379062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
380062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj == nullptr) {
381eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Unable to load '%s'", resName);
382eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
383eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
384da0f069871343119251d6b0586be356dc2146a62Yang Ni
385eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mExecutable = ScriptExecutable::createFromSharedObject(
386062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        nullptr,  // RS context. Unused.
387062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mScriptObj);
388da0f069871343119251d6b0586be356dc2146a62Yang Ni
389da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif  // RS_COMPATIBILITY_LIB
3901ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3911ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3921ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() {
393eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (auto batch : mBatches) {
394eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->setGlobalsForBatch();
395eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->run();
396eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
3971ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3981ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
399da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() {
400eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
401eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
402062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
403062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        Script* s = funcID->mScript;;
404eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        for (const auto& p : closure->mGlobals) {
405eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const void* value = p.second.first;
406eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            int size = p.second.second;
407eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (value == nullptr && size == 0) {
408eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // This indicates the current closure depends on another closure for a
409eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // global in their shared module (script). In this case we don't need to
410eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // copy the value. For example, an invoke intializes a global variable
411eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // which a kernel later reads.
412eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                continue;
413eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
414ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            rsAssert(p.first != nullptr);
415ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)",
416ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                  closure, p.first, p.first->mScript, p.first->mSlot);
417062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            Script* script = p.first->mScript;
418062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const RsdCpuScriptImpl *cpuScript =
419062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    (const RsdCpuScriptImpl*)script->mHal.drv;
420062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            int slot = p.first->mSlot;
421062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ScriptExecutable* exec = mGroup->getExecutable();
422062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            if (exec != nullptr) {
423062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                const char* varName = cpuScript->getFieldName(slot);
424062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                void* addr = exec->getFieldAddress(varName);
425062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
426062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
427062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                 (rs_object_base*)addr, (ObjectBase*)value);
428062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
429062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    memcpy(addr, (const void*)&value, size);
430062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
431eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            } else {
432062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                // We use -1 size to indicate an ObjectBase rather than a primitive type
433062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
434062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVarObj(slot, (ObjectBase*)value);
435062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
436062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVar(slot, (const void*)&value, size);
437062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
438eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
439eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
4401ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
4411ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4421ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
443da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() {
444062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!mClosures.front()->mClosure->mIsKernel) {
445062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(mClosures.size() == 1);
446062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
447062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // This batch contains a single closure for an invoke function
448062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        CPUClosure* cc = mClosures.front();
449062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Closure* c = cc->mClosure;
450062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
451062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (mFunc != nullptr) {
452062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // TODO: Need align pointers for x86_64.
453062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
454062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
455062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
456062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
457062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(invokeID != nullptr);
458062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
459062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
460062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
461062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        return;
462062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
463062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
464062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mFunc != nullptr) {
465eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        MTLaunchStruct mtls;
466eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* firstCpuClosure = mClosures.front();
467eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* lastCpuClosure = mClosures.back();
468eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
469eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        firstCpuClosure->mSi->forEachMtlsSetup(
470ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
471ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
472eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
473eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, 0, nullptr, &mtls);
474eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
475eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
476eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = nullptr;
477062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mtls.kernel = (ForEachFunc_t)mFunc;
478eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
479eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(
480ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
481ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
482eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
483eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, &mtls);
484eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
485eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
486eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
487eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
488eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
489eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
490062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
491062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
492eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->preLaunch(kernelID->mSlot,
493ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   (const Allocation**)closure->mArgs,
494ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   closure->mNumArg, closure->mReturnValue,
495062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                   nullptr, 0, nullptr);
496eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
497da0f069871343119251d6b0586be356dc2146a62Yang Ni
498eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const CPUClosure* cpuClosure = mClosures.front();
499eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const Closure* closure = cpuClosure->mClosure;
500eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    MTLaunchStruct mtls;
501da0f069871343119251d6b0586be356dc2146a62Yang Ni
502ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
503ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                          closure->mNumArg,
504eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          closure->mReturnValue,
505eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          nullptr, 0, nullptr, &mtls)) {
506da0f069871343119251d6b0586be356dc2146a62Yang Ni
507eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
508eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.kernel = (void (*)())&groupRoot;
509eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = &mClosures;
510da0f069871343119251d6b0586be356dc2146a62Yang Ni
511eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
512eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
513eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
514eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
515eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
516062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
517062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
518eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->postLaunch(kernelID->mSlot,
519ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    (const Allocation**)closure->mArgs,
520ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    closure->mNumArg, closure->mReturnValue,
521eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                    nullptr, 0, nullptr);
522eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
5231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
5241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
5251ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace renderscript
5261ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace android
527