rsCpuScriptGroup2.cpp revision edf4ea312cc3f7dd4373f8db5aaf9325ff054c8e
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h"
21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h>
4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h>
5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h>
6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h>
7da0f069871343119251d6b0586be356dc2146a62Yang Ni
8062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <set>
9062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <sstream>
10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string>
11da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector>
12da0f069871343119251d6b0586be356dc2146a62Yang Ni
13da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
14da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h"
15da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <sys/wait.h>
16da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
17da0f069871343119251d6b0586be356dc2146a62Yang Ni
181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h"
191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h"
201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h"
211ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h"
222abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h"
231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h"
241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h"
251ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h"
26da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h"
27da0f069871343119251d6b0586be356dc2146a62Yang Ni
28da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string;
29da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector;
301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android {
321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript {
331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace {
351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
36da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2;
371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
391ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni               uint32_t xend, uint32_t outstep) {
40ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kparams->usr;
41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
42eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const void **oldIns  = kparams->ins;
43eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    uint32_t *oldStrides = kparams->inEStrides;
44eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
45eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    std::vector<const void*> ins(DefaultKernelArgCount);
46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    std::vector<uint32_t> strides(DefaultKernelArgCount);
47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : closures) {
49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
50eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
51eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        auto in_iter = ins.begin();
52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        auto stride_iter = strides.begin();
53eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        for (size_t i = 0; i < closure->mNumArg; i++) {
55ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const void* arg = closure->mArgs[i];
56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const Allocation* a = (const Allocation*)arg;
57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint32_t eStride = a->mHal.state.elementSizeBytes;
58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
59eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    eStride * xstart;
60eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (kparams->dimY > 1) {
61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ptr += a->mHal.drvState.lod[0].stride * kparams->y;
62eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
63eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            *in_iter++ = ptr;
64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            *stride_iter++ = eStride;
65eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->ins = &ins[0];
68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->inEStrides = &strides[0];
691ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Allocation* out = closure->mReturnValue;
71eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint32_t ostep = out->mHal.state.elementSizeBytes;
72eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ostep * xstart;
74eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (kparams->dimY > 1) {
75eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            ptr += out->mHal.drvState.lod[0].stride * kparams->y;
76eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
78eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mutable_kparams->out = (void*)ptr;
791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
80eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mFunc(kparams, xstart, xend, ostep);
811ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
821ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
83eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mutable_kparams->ins        = oldIns;
84eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mutable_kparams->inEStrides = oldStrides;
851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
87da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // namespace
88da0f069871343119251d6b0586be356dc2146a62Yang Ni
89062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) :
90062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mGroup(group), mFunc(nullptr) {
91062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mName = strndup(name, strlen(name));
92062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
93062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
94da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() {
95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* c : mClosures) {
96eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete c;
97eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
98062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    free(mName);
99da0f069871343119251d6b0586be356dc2146a62Yang Ni}
100da0f069871343119251d6b0586be356dc2146a62Yang Ni
101ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const {
102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.empty()) {
103eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
104eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
105da0f069871343119251d6b0586be356dc2146a62Yang Ni
106ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const Closure* closure = cpuClosure->mClosure;
107ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
108062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
109eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // An invoke should be in a batch by itself, so it conflicts with any other
110eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // closure.
1111ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        return true;
1121ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
113eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& globalDeps = closure->mGlobalDeps;
115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& argDeps = closure->mArgDeps;
116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    for (CPUClosure* c : mClosures) {
118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const Closure* batched = c->mClosure;
119ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (globalDeps.find(batched) != globalDeps.end()) {
120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            return true;
121eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const auto& it = argDeps.find(batched);
123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (it != argDeps.end()) {
124ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const auto& args = (*it).second;
125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            for (const auto &p1 : *args) {
126ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                if (p1.second->get() != nullptr) {
127ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                    return true;
128eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                }
129eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
1301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        }
1311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
132ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
133eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    return false;
1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1361ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
1371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                         const ScriptGroupBase *sg) :
138062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
139062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mExecutable(nullptr), mScriptObj(nullptr) {
140eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!mGroup->mClosures.empty());
141eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
142062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    Batch* batch = new Batch(this, "Batch0");
143062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
144eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Closure* closure: mGroup->mClosures) {
145eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        CPUClosure* cc;
146062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
147062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        RsdCpuScriptImpl* si =
148062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
149062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (closure->mIsKernel) {
150eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            MTLaunchStruct mtls;
151062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            si->forEachKernelSetup(funcID->mSlot, &mtls);
152062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
153eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        } else {
154eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            cc = new CPUClosure(closure, si);
155eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
1561ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
157eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (batch->conflict(cc)) {
158eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            mBatches.push_back(batch);
159062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            std::stringstream ss;
160062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ss << "Batch" << ++i;
161062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch = new Batch(this, ss.str().c_str());
162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
163da0f069871343119251d6b0586be356dc2146a62Yang Ni
164eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->mClosures.push_back(cc);
165eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
166da0f069871343119251d6b0586be356dc2146a62Yang Ni
167eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!batch->mClosures.empty());
168eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mBatches.push_back(batch);
169da0f069871343119251d6b0586be356dc2146a62Yang Ni
170da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
171062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    compile(mGroup->mCacheDir);
172062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr && mExecutable != nullptr) {
173062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        for (Batch* batch : mBatches) {
174062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch->resolveFuncPtr(mScriptObj);
175062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
176eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
177062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif  // RS_COMPATIBILITY_LIB
178062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
179062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
180062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) {
181062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::string funcName(mName);
182062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mClosures.front()->mClosure->mIsKernel) {
183062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        funcName.append(".expand");
184062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
185062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mFunc = dlsym(sharedObj, funcName.c_str());
186062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (mFunc != nullptr);
1871ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1881ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1891ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
190eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Batch* batch : mBatches) {
191eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete batch;
192eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
193062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    // TODO: move this dlclose into ~ScriptExecutable().
194062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr) {
195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        dlclose(mScriptObj);
196062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
197062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    delete mExecutable;
198da0f069871343119251d6b0586be356dc2146a62Yang Ni}
199da0f069871343119251d6b0586be356dc2146a62Yang Ni
200da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace {
201da0f069871343119251d6b0586be356dc2146a62Yang Ni
202da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
203da0f069871343119251d6b0586be356dc2146a62Yang Ni
204edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Nistring getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
205edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    *coreLibRelaxedPath = "";
206edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
207edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // If we're debugging, use the debug library.
208edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
209edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni        return SYSLIBPATH"/libclcore_debug.bc";
210edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    }
211edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
212edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // Check for a platform specific library
213edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
214edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
215edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // NEON-capable ARMv7a devices can use an accelerated math library
216edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // for all reduced precision scripts.
217edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // ARMv8 does not use NEON, as ASIMD can be used with all precision
218edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // levels.
219edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
220edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif
221edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
222edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(__i386__) || defined(__x86_64__)
223edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // x86 devices will use an optimized library.
224edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    return SYSLIBPATH"/libclcore_x86.bc";
225edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#else
226edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    return SYSLIBPATH"/libclcore.bc";
227edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif
228edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni}
229edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
// Returns the last component of |path|, i.e. everything after the final '/'
// or '\\'. A path without any separator is returned unchanged, and a path
// ending in a separator yields an empty string.
std::string getFileName(std::string path) {
    // Keep the position in the string's own size_type: narrowing it to
    // 'unsigned' would truncate positions in very long strings and would only
    // handle npos through integer wrap-around.
    const std::string::size_type lastSeparator = path.find_last_of("/\\");
    if (lastSeparator == std::string::npos) {
        return path;
    }
    return path.substr(lastSeparator + 1);
}
234da0f069871343119251d6b0586be356dc2146a62Yang Ni
235da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments(
236062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const vector<string>& inputs, const vector<string>& kernelBatches,
237062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const vector<string>& invokeBatches,
238eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& output_dir, const string& output_filename,
239edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni        const string& coreLibPath, const string& coreLibRelaxedPath,
240edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni        vector<const char*>* args) {
241eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
242eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-fPIC");
243eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-embedRSInfo");
244eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-mtriple");
245eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
246eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-bclib");
247edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    args->push_back(coreLibPath.c_str());
248edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    args->push_back("-bclib_relaxed");
249edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    args->push_back(coreLibRelaxedPath.c_str());
250eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (const string& input : inputs) {
251eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        args->push_back(input.c_str());
252eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
253062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : kernelBatches) {
254062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-merge");
255062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
256062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
257062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : invokeBatches) {
258062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-invoke");
259062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
260eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
261eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-output_path");
262eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(output_dir.c_str());
263eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-o");
264eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(output_filename.c_str());
265eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(nullptr);
266da0f069871343119251d6b0586be356dc2146a62Yang Ni}
267da0f069871343119251d6b0586be356dc2146a62Yang Ni
268da0f069871343119251d6b0586be356dc2146a62Yang Nibool fuseAndCompile(const char** arguments,
269da0f069871343119251d6b0586be356dc2146a62Yang Ni                    const string& commandLine) {
270eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const pid_t pid = fork();
271da0f069871343119251d6b0586be356dc2146a62Yang Ni
272eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (pid == -1) {
273eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Couldn't fork for bcc execution");
274eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
275eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
276da0f069871343119251d6b0586be356dc2146a62Yang Ni
277eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (pid == 0) {
278eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // Child process
279eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGV("Invoking BCC with: %s", commandLine.c_str());
280eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments);
281da0f069871343119251d6b0586be356dc2146a62Yang Ni
282eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("execv() failed: %s", strerror(errno));
283eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        abort();
284eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
285eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
286da0f069871343119251d6b0586be356dc2146a62Yang Ni
287eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Parent process
288eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    int status = 0;
289eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const pid_t w = waitpid(pid, &status, 0);
290eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (w == -1) {
291eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
292eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
293da0f069871343119251d6b0586be356dc2146a62Yang Ni
294eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) {
295eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("bcc terminated unexpectedly");
296eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
297eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
298da0f069871343119251d6b0586be356dc2146a62Yang Ni
299eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    return true;
300da0f069871343119251d6b0586be356dc2146a62Yang Ni}
301062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
302062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid generateSourceSlot(const Closure& closure,
303062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                        const std::vector<std::string>& inputs,
304062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                        std::stringstream& ss) {
305062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
306062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const Script* script = funcID->mScript;
307062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
308062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (!script->isIntrinsic());
309062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
310062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const RsdCpuScriptImpl *cpuScript =
311062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            (const RsdCpuScriptImpl*)script->mHal.drv;
312062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
313062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
314062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
315062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            inputs.begin();
316062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
317062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    ss << index << "," << funcID->mSlot << ".";
318062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
319062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
#endif  // RS_COMPATIBILITY_LIB
321da0f069871343119251d6b0586be356dc2146a62Yang Ni
322da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // anonymous namespace
323da0f069871343119251d6b0586be356dc2146a62Yang Ni
324062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) {
325da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
326062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mGroup->mClosures.size() < 2) {
327eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
328eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
329da0f069871343119251d6b0586be356dc2146a62Yang Ni
330eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
331eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Fuse the input kernels and generate native code in an object file
332eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
333da0f069871343119251d6b0586be356dc2146a62Yang Ni
334062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::set<string> inputSet;
335062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (Closure* closure : mGroup->mClosures) {
336062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Script* script = closure->mFunctionID.get()->mScript;
337da0f069871343119251d6b0586be356dc2146a62Yang Ni
338062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // If any script is an intrinsic, give up trying fusing the kernels.
339eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (script->isIntrinsic()) {
340eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            return;
341eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
342da0f069871343119251d6b0586be356dc2146a62Yang Ni
343eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const RsdCpuScriptImpl *cpuScript =
344eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                (const RsdCpuScriptImpl*)script->mHal.drv;
345eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
346062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        inputSet.insert(bitcodeFilename);
347062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
348062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
349062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> inputs(inputSet.begin(), inputSet.end());
350062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
351062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> kernelBatches;
352062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> invokeBatches;
353062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
354062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
355062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const auto& batch : mBatches) {
356062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(batch->size() > 0);
357da0f069871343119251d6b0586be356dc2146a62Yang Ni
358062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        std::stringstream ss;
359062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        ss << batch->mName << ":";
360062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
361062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (!batch->mClosures.front()->mClosure->mIsKernel) {
362062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(batch->size() == 1);
363062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss);
364062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            invokeBatches.push_back(ss.str());
365062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
366062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            for (const auto& cpuClosure : batch->mClosures) {
367062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                generateSourceSlot(*cpuClosure->mClosure, inputs, ss);
368062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            }
369062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            kernelBatches.push_back(ss.str());
370062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
371eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
372da0f069871343119251d6b0586be356dc2146a62Yang Ni
373433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    rsAssert(cacheDir != nullptr);
374433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    string objFilePath(cacheDir);
375433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    objFilePath.append("/fusedXXXXXX.o");
376433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    // Find unique object file name, to make following file names unique.
377433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    int tempfd = mkstemps(&objFilePath[0], 2);
378433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    if (tempfd == -1) {
379433558f0f9abbf07770db288183a15fd261cace2Yabin Cui      return;
380433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    }
381433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    TEMP_FAILURE_RETRY(close(tempfd));
382433558f0f9abbf07770db288183a15fd261cace2Yabin Cui
383433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2));
384edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    string coreLibRelaxedPath;
385edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
386edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni                                               &coreLibRelaxedPath);
387eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    vector<const char*> arguments;
388062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
389edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni                          outputFileName, coreLibPath, coreLibRelaxedPath, &arguments);
3902abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni    std::unique_ptr<const char> joined(
3912abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni        rsuJoinStrings(arguments.size() - 1, arguments.data()));
3922abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni    string commandLine (joined.get());
393eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
394eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!fuseAndCompile(arguments.data(), commandLine)) {
395433558f0f9abbf07770db288183a15fd261cace2Yabin Cui        unlink(objFilePath.c_str());
396eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
397eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
398da0f069871343119251d6b0586be356dc2146a62Yang Ni
399eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
400eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Create and load the shared lib
401eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
402da0f069871343119251d6b0586be356dc2146a62Yang Ni
403eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const char* resName = outputFileName.c_str();
404da0f069871343119251d6b0586be356dc2146a62Yang Ni
405eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
406eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Failed to link object file '%s'", resName);
407eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
408eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
409da0f069871343119251d6b0586be356dc2146a62Yang Ni
410062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
411062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj == nullptr) {
412eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Unable to load '%s'", resName);
413eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
414eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
415da0f069871343119251d6b0586be356dc2146a62Yang Ni
416eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mExecutable = ScriptExecutable::createFromSharedObject(
417062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        nullptr,  // RS context. Unused.
418062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mScriptObj);
419da0f069871343119251d6b0586be356dc2146a62Yang Ni
420da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif  // RS_COMPATIBILITY_LIB
4211ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
4231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() {
424eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (auto batch : mBatches) {
425eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->setGlobalsForBatch();
426eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->run();
427eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
4281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
430da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() {
431eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
432eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
433062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
434062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        Script* s = funcID->mScript;;
435eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        for (const auto& p : closure->mGlobals) {
436eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const void* value = p.second.first;
437eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            int size = p.second.second;
438eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (value == nullptr && size == 0) {
439eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // This indicates the current closure depends on another closure for a
440eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // global in their shared module (script). In this case we don't need to
441eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // copy the value. For example, an invoke intializes a global variable
442eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // which a kernel later reads.
443eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                continue;
444eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
445ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            rsAssert(p.first != nullptr);
446ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)",
447ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                  closure, p.first, p.first->mScript, p.first->mSlot);
448062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            Script* script = p.first->mScript;
449062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const RsdCpuScriptImpl *cpuScript =
450062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    (const RsdCpuScriptImpl*)script->mHal.drv;
451062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            int slot = p.first->mSlot;
452062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ScriptExecutable* exec = mGroup->getExecutable();
453062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            if (exec != nullptr) {
454062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                const char* varName = cpuScript->getFieldName(slot);
455062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                void* addr = exec->getFieldAddress(varName);
456062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
457062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
458062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                 (rs_object_base*)addr, (ObjectBase*)value);
459062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
460062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    memcpy(addr, (const void*)&value, size);
461062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
462eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            } else {
463062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                // We use -1 size to indicate an ObjectBase rather than a primitive type
464062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
465062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVarObj(slot, (ObjectBase*)value);
466062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
467062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVar(slot, (const void*)&value, size);
468062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
469eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
470eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
4711ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
4721ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4731ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
474da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() {
475062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!mClosures.front()->mClosure->mIsKernel) {
476062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(mClosures.size() == 1);
477062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
478062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // This batch contains a single closure for an invoke function
479062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        CPUClosure* cc = mClosures.front();
480062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Closure* c = cc->mClosure;
481062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
482062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (mFunc != nullptr) {
483062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // TODO: Need align pointers for x86_64.
484062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
485062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
486062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
487062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
488062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(invokeID != nullptr);
489062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
490062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
491062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
492062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        return;
493062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
494062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
495062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mFunc != nullptr) {
496eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        MTLaunchStruct mtls;
497eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* firstCpuClosure = mClosures.front();
498eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* lastCpuClosure = mClosures.back();
499eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
500eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        firstCpuClosure->mSi->forEachMtlsSetup(
501ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
502ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
503eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
504eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, 0, nullptr, &mtls);
505eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
506eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
507eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = nullptr;
508062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mtls.kernel = (ForEachFunc_t)mFunc;
509eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
510eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(
511ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
512ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
513eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
514eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, &mtls);
515eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
516eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
517eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
518eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
519eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
520eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
521062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
522062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
523eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->preLaunch(kernelID->mSlot,
524ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   (const Allocation**)closure->mArgs,
525ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   closure->mNumArg, closure->mReturnValue,
526062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                   nullptr, 0, nullptr);
527eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
528da0f069871343119251d6b0586be356dc2146a62Yang Ni
529eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const CPUClosure* cpuClosure = mClosures.front();
530eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const Closure* closure = cpuClosure->mClosure;
531eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    MTLaunchStruct mtls;
532da0f069871343119251d6b0586be356dc2146a62Yang Ni
533ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
534ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                          closure->mNumArg,
535eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          closure->mReturnValue,
536eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          nullptr, 0, nullptr, &mtls)) {
537da0f069871343119251d6b0586be356dc2146a62Yang Ni
538eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
539eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.kernel = (void (*)())&groupRoot;
540eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = &mClosures;
541da0f069871343119251d6b0586be356dc2146a62Yang Ni
542eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
543eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
544eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
545eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
546eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
547062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
548062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
549eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->postLaunch(kernelID->mSlot,
550ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    (const Allocation**)closure->mArgs,
551ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    closure->mNumArg, closure->mReturnValue,
552eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                    nullptr, 0, nullptr);
553eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
5541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
5551ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
5561ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace renderscript
5571ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace android
558