rsCpuScriptGroup2.cpp revision 4c368af7e705f0bcb77fa99495b2e33ef20d2699
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h"
21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h>
4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h>
5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h>
6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h>
7da0f069871343119251d6b0586be356dc2146a62Yang Ni
8062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <set>
9062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <sstream>
10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string>
11da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector>
12da0f069871343119251d6b0586be356dc2146a62Yang Ni
13da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
14da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h"
15da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
16da0f069871343119251d6b0586be356dc2146a62Yang Ni
171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h"
181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h"
191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h"
201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h"
212abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h"
221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h"
231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h"
241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h"
25da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h"
26da0f069871343119251d6b0586be356dc2146a62Yang Ni
27da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string;
28da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector;
291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android {
311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript {
321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace {
341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
// Default kernel argument count. Not referenced in the visible part of this
// file.
constexpr size_t DefaultKernelArgCount = 2;
361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
37b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni               uint32_t xend, uint32_t outstep) {
39b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
40b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
42b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    const size_t oldInLen = mutable_kinfo->inLen;
43b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross
44b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    decltype(mutable_kinfo->inStride) oldInStride;
45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : closures) {
48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
50b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        // There had better be enough space in mutable_kinfo
51b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
53ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        for (size_t i = 0; i < closure->mNumArg; i++) {
54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const void* arg = closure->mArgs[i];
55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const Allocation* a = (const Allocation*)arg;
56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint32_t eStride = a->mHal.state.elementSizeBytes;
57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    eStride * xstart;
59b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            if (kinfo->dim.y > 1) {
60b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mutable_kinfo->inPtr[i] = ptr;
63b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mutable_kinfo->inStride[i] = eStride;
64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        mutable_kinfo->inLen = closure->mNumArg;
661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Allocation* out = closure->mReturnValue;
68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint32_t ostep = out->mHal.state.elementSizeBytes;
69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ostep * xstart;
71b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        if (kinfo->dim.y > 1) {
72b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
75b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        rsAssert(kinfo->outLen <= 1);
76b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
78b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        cpuClosure->mFunc(kinfo, xstart, xend, ostep);
791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
81b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    mutable_kinfo->inLen = oldInLen;
82b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
85da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // namespace
86da0f069871343119251d6b0586be356dc2146a62Yang Ni
87062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) :
88062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mGroup(group), mFunc(nullptr) {
89062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mName = strndup(name, strlen(name));
90062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
91062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
92da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() {
93eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* c : mClosures) {
94eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete c;
95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
96062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    free(mName);
97da0f069871343119251d6b0586be356dc2146a62Yang Ni}
98da0f069871343119251d6b0586be356dc2146a62Yang Ni
99ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const {
100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.empty()) {
101eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
103da0f069871343119251d6b0586be356dc2146a62Yang Ni
104ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const Closure* closure = cpuClosure->mClosure;
105ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
106062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // An invoke should be in a batch by itself, so it conflicts with any other
108eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // closure.
1091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        return true;
1101ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
111eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
112ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& globalDeps = closure->mGlobalDeps;
113ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& argDeps = closure->mArgDeps;
114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    for (CPUClosure* c : mClosures) {
116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const Closure* batched = c->mClosure;
117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (globalDeps.find(batched) != globalDeps.end()) {
118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            return true;
119eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const auto& it = argDeps.find(batched);
121ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (it != argDeps.end()) {
122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const auto& args = (*it).second;
123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            for (const auto &p1 : *args) {
124bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni                if (p1.second.get() != nullptr) {
125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                    return true;
126eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                }
127eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        }
1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
130ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
131eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    return false;
1321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                         const ScriptGroupBase *sg) :
136062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
137062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mExecutable(nullptr), mScriptObj(nullptr) {
138eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!mGroup->mClosures.empty());
139eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
140062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    Batch* batch = new Batch(this, "Batch0");
141062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
142eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Closure* closure: mGroup->mClosures) {
143eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        CPUClosure* cc;
144062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
145062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        RsdCpuScriptImpl* si =
146062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
147062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (closure->mIsKernel) {
148eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            MTLaunchStruct mtls;
149062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            si->forEachKernelSetup(funcID->mSlot, &mtls);
150062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
151eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        } else {
152eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            cc = new CPUClosure(closure, si);
153eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
1541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
155eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (batch->conflict(cc)) {
156eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            mBatches.push_back(batch);
157062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            std::stringstream ss;
158062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ss << "Batch" << ++i;
159062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch = new Batch(this, ss.str().c_str());
160eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
161da0f069871343119251d6b0586be356dc2146a62Yang Ni
162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->mClosures.push_back(cc);
163eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
164da0f069871343119251d6b0586be356dc2146a62Yang Ni
165eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!batch->mClosures.empty());
166eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mBatches.push_back(batch);
167da0f069871343119251d6b0586be356dc2146a62Yang Ni
168da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
169062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    compile(mGroup->mCacheDir);
170062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr && mExecutable != nullptr) {
171062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        for (Batch* batch : mBatches) {
172062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch->resolveFuncPtr(mScriptObj);
173062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
174eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
175062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif  // RS_COMPATIBILITY_LIB
176062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
177062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
178062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) {
179062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::string funcName(mName);
180062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mClosures.front()->mClosure->mIsKernel) {
181062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        funcName.append(".expand");
182062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
183062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mFunc = dlsym(sharedObj, funcName.c_str());
184062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (mFunc != nullptr);
1851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1871ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
188eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Batch* batch : mBatches) {
189eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete batch;
190eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
191bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni    delete mExecutable;
192062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    // TODO: move this dlclose into ~ScriptExecutable().
193062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr) {
194062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        dlclose(mScriptObj);
195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
196da0f069871343119251d6b0586be356dc2146a62Yang Ni}
197da0f069871343119251d6b0586be356dc2146a62Yang Ni
198da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace {
199da0f069871343119251d6b0586be356dc2146a62Yang Ni
200da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
201da0f069871343119251d6b0586be356dc2146a62Yang Ni
202edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Nistring getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
203edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    *coreLibRelaxedPath = "";
204edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
205edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // If we're debugging, use the debug library.
206edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
207edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni        return SYSLIBPATH"/libclcore_debug.bc";
208edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    }
209edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
210edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // Check for a platform specific library
211edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
212edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
213edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // NEON-capable ARMv7a devices can use an accelerated math library
214edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // for all reduced precision scripts.
215edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // ARMv8 does not use NEON, as ASIMD can be used with all precision
216edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // levels.
217edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
218edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif
219edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
220edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(__i386__) || defined(__x86_64__)
221edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // x86 devices will use an optimized library.
222edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    return SYSLIBPATH"/libclcore_x86.bc";
223edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#else
224edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    return SYSLIBPATH"/libclcore.bc";
225edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif
226edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni}
227edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
228da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments(
229cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const vector<const char*>& inputs, const vector<string>& kernelBatches,
230062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const vector<string>& invokeBatches,
231cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const char* outputDir, const char* outputFileName,
232cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const char* coreLibPath, const char* coreLibRelaxedPath,
233edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni        vector<const char*>* args) {
234eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
235eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-fPIC");
236eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-embedRSInfo");
237eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-mtriple");
238eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
239eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-bclib");
240cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(coreLibPath);
241edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    args->push_back("-bclib_relaxed");
242cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(coreLibRelaxedPath);
243cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    for (const char* input : inputs) {
244cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        args->push_back(input);
245eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
246062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : kernelBatches) {
247062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-merge");
248062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
249062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
250062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : invokeBatches) {
251062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-invoke");
252062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
253eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
254eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-output_path");
255cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(outputDir);
256eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-o");
257cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(outputFileName);
258da0f069871343119251d6b0586be356dc2146a62Yang Ni}
259da0f069871343119251d6b0586be356dc2146a62Yang Ni
260062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid generateSourceSlot(const Closure& closure,
261cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                        const std::vector<const char*>& inputs,
262062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                        std::stringstream& ss) {
263062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
264062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const Script* script = funcID->mScript;
265062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
266062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (!script->isIntrinsic());
267062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
268062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const RsdCpuScriptImpl *cpuScript =
269062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            (const RsdCpuScriptImpl*)script->mHal.drv;
270062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
271062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
272062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
273062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            inputs.begin();
274062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
275062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    ss << index << "," << funcID->mSlot << ".";
276062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
277062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
#endif  // RS_COMPATIBILITY_LIB
279da0f069871343119251d6b0586be356dc2146a62Yang Ni
280da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // anonymous namespace
281da0f069871343119251d6b0586be356dc2146a62Yang Ni
282062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) {
283da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
284062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mGroup->mClosures.size() < 2) {
285eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
286eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
287da0f069871343119251d6b0586be356dc2146a62Yang Ni
288cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    auto comparator = [](const char* str1, const char* str2) -> bool {
289cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        return strcmp(str1, str2) < 0;
290cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    };
291cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    std::set<const char*, decltype(comparator)> inputSet(comparator);
292cb17015fed6b11a5028f31cc804a3847e379945dYang Ni
293062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (Closure* closure : mGroup->mClosures) {
294062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Script* script = closure->mFunctionID.get()->mScript;
295da0f069871343119251d6b0586be356dc2146a62Yang Ni
296062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // If any script is an intrinsic, give up trying fusing the kernels.
297eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (script->isIntrinsic()) {
298eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            return;
299eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
300da0f069871343119251d6b0586be356dc2146a62Yang Ni
301eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const RsdCpuScriptImpl *cpuScript =
302eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                (const RsdCpuScriptImpl*)script->mHal.drv;
303cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const char* bitcodeFilename = cpuScript->getBitcodeFilePath();
304062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        inputSet.insert(bitcodeFilename);
305062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
306062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
307cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    std::vector<const char*> inputs(inputSet.begin(), inputSet.end());
308062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
309062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> kernelBatches;
310062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> invokeBatches;
311062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
312062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
313062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const auto& batch : mBatches) {
314062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(batch->size() > 0);
315da0f069871343119251d6b0586be356dc2146a62Yang Ni
316062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        std::stringstream ss;
317062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        ss << batch->mName << ":";
318062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
319062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (!batch->mClosures.front()->mClosure->mIsKernel) {
320062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(batch->size() == 1);
321062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss);
322062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            invokeBatches.push_back(ss.str());
323062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
324062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            for (const auto& cpuClosure : batch->mClosures) {
325062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                generateSourceSlot(*cpuClosure->mClosure, inputs, ss);
326062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            }
327062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            kernelBatches.push_back(ss.str());
328062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
329eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
330da0f069871343119251d6b0586be356dc2146a62Yang Ni
331433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    rsAssert(cacheDir != nullptr);
332433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    string objFilePath(cacheDir);
333f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    objFilePath.append("/");
334f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    objFilePath.append(mGroup->mName);
335f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    objFilePath.append(".o");
336433558f0f9abbf07770db288183a15fd261cace2Yabin Cui
337cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    const char* resName = mGroup->mName;
338edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    string coreLibRelaxedPath;
339edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
340edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni                                               &coreLibRelaxedPath);
341f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
342eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    vector<const char*> arguments;
343cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
344cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                          resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(),
345f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni                          &arguments);
346f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
347f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1,
348cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                                                       arguments.data()));
349cb17015fed6b11a5028f31cc804a3847e379945dYang Ni
350cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    inputs.push_back(coreLibPath.c_str());
351cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    inputs.push_back(coreLibRelaxedPath.c_str());
352cb17015fed6b11a5028f31cc804a3847e379945dYang Ni
353cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(),
354cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                                               inputs.data(), inputs.size());
355f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
356cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    if (checksum == 0) {
357f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        return;
358f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    }
359f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
360cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    std::stringstream ss;
361cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    ss << std::hex << checksum;
362cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    const char* checksumStr = ss.str().c_str();
363f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
364f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
365f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    // Try to load a shared lib from code cache matching filename and checksum
366f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
367f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
368f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
369f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    if (mScriptObj != nullptr) {
370f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        mExecutable = ScriptExecutable::createFromSharedObject(
371cb17015fed6b11a5028f31cc804a3847e379945dYang Ni            getCpuRefImpl()->getContext(), mScriptObj, checksum);
372f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        if (mExecutable != nullptr) {
373cb17015fed6b11a5028f31cc804a3847e379945dYang Ni            return;
374f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        } else {
375f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni            ALOGE("Failed to create an executable object from so file");
376f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        }
377f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        dlclose(mScriptObj);
378f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        mScriptObj = nullptr;
379f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    }
380f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
381f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
382f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    // Fuse the input kernels and generate native code in an object file
383f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
384f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
385f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    arguments.push_back("-build-checksum");
386cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    arguments.push_back(checksumStr);
387f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    arguments.push_back(nullptr);
388eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
3892fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar    bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH,
390f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni                                      arguments.size()-1,
391f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni                                      arguments.data());
3922fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar    if (!compiled) {
393eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
394eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
395da0f069871343119251d6b0586be356dc2146a62Yang Ni
396eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
397eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Create and load the shared lib
398eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
399da0f069871343119251d6b0586be356dc2146a62Yang Ni
4004c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines    if (!SharedLibraryUtils::createSharedLibrary(
4014c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines            getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) {
402eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Failed to link object file '%s'", resName);
4038b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni        unlink(objFilePath.c_str());
404eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
405eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
406da0f069871343119251d6b0586be356dc2146a62Yang Ni
4078b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni    unlink(objFilePath.c_str());
4088b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni
409062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
410062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj == nullptr) {
411eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Unable to load '%s'", resName);
412eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
413eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
414da0f069871343119251d6b0586be356dc2146a62Yang Ni
415eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mExecutable = ScriptExecutable::createFromSharedObject(
416bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni        getCpuRefImpl()->getContext(),
417062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mScriptObj);
418da0f069871343119251d6b0586be356dc2146a62Yang Ni
419da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif  // RS_COMPATIBILITY_LIB
4201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4211ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
4221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() {
423eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (auto batch : mBatches) {
424eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->setGlobalsForBatch();
425eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->run();
426eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
4271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
429da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() {
430eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
431eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
432062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
433062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        Script* s = funcID->mScript;;
434eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        for (const auto& p : closure->mGlobals) {
435eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const void* value = p.second.first;
436eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            int size = p.second.second;
437eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (value == nullptr && size == 0) {
438eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // This indicates the current closure depends on another closure for a
439eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // global in their shared module (script). In this case we don't need to
440eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // copy the value. For example, an invoke intializes a global variable
441eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // which a kernel later reads.
442eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                continue;
443eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
444ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            rsAssert(p.first != nullptr);
445062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            Script* script = p.first->mScript;
446062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const RsdCpuScriptImpl *cpuScript =
447062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    (const RsdCpuScriptImpl*)script->mHal.drv;
448062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            int slot = p.first->mSlot;
449062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ScriptExecutable* exec = mGroup->getExecutable();
450062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            if (exec != nullptr) {
451062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                const char* varName = cpuScript->getFieldName(slot);
452062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                void* addr = exec->getFieldAddress(varName);
453062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
454062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
455062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                 (rs_object_base*)addr, (ObjectBase*)value);
456062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
457062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    memcpy(addr, (const void*)&value, size);
458062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
459eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            } else {
460062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                // We use -1 size to indicate an ObjectBase rather than a primitive type
461062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
462062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVarObj(slot, (ObjectBase*)value);
463062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
464062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVar(slot, (const void*)&value, size);
465062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
466eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
467eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
4681ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
4691ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4701ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
471da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() {
472062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!mClosures.front()->mClosure->mIsKernel) {
473062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(mClosures.size() == 1);
474062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
475062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // This batch contains a single closure for an invoke function
476062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        CPUClosure* cc = mClosures.front();
477062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Closure* c = cc->mClosure;
478062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
479062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (mFunc != nullptr) {
480062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // TODO: Need align pointers for x86_64.
481062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
482062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
483062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
484062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
485062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(invokeID != nullptr);
486062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
487062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
488062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
489062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        return;
490062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
491062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
492062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mFunc != nullptr) {
493eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        MTLaunchStruct mtls;
494eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* firstCpuClosure = mClosures.front();
495eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* lastCpuClosure = mClosures.back();
496eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
497eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        firstCpuClosure->mSi->forEachMtlsSetup(
498ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
499ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
500eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
501eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, 0, nullptr, &mtls);
502eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
503eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
504eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = nullptr;
505062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mtls.kernel = (ForEachFunc_t)mFunc;
506eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
507eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(
508ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
509ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
510eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
511eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, &mtls);
512eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
513eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
514eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
515eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
516eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
517eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
518062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
519062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
520eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->preLaunch(kernelID->mSlot,
521ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   (const Allocation**)closure->mArgs,
522ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   closure->mNumArg, closure->mReturnValue,
523062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                   nullptr, 0, nullptr);
524eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
525da0f069871343119251d6b0586be356dc2146a62Yang Ni
526eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const CPUClosure* cpuClosure = mClosures.front();
527eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const Closure* closure = cpuClosure->mClosure;
528eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    MTLaunchStruct mtls;
529da0f069871343119251d6b0586be356dc2146a62Yang Ni
530ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
531ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                          closure->mNumArg,
532eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          closure->mReturnValue,
533eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          nullptr, 0, nullptr, &mtls)) {
534da0f069871343119251d6b0586be356dc2146a62Yang Ni
535eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
536eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.kernel = (void (*)())&groupRoot;
537eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = &mClosures;
538da0f069871343119251d6b0586be356dc2146a62Yang Ni
539eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
540eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
541eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
542eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
543eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
544062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
545062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
546eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->postLaunch(kernelID->mSlot,
547ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    (const Allocation**)closure->mArgs,
548ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    closure->mNumArg, closure->mReturnValue,
549eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                    nullptr, 0, nullptr);
550eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
5511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
5521ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
5531ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace renderscript
5541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace android
555