// rsCpuScriptGroup2.cpp revision 2fa8a238dd69afebdeb757adcb1d674043d78e32
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h"
21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h>
4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h>
5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h>
6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h>
7da0f069871343119251d6b0586be356dc2146a62Yang Ni
8062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <set>
9062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <sstream>
10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string>
11da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector>
12da0f069871343119251d6b0586be356dc2146a62Yang Ni
13da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
14da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h"
15da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
16da0f069871343119251d6b0586be356dc2146a62Yang Ni
171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h"
181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h"
191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h"
201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h"
212abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h"
221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h"
231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h"
241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h"
25da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h"
26da0f069871343119251d6b0586be356dc2146a62Yang Ni
27da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string;
28da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector;
291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android {
311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript {
321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace {
341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
// Presumably the default number of arguments a kernel takes.
// NOTE(review): not referenced in the visible part of this file - confirm it
// is still used further down before relying on it.
const size_t DefaultKernelArgCount = 2;
361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
37b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni               uint32_t xend, uint32_t outstep) {
39b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
40b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
42b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    const size_t oldInLen = mutable_kinfo->inLen;
43b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross
44b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    decltype(mutable_kinfo->inStride) oldInStride;
45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : closures) {
48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
50b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        // There had better be enough space in mutable_kinfo
51b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
53ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        for (size_t i = 0; i < closure->mNumArg; i++) {
54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const void* arg = closure->mArgs[i];
55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const Allocation* a = (const Allocation*)arg;
56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint32_t eStride = a->mHal.state.elementSizeBytes;
57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    eStride * xstart;
59b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            if (kinfo->dim.y > 1) {
60b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mutable_kinfo->inPtr[i] = ptr;
63b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mutable_kinfo->inStride[i] = eStride;
64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        mutable_kinfo->inLen = closure->mNumArg;
661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Allocation* out = closure->mReturnValue;
68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint32_t ostep = out->mHal.state.elementSizeBytes;
69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ostep * xstart;
71b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        if (kinfo->dim.y > 1) {
72b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
75b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        rsAssert(kinfo->outLen <= 1);
76b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
78b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        cpuClosure->mFunc(kinfo, xstart, xend, ostep);
791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
81b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    mutable_kinfo->inLen = oldInLen;
82b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
85da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // namespace
86da0f069871343119251d6b0586be356dc2146a62Yang Ni
87062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) :
88062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mGroup(group), mFunc(nullptr) {
89062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mName = strndup(name, strlen(name));
90062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
91062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
92da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() {
93eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* c : mClosures) {
94eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete c;
95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
96062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    free(mName);
97da0f069871343119251d6b0586be356dc2146a62Yang Ni}
98da0f069871343119251d6b0586be356dc2146a62Yang Ni
99ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const {
100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.empty()) {
101eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
103da0f069871343119251d6b0586be356dc2146a62Yang Ni
104ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const Closure* closure = cpuClosure->mClosure;
105ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
106062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // An invoke should be in a batch by itself, so it conflicts with any other
108eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // closure.
1091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        return true;
1101ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
111eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
112ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& globalDeps = closure->mGlobalDeps;
113ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& argDeps = closure->mArgDeps;
114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    for (CPUClosure* c : mClosures) {
116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const Closure* batched = c->mClosure;
117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (globalDeps.find(batched) != globalDeps.end()) {
118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            return true;
119eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const auto& it = argDeps.find(batched);
121ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (it != argDeps.end()) {
122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const auto& args = (*it).second;
123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            for (const auto &p1 : *args) {
124ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                if (p1.second->get() != nullptr) {
125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                    return true;
126eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                }
127eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        }
1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
130ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
131eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    return false;
1321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                         const ScriptGroupBase *sg) :
136062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
137062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mExecutable(nullptr), mScriptObj(nullptr) {
138eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!mGroup->mClosures.empty());
139eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
140062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    Batch* batch = new Batch(this, "Batch0");
141062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
142eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Closure* closure: mGroup->mClosures) {
143eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        CPUClosure* cc;
144062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
145062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        RsdCpuScriptImpl* si =
146062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
147062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (closure->mIsKernel) {
148eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            MTLaunchStruct mtls;
149062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            si->forEachKernelSetup(funcID->mSlot, &mtls);
150062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
151eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        } else {
152eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            cc = new CPUClosure(closure, si);
153eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
1541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
155eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (batch->conflict(cc)) {
156eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            mBatches.push_back(batch);
157062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            std::stringstream ss;
158062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ss << "Batch" << ++i;
159062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch = new Batch(this, ss.str().c_str());
160eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
161da0f069871343119251d6b0586be356dc2146a62Yang Ni
162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->mClosures.push_back(cc);
163eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
164da0f069871343119251d6b0586be356dc2146a62Yang Ni
165eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!batch->mClosures.empty());
166eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mBatches.push_back(batch);
167da0f069871343119251d6b0586be356dc2146a62Yang Ni
168da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
169062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    compile(mGroup->mCacheDir);
170062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr && mExecutable != nullptr) {
171062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        for (Batch* batch : mBatches) {
172062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch->resolveFuncPtr(mScriptObj);
173062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
174eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
175062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif  // RS_COMPATIBILITY_LIB
176062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
177062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
178062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) {
179062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::string funcName(mName);
180062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mClosures.front()->mClosure->mIsKernel) {
181062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        funcName.append(".expand");
182062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
183062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mFunc = dlsym(sharedObj, funcName.c_str());
184062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (mFunc != nullptr);
1851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1871ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
188eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Batch* batch : mBatches) {
189eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete batch;
190eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
191062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    // TODO: move this dlclose into ~ScriptExecutable().
192062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr) {
193062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        dlclose(mScriptObj);
194062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    delete mExecutable;
196da0f069871343119251d6b0586be356dc2146a62Yang Ni}
197da0f069871343119251d6b0586be356dc2146a62Yang Ni
198da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace {
199da0f069871343119251d6b0586be356dc2146a62Yang Ni
200da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
201da0f069871343119251d6b0586be356dc2146a62Yang Ni
202edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Nistring getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
203edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    *coreLibRelaxedPath = "";
204edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
205edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // If we're debugging, use the debug library.
206edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
207edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni        return SYSLIBPATH"/libclcore_debug.bc";
208edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    }
209edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
210edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // Check for a platform specific library
211edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
212edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
213edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // NEON-capable ARMv7a devices can use an accelerated math library
214edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // for all reduced precision scripts.
215edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // ARMv8 does not use NEON, as ASIMD can be used with all precision
216edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // levels.
217edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
218edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif
219edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
220edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(__i386__) || defined(__x86_64__)
221edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // x86 devices will use an optimized library.
222edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    return SYSLIBPATH"/libclcore_x86.bc";
223edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#else
224edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    return SYSLIBPATH"/libclcore.bc";
225edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif
226edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni}
227edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
// Returns the last component of `path`: everything after the final '/' or
// '\\'. A path with no separator is returned unchanged; a path that ends in
// a separator yields an empty string.
std::string getFileName(std::string path) {
    // Keep the full-width size_type so npos is compared as-is instead of
    // being truncated into a narrower unsigned and relied on to wrap to 0.
    const std::string::size_type found = path.find_last_of("/\\");
    if (found == std::string::npos) {
        return path;
    }
    return path.substr(found + 1);
}
232da0f069871343119251d6b0586be356dc2146a62Yang Ni
233da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments(
234062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const vector<string>& inputs, const vector<string>& kernelBatches,
235062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const vector<string>& invokeBatches,
236eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& output_dir, const string& output_filename,
237edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni        const string& coreLibPath, const string& coreLibRelaxedPath,
238edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni        vector<const char*>* args) {
239eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
240eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-fPIC");
241eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-embedRSInfo");
242eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-mtriple");
243eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
244eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-bclib");
245edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    args->push_back(coreLibPath.c_str());
246edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    args->push_back("-bclib_relaxed");
247edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    args->push_back(coreLibRelaxedPath.c_str());
248eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (const string& input : inputs) {
249eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        args->push_back(input.c_str());
250eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
251062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : kernelBatches) {
252062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-merge");
253062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
254062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
255062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : invokeBatches) {
256062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-invoke");
257062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
258eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
259eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-output_path");
260eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(output_dir.c_str());
261eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-o");
262eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(output_filename.c_str());
263eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(nullptr);
264da0f069871343119251d6b0586be356dc2146a62Yang Ni}
265da0f069871343119251d6b0586be356dc2146a62Yang Ni
266062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid generateSourceSlot(const Closure& closure,
267062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                        const std::vector<std::string>& inputs,
268062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                        std::stringstream& ss) {
269062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
270062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const Script* script = funcID->mScript;
271062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
272062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (!script->isIntrinsic());
273062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
274062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const RsdCpuScriptImpl *cpuScript =
275062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            (const RsdCpuScriptImpl*)script->mHal.drv;
276062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
277062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
278062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
279062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            inputs.begin();
280062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
281062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    ss << index << "," << funcID->mSlot << ".";
282062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
283062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
284062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif  // RS_COMPATIBILTY_LIB
285da0f069871343119251d6b0586be356dc2146a62Yang Ni
286da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // anonymous namespace
287da0f069871343119251d6b0586be356dc2146a62Yang Ni
288062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) {
289da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
290062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mGroup->mClosures.size() < 2) {
291eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
292eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
293da0f069871343119251d6b0586be356dc2146a62Yang Ni
294eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
295eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Fuse the input kernels and generate native code in an object file
296eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
297da0f069871343119251d6b0586be356dc2146a62Yang Ni
298062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::set<string> inputSet;
299062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (Closure* closure : mGroup->mClosures) {
300062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Script* script = closure->mFunctionID.get()->mScript;
301da0f069871343119251d6b0586be356dc2146a62Yang Ni
302062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // If any script is an intrinsic, give up trying fusing the kernels.
303eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (script->isIntrinsic()) {
304eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            return;
305eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
306da0f069871343119251d6b0586be356dc2146a62Yang Ni
307eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const RsdCpuScriptImpl *cpuScript =
308eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                (const RsdCpuScriptImpl*)script->mHal.drv;
309eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
310062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        inputSet.insert(bitcodeFilename);
311062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
312062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
313062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> inputs(inputSet.begin(), inputSet.end());
314062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
315062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> kernelBatches;
316062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> invokeBatches;
317062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
318062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
319062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const auto& batch : mBatches) {
320062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(batch->size() > 0);
321da0f069871343119251d6b0586be356dc2146a62Yang Ni
322062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        std::stringstream ss;
323062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        ss << batch->mName << ":";
324062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
325062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (!batch->mClosures.front()->mClosure->mIsKernel) {
326062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(batch->size() == 1);
327062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss);
328062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            invokeBatches.push_back(ss.str());
329062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
330062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            for (const auto& cpuClosure : batch->mClosures) {
331062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                generateSourceSlot(*cpuClosure->mClosure, inputs, ss);
332062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            }
333062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            kernelBatches.push_back(ss.str());
334062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
335eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
336da0f069871343119251d6b0586be356dc2146a62Yang Ni
337433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    rsAssert(cacheDir != nullptr);
338433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    string objFilePath(cacheDir);
339433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    objFilePath.append("/fusedXXXXXX.o");
340433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    // Find unique object file name, to make following file names unique.
341433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    int tempfd = mkstemps(&objFilePath[0], 2);
342433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    if (tempfd == -1) {
343433558f0f9abbf07770db288183a15fd261cace2Yabin Cui      return;
344433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    }
345433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    TEMP_FAILURE_RETRY(close(tempfd));
346433558f0f9abbf07770db288183a15fd261cace2Yabin Cui
347433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2));
348edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    string coreLibRelaxedPath;
349edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
350edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni                                               &coreLibRelaxedPath);
351eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    vector<const char*> arguments;
3522fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar    string output_dir(cacheDir);
3532fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar    setupCompileArguments(inputs, kernelBatches, invokeBatches, output_dir,
354edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni                          outputFileName, coreLibPath, coreLibRelaxedPath, &arguments);
355eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
3562fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar    bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH,
3572fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar                                     arguments.size()-1,
3582fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar                                     arguments.data());
3592fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar    if (!compiled) {
360433558f0f9abbf07770db288183a15fd261cace2Yabin Cui        unlink(objFilePath.c_str());
361eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
362eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
363da0f069871343119251d6b0586be356dc2146a62Yang Ni
364eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
365eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Create and load the shared lib
366eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
367da0f069871343119251d6b0586be356dc2146a62Yang Ni
368eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const char* resName = outputFileName.c_str();
369da0f069871343119251d6b0586be356dc2146a62Yang Ni
370eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
371eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Failed to link object file '%s'", resName);
372eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
373eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
374da0f069871343119251d6b0586be356dc2146a62Yang Ni
375062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
376062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj == nullptr) {
377eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Unable to load '%s'", resName);
378eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
379eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
380da0f069871343119251d6b0586be356dc2146a62Yang Ni
381eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mExecutable = ScriptExecutable::createFromSharedObject(
382062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        nullptr,  // RS context. Unused.
383062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mScriptObj);
384da0f069871343119251d6b0586be356dc2146a62Yang Ni
385da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif  // RS_COMPATIBILITY_LIB
3861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3871ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3881ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() {
389eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (auto batch : mBatches) {
390eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->setGlobalsForBatch();
391eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->run();
392eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
3931ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
3941ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
395da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() {
396eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
397eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
398062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
399062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        Script* s = funcID->mScript;;
400eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        for (const auto& p : closure->mGlobals) {
401eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const void* value = p.second.first;
402eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            int size = p.second.second;
403eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (value == nullptr && size == 0) {
404eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // This indicates the current closure depends on another closure for a
405eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // global in their shared module (script). In this case we don't need to
406eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // copy the value. For example, an invoke intializes a global variable
407eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // which a kernel later reads.
408eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                continue;
409eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
410ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            rsAssert(p.first != nullptr);
411ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)",
412ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                  closure, p.first, p.first->mScript, p.first->mSlot);
413062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            Script* script = p.first->mScript;
414062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const RsdCpuScriptImpl *cpuScript =
415062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    (const RsdCpuScriptImpl*)script->mHal.drv;
416062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            int slot = p.first->mSlot;
417062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ScriptExecutable* exec = mGroup->getExecutable();
418062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            if (exec != nullptr) {
419062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                const char* varName = cpuScript->getFieldName(slot);
420062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                void* addr = exec->getFieldAddress(varName);
421062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
422062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
423062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                 (rs_object_base*)addr, (ObjectBase*)value);
424062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
425062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    memcpy(addr, (const void*)&value, size);
426062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
427eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            } else {
428062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                // We use -1 size to indicate an ObjectBase rather than a primitive type
429062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
430062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVarObj(slot, (ObjectBase*)value);
431062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
432062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVar(slot, (const void*)&value, size);
433062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
434eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
435eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
4361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
4371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
439da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() {
440062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!mClosures.front()->mClosure->mIsKernel) {
441062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(mClosures.size() == 1);
442062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
443062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // This batch contains a single closure for an invoke function
444062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        CPUClosure* cc = mClosures.front();
445062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Closure* c = cc->mClosure;
446062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
447062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (mFunc != nullptr) {
448062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // TODO: Need align pointers for x86_64.
449062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
450062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
451062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
452062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
453062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(invokeID != nullptr);
454062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
455062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
456062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
457062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        return;
458062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
459062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
460062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mFunc != nullptr) {
461eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        MTLaunchStruct mtls;
462eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* firstCpuClosure = mClosures.front();
463eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* lastCpuClosure = mClosures.back();
464eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
465eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        firstCpuClosure->mSi->forEachMtlsSetup(
466ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
467ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
468eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
469eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, 0, nullptr, &mtls);
470eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
471eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
472eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = nullptr;
473062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mtls.kernel = (ForEachFunc_t)mFunc;
474eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
475eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(
476ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
477ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
478eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
479eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, &mtls);
480eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
481eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
482eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
483eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
484eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
485eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
486062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
487062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
488eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->preLaunch(kernelID->mSlot,
489ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   (const Allocation**)closure->mArgs,
490ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   closure->mNumArg, closure->mReturnValue,
491062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                   nullptr, 0, nullptr);
492eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
493da0f069871343119251d6b0586be356dc2146a62Yang Ni
494eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const CPUClosure* cpuClosure = mClosures.front();
495eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const Closure* closure = cpuClosure->mClosure;
496eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    MTLaunchStruct mtls;
497da0f069871343119251d6b0586be356dc2146a62Yang Ni
498ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
499ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                          closure->mNumArg,
500eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          closure->mReturnValue,
501eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          nullptr, 0, nullptr, &mtls)) {
502da0f069871343119251d6b0586be356dc2146a62Yang Ni
503eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
504eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.kernel = (void (*)())&groupRoot;
505eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = &mClosures;
506da0f069871343119251d6b0586be356dc2146a62Yang Ni
507eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
508eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
509eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
510eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
511eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
512062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
513062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
514eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->postLaunch(kernelID->mSlot,
515ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    (const Allocation**)closure->mArgs,
516ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    closure->mNumArg, closure->mReturnValue,
517eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                    nullptr, 0, nullptr);
518eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
5191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
5201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
5211ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace renderscript
5221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace android
523