rsCpuScriptGroup2.cpp revision 1c20667f7a174a7c0a1599d34a40c524fe24c615
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h"
21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h>
4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h>
5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h>
6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h>
7da0f069871343119251d6b0586be356dc2146a62Yang Ni
8062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <set>
9062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <sstream>
10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string>
11da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector>
12da0f069871343119251d6b0586be356dc2146a62Yang Ni
13da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
14da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h"
15da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
16da0f069871343119251d6b0586be356dc2146a62Yang Ni
171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h"
181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h"
191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h"
201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h"
212abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h"
221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h"
231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h"
241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h"
25da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h"
26da0f069871343119251d6b0586be356dc2146a62Yang Ni
27da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string;
28da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector;
291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android {
311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript {
321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace {
341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
35da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2;
361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
37b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni               uint32_t xend, uint32_t outstep) {
39b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
40b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
42b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    const size_t oldInLen = mutable_kinfo->inLen;
43b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross
44b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    decltype(mutable_kinfo->inStride) oldInStride;
45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : closures) {
48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
50b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        // There had better be enough space in mutable_kinfo
51b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
53ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        for (size_t i = 0; i < closure->mNumArg; i++) {
54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const void* arg = closure->mArgs[i];
55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const Allocation* a = (const Allocation*)arg;
56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint32_t eStride = a->mHal.state.elementSizeBytes;
57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    eStride * xstart;
59b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            if (kinfo->dim.y > 1) {
60b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mutable_kinfo->inPtr[i] = ptr;
63b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mutable_kinfo->inStride[i] = eStride;
64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        mutable_kinfo->inLen = closure->mNumArg;
661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Allocation* out = closure->mReturnValue;
68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint32_t ostep = out->mHal.state.elementSizeBytes;
69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ostep * xstart;
71b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        if (kinfo->dim.y > 1) {
72b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
75b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        rsAssert(kinfo->outLen <= 1);
76b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
78b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        cpuClosure->mFunc(kinfo, xstart, xend, ostep);
791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
81b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    mutable_kinfo->inLen = oldInLen;
82b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
85da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // namespace
86da0f069871343119251d6b0586be356dc2146a62Yang Ni
87062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) :
88062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mGroup(group), mFunc(nullptr) {
89062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mName = strndup(name, strlen(name));
90062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
91062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
92da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() {
93eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* c : mClosures) {
94eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete c;
95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
96062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    free(mName);
97da0f069871343119251d6b0586be356dc2146a62Yang Ni}
98da0f069871343119251d6b0586be356dc2146a62Yang Ni
99ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const {
100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.empty()) {
101eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
103da0f069871343119251d6b0586be356dc2146a62Yang Ni
104ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const Closure* closure = cpuClosure->mClosure;
105ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
106062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // An invoke should be in a batch by itself, so it conflicts with any other
108eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // closure.
1091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        return true;
1101ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
111eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
112ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& globalDeps = closure->mGlobalDeps;
113ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& argDeps = closure->mArgDeps;
114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    for (CPUClosure* c : mClosures) {
116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const Closure* batched = c->mClosure;
117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (globalDeps.find(batched) != globalDeps.end()) {
118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            return true;
119eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const auto& it = argDeps.find(batched);
121ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (it != argDeps.end()) {
122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const auto& args = (*it).second;
123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            for (const auto &p1 : *args) {
124bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni                if (p1.second.get() != nullptr) {
125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                    return true;
126eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                }
127eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        }
1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
130ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
1311c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    // The compiler fusion pass in bcc expects that kernels chained up through
1321c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    // (1st) input and output.
1331c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni
1341c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    const Closure* lastBatched = mClosures.back()->mClosure;
1351c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    const auto& it = argDeps.find(lastBatched);
1361c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni
1371c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    if (it == argDeps.end()) {
1381c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni        return true;
1391c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    }
1401c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni
1411c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    const auto& args = (*it).second;
1421c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    for (const auto &p1 : *args) {
1431c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni        if (p1.first == 0 && p1.second.get() == nullptr) {
1441c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni            // The new closure depends on the last batched closure's return
1451c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni            // value (fieldId being nullptr) for its first argument (argument 0)
1461c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni            return false;
1471c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni        }
1481c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    }
1491c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni
1501c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    return true;
1511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1521ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1531ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
1541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                         const ScriptGroupBase *sg) :
155062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
156062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mExecutable(nullptr), mScriptObj(nullptr) {
157eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!mGroup->mClosures.empty());
158eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
159062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    Batch* batch = new Batch(this, "Batch0");
160062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
161eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Closure* closure: mGroup->mClosures) {
162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        CPUClosure* cc;
163062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
164062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        RsdCpuScriptImpl* si =
165062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
166062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (closure->mIsKernel) {
167eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            MTLaunchStruct mtls;
168062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            si->forEachKernelSetup(funcID->mSlot, &mtls);
169062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
170eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        } else {
171eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            cc = new CPUClosure(closure, si);
172eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
1731ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
174eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (batch->conflict(cc)) {
175eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            mBatches.push_back(batch);
176062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            std::stringstream ss;
177062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ss << "Batch" << ++i;
178062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch = new Batch(this, ss.str().c_str());
179eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
180da0f069871343119251d6b0586be356dc2146a62Yang Ni
181eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->mClosures.push_back(cc);
182eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
183da0f069871343119251d6b0586be356dc2146a62Yang Ni
184eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!batch->mClosures.empty());
185eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mBatches.push_back(batch);
186da0f069871343119251d6b0586be356dc2146a62Yang Ni
187da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
188062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    compile(mGroup->mCacheDir);
189062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr && mExecutable != nullptr) {
190062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        for (Batch* batch : mBatches) {
191062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch->resolveFuncPtr(mScriptObj);
192062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
193eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
194062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif  // RS_COMPATIBILITY_LIB
195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
196062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
197062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) {
198062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::string funcName(mName);
199062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mClosures.front()->mClosure->mIsKernel) {
200062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        funcName.append(".expand");
201062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
202062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mFunc = dlsym(sharedObj, funcName.c_str());
203062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (mFunc != nullptr);
2041ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
2051ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
2061ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
207eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Batch* batch : mBatches) {
208eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete batch;
209eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
210bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni    delete mExecutable;
211062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    // TODO: move this dlclose into ~ScriptExecutable().
212062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr) {
213062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        dlclose(mScriptObj);
214062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
215da0f069871343119251d6b0586be356dc2146a62Yang Ni}
216da0f069871343119251d6b0586be356dc2146a62Yang Ni
217da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace {
218da0f069871343119251d6b0586be356dc2146a62Yang Ni
219da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
220da0f069871343119251d6b0586be356dc2146a62Yang Ni
221edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Nistring getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
222edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    *coreLibRelaxedPath = "";
223edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
224edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // If we're debugging, use the debug library.
225edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
226edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni        return SYSLIBPATH"/libclcore_debug.bc";
227edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    }
228edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
229edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // Check for a platform specific library
230edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
231edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
232edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // NEON-capable ARMv7a devices can use an accelerated math library
233edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // for all reduced precision scripts.
234edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // ARMv8 does not use NEON, as ASIMD can be used with all precision
235edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // levels.
236edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
237edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif
238edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
239edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(__i386__) || defined(__x86_64__)
240edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // x86 devices will use an optimized library.
241edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    return SYSLIBPATH"/libclcore_x86.bc";
242edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#else
243edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    return SYSLIBPATH"/libclcore.bc";
244edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif
245edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni}
246edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
247da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments(
248cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const vector<const char*>& inputs, const vector<string>& kernelBatches,
249062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const vector<string>& invokeBatches,
250cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const char* outputDir, const char* outputFileName,
251cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const char* coreLibPath, const char* coreLibRelaxedPath,
2528237638f87ca0e265d050fbb13725b41a795fe5fYang Ni        const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant,
253edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni        vector<const char*>* args) {
254eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
255eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-fPIC");
256eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-embedRSInfo");
2578237638f87ca0e265d050fbb13725b41a795fe5fYang Ni    if (emitGlobalInfo) {
2588237638f87ca0e265d050fbb13725b41a795fe5fYang Ni        args->push_back("-rs-global-info");
2598237638f87ca0e265d050fbb13725b41a795fe5fYang Ni        if (emitGlobalInfoSkipConstant) {
2608237638f87ca0e265d050fbb13725b41a795fe5fYang Ni            args->push_back("-rs-global-info-skip-constant");
2618237638f87ca0e265d050fbb13725b41a795fe5fYang Ni        }
2628237638f87ca0e265d050fbb13725b41a795fe5fYang Ni    }
263eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-mtriple");
264eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
265eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-bclib");
266cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(coreLibPath);
267edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    args->push_back("-bclib_relaxed");
268cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(coreLibRelaxedPath);
269cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    for (const char* input : inputs) {
270cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        args->push_back(input);
271eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
272062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : kernelBatches) {
273062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-merge");
274062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
275062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
276062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : invokeBatches) {
277062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-invoke");
278062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
279eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
280eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-output_path");
281cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(outputDir);
282eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-o");
283cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(outputFileName);
284da0f069871343119251d6b0586be356dc2146a62Yang Ni}
285da0f069871343119251d6b0586be356dc2146a62Yang Ni
286cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Nivoid generateSourceSlot(RsdCpuReferenceImpl* ctxt,
287cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni                        const Closure& closure,
288cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                        const std::vector<const char*>& inputs,
289062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                        std::stringstream& ss) {
290062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
291062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const Script* script = funcID->mScript;
292062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
293062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (!script->isIntrinsic());
294062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
295062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const RsdCpuScriptImpl *cpuScript =
296cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni            (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
297062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
298062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
299062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
300062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            inputs.begin();
301062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
302062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    ss << index << "," << funcID->mSlot << ".";
303062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
304062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
#endif  // RS_COMPATIBILITY_LIB
306da0f069871343119251d6b0586be356dc2146a62Yang Ni
307da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // anonymous namespace
308da0f069871343119251d6b0586be356dc2146a62Yang Ni
309062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) {
310da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
311062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mGroup->mClosures.size() < 2) {
312eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
313eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
314da0f069871343119251d6b0586be356dc2146a62Yang Ni
315cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    auto comparator = [](const char* str1, const char* str2) -> bool {
316cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        return strcmp(str1, str2) < 0;
317cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    };
318cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    std::set<const char*, decltype(comparator)> inputSet(comparator);
319cb17015fed6b11a5028f31cc804a3847e379945dYang Ni
320062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (Closure* closure : mGroup->mClosures) {
321062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Script* script = closure->mFunctionID.get()->mScript;
322da0f069871343119251d6b0586be356dc2146a62Yang Ni
323062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // If any script is an intrinsic, give up trying fusing the kernels.
324eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (script->isIntrinsic()) {
325eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            return;
326eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
327da0f069871343119251d6b0586be356dc2146a62Yang Ni
328eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const RsdCpuScriptImpl *cpuScript =
329cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni            (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script);
330cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni
331cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const char* bitcodeFilename = cpuScript->getBitcodeFilePath();
332062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        inputSet.insert(bitcodeFilename);
333062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
334062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
335cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    std::vector<const char*> inputs(inputSet.begin(), inputSet.end());
336062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
337062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> kernelBatches;
338062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> invokeBatches;
339062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
340062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
341062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const auto& batch : mBatches) {
342062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(batch->size() > 0);
343da0f069871343119251d6b0586be356dc2146a62Yang Ni
344062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        std::stringstream ss;
345062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        ss << batch->mName << ":";
346062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
347062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (!batch->mClosures.front()->mClosure->mIsKernel) {
348062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(batch->size() == 1);
349cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni            generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss);
350062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            invokeBatches.push_back(ss.str());
351062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
352062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            for (const auto& cpuClosure : batch->mClosures) {
353cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni                generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss);
354062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            }
355062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            kernelBatches.push_back(ss.str());
356062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
357eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
358da0f069871343119251d6b0586be356dc2146a62Yang Ni
359433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    rsAssert(cacheDir != nullptr);
360433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    string objFilePath(cacheDir);
361f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    objFilePath.append("/");
362f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    objFilePath.append(mGroup->mName);
363f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    objFilePath.append(".o");
364433558f0f9abbf07770db288183a15fd261cace2Yabin Cui
365cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    const char* resName = mGroup->mName;
366edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    string coreLibRelaxedPath;
367edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
368edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni                                               &coreLibRelaxedPath);
369f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
370eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    vector<const char*> arguments;
3718237638f87ca0e265d050fbb13725b41a795fe5fYang Ni    bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo();
3728237638f87ca0e265d050fbb13725b41a795fe5fYang Ni    bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant();
373cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
374cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                          resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(),
3758237638f87ca0e265d050fbb13725b41a795fe5fYang Ni                          emitGlobalInfo, emitGlobalInfoSkipConstant,
376f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni                          &arguments);
377f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
378f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1,
379cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                                                       arguments.data()));
380cb17015fed6b11a5028f31cc804a3847e379945dYang Ni
381cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    inputs.push_back(coreLibPath.c_str());
382cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    inputs.push_back(coreLibRelaxedPath.c_str());
383cb17015fed6b11a5028f31cc804a3847e379945dYang Ni
384cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(),
385cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                                               inputs.data(), inputs.size());
386f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
387cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    if (checksum == 0) {
388f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        return;
389f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    }
390f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
391cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    std::stringstream ss;
392cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    ss << std::hex << checksum;
393cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    const char* checksumStr = ss.str().c_str();
394f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
395f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
396f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    // Try to load a shared lib from code cache matching filename and checksum
397f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
398f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
399f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
400f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    if (mScriptObj != nullptr) {
401f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        mExecutable = ScriptExecutable::createFromSharedObject(
402cb17015fed6b11a5028f31cc804a3847e379945dYang Ni            getCpuRefImpl()->getContext(), mScriptObj, checksum);
403f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        if (mExecutable != nullptr) {
404cb17015fed6b11a5028f31cc804a3847e379945dYang Ni            return;
405f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        } else {
406f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni            ALOGE("Failed to create an executable object from so file");
407f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        }
408f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        dlclose(mScriptObj);
409f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        mScriptObj = nullptr;
410f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    }
411f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
412f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
413f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    // Fuse the input kernels and generate native code in an object file
414f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
415f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
416f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    arguments.push_back("-build-checksum");
417cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    arguments.push_back(checksumStr);
418f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    arguments.push_back(nullptr);
419eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
4202fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar    bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH,
421f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni                                      arguments.size()-1,
422f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni                                      arguments.data());
4232fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar    if (!compiled) {
424eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
425eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
426da0f069871343119251d6b0586be356dc2146a62Yang Ni
427eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
428eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Create and load the shared lib
429eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
430da0f069871343119251d6b0586be356dc2146a62Yang Ni
4314c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines    if (!SharedLibraryUtils::createSharedLibrary(
4324c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines            getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) {
433eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Failed to link object file '%s'", resName);
4348b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni        unlink(objFilePath.c_str());
435eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
436eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
437da0f069871343119251d6b0586be356dc2146a62Yang Ni
4388b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni    unlink(objFilePath.c_str());
4398b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni
440062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
441062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj == nullptr) {
442eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Unable to load '%s'", resName);
443eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
444eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
445da0f069871343119251d6b0586be356dc2146a62Yang Ni
446eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mExecutable = ScriptExecutable::createFromSharedObject(
447bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni        getCpuRefImpl()->getContext(),
448062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mScriptObj);
449da0f069871343119251d6b0586be356dc2146a62Yang Ni
450da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif  // RS_COMPATIBILITY_LIB
4511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4521ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
4531ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() {
454eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (auto batch : mBatches) {
455eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->setGlobalsForBatch();
456eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->run();
457eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
4581ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4591ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
460da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() {
461eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
462eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
463062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
464062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        Script* s = funcID->mScript;;
465eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        for (const auto& p : closure->mGlobals) {
466eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const void* value = p.second.first;
467eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            int size = p.second.second;
468eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (value == nullptr && size == 0) {
469eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // This indicates the current closure depends on another closure for a
470eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // global in their shared module (script). In this case we don't need to
471eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // copy the value. For example, an invoke intializes a global variable
472eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // which a kernel later reads.
473eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                continue;
474eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
475ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            rsAssert(p.first != nullptr);
476062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            Script* script = p.first->mScript;
477cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni            RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl();
478062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const RsdCpuScriptImpl *cpuScript =
479cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni                    (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
480062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            int slot = p.first->mSlot;
481062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ScriptExecutable* exec = mGroup->getExecutable();
482062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            if (exec != nullptr) {
483062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                const char* varName = cpuScript->getFieldName(slot);
484062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                void* addr = exec->getFieldAddress(varName);
485062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
486062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
487062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                 (rs_object_base*)addr, (ObjectBase*)value);
488062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
489062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    memcpy(addr, (const void*)&value, size);
490062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
491eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            } else {
492062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                // We use -1 size to indicate an ObjectBase rather than a primitive type
493062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
494062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVarObj(slot, (ObjectBase*)value);
495062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
496062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVar(slot, (const void*)&value, size);
497062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
498eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
499eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
5001ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
5011ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
5021ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
503da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() {
504062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!mClosures.front()->mClosure->mIsKernel) {
505062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(mClosures.size() == 1);
506062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
507062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // This batch contains a single closure for an invoke function
508062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        CPUClosure* cc = mClosures.front();
509062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Closure* c = cc->mClosure;
510062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
511062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (mFunc != nullptr) {
512062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // TODO: Need align pointers for x86_64.
513062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
514062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
515062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
516062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
517062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(invokeID != nullptr);
518062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
519062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
520062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
521062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        return;
522062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
523062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
524062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mFunc != nullptr) {
525eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        MTLaunchStruct mtls;
526eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* firstCpuClosure = mClosures.front();
527eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* lastCpuClosure = mClosures.back();
528eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
529eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        firstCpuClosure->mSi->forEachMtlsSetup(
530ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
531ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
532eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
533eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, 0, nullptr, &mtls);
534eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
535eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
536eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = nullptr;
537062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mtls.kernel = (ForEachFunc_t)mFunc;
538eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
539eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(
540ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
541ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
542eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
543eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, &mtls);
544eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
545eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
546eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
547eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
548eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
549eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
550062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
551062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
552eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->preLaunch(kernelID->mSlot,
553ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   (const Allocation**)closure->mArgs,
554ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   closure->mNumArg, closure->mReturnValue,
555062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                   nullptr, 0, nullptr);
556eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
557da0f069871343119251d6b0586be356dc2146a62Yang Ni
558eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const CPUClosure* cpuClosure = mClosures.front();
559eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const Closure* closure = cpuClosure->mClosure;
560eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    MTLaunchStruct mtls;
561da0f069871343119251d6b0586be356dc2146a62Yang Ni
562ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
563ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                          closure->mNumArg,
564eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          closure->mReturnValue,
565eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          nullptr, 0, nullptr, &mtls)) {
566da0f069871343119251d6b0586be356dc2146a62Yang Ni
567eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
568eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.kernel = (void (*)())&groupRoot;
569eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = &mClosures;
570da0f069871343119251d6b0586be356dc2146a62Yang Ni
571eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
572eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
573eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
574eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
575eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
576062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
577062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
578eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->postLaunch(kernelID->mSlot,
579ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    (const Allocation**)closure->mArgs,
580ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    closure->mNumArg, closure->mReturnValue,
581eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                    nullptr, 0, nullptr);
582eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
5831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
5841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
5851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace renderscript
5861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace android
587