rsCpuScriptGroup2.cpp revision f5029803ae6ce9d92d70b76e7a7cdd8d484f31ca
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h"
21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h>
4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h>
5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h>
6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h>
7da0f069871343119251d6b0586be356dc2146a62Yang Ni
8062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <set>
9062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <sstream>
10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string>
11da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector>
12da0f069871343119251d6b0586be356dc2146a62Yang Ni
13da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
14da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h"
15da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
16da0f069871343119251d6b0586be356dc2146a62Yang Ni
171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h"
181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h"
191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h"
201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h"
212abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h"
221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h"
231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h"
241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h"
25da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h"
26da0f069871343119251d6b0586be356dc2146a62Yang Ni
27da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string;
28da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector;
291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android {
311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript {
321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace {
341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
// Presumably the default number of arguments of a kernel.
// NOTE(review): not referenced in the visible part of this file - confirm it
// is still needed.
constexpr size_t DefaultKernelArgCount = 2;
361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
37b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni               uint32_t xend, uint32_t outstep) {
39b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
40b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
42b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    const size_t oldInLen = mutable_kinfo->inLen;
43b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross
44b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    decltype(mutable_kinfo->inStride) oldInStride;
45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : closures) {
48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
50b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        // There had better be enough space in mutable_kinfo
51b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
53ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        for (size_t i = 0; i < closure->mNumArg; i++) {
54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const void* arg = closure->mArgs[i];
55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const Allocation* a = (const Allocation*)arg;
56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint32_t eStride = a->mHal.state.elementSizeBytes;
57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    eStride * xstart;
59b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            if (kinfo->dim.y > 1) {
60b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mutable_kinfo->inPtr[i] = ptr;
63b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mutable_kinfo->inStride[i] = eStride;
64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        mutable_kinfo->inLen = closure->mNumArg;
661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Allocation* out = closure->mReturnValue;
68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint32_t ostep = out->mHal.state.elementSizeBytes;
69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ostep * xstart;
71b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        if (kinfo->dim.y > 1) {
72b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
75b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        rsAssert(kinfo->outLen <= 1);
76b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
78b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        cpuClosure->mFunc(kinfo, xstart, xend, ostep);
791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
81b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    mutable_kinfo->inLen = oldInLen;
82b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
85da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // namespace
86da0f069871343119251d6b0586be356dc2146a62Yang Ni
87062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) :
88062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mGroup(group), mFunc(nullptr) {
89062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mName = strndup(name, strlen(name));
90062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
91062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
92da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() {
93eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* c : mClosures) {
94eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete c;
95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
96062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    free(mName);
97da0f069871343119251d6b0586be356dc2146a62Yang Ni}
98da0f069871343119251d6b0586be356dc2146a62Yang Ni
99ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const {
100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.empty()) {
101eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
103da0f069871343119251d6b0586be356dc2146a62Yang Ni
104ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const Closure* closure = cpuClosure->mClosure;
105ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
106062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // An invoke should be in a batch by itself, so it conflicts with any other
108eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // closure.
1091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        return true;
1101ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
111eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
112ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& globalDeps = closure->mGlobalDeps;
113ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& argDeps = closure->mArgDeps;
114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    for (CPUClosure* c : mClosures) {
116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const Closure* batched = c->mClosure;
117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (globalDeps.find(batched) != globalDeps.end()) {
118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            return true;
119eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const auto& it = argDeps.find(batched);
121ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (it != argDeps.end()) {
122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const auto& args = (*it).second;
123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            for (const auto &p1 : *args) {
124bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni                if (p1.second.get() != nullptr) {
125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                    return true;
126eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                }
127eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        }
1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
130ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
1311c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    // The compiler fusion pass in bcc expects that kernels chained up through
1321c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    // (1st) input and output.
1331c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni
1341c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    const Closure* lastBatched = mClosures.back()->mClosure;
1351c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    const auto& it = argDeps.find(lastBatched);
1361c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni
1371c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    if (it == argDeps.end()) {
1381c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni        return true;
1391c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    }
1401c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni
1411c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    const auto& args = (*it).second;
1421c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    for (const auto &p1 : *args) {
1431c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni        if (p1.first == 0 && p1.second.get() == nullptr) {
1441c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni            // The new closure depends on the last batched closure's return
1451c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni            // value (fieldId being nullptr) for its first argument (argument 0)
1461c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni            return false;
1471c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni        }
1481c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    }
1491c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni
1501c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    return true;
1511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1521ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1531ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
1541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                         const ScriptGroupBase *sg) :
155062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
156062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mExecutable(nullptr), mScriptObj(nullptr) {
157eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!mGroup->mClosures.empty());
158eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
1591efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    mCpuRefImpl->lockMutex();
160062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    Batch* batch = new Batch(this, "Batch0");
161062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Closure* closure: mGroup->mClosures) {
163eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        CPUClosure* cc;
164062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
165062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        RsdCpuScriptImpl* si =
166062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
167062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (closure->mIsKernel) {
168eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            MTLaunchStruct mtls;
169062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            si->forEachKernelSetup(funcID->mSlot, &mtls);
170062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
171eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        } else {
172eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            cc = new CPUClosure(closure, si);
173eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
1741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
175eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (batch->conflict(cc)) {
176eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            mBatches.push_back(batch);
177062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            std::stringstream ss;
178062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ss << "Batch" << ++i;
179062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch = new Batch(this, ss.str().c_str());
180eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
181da0f069871343119251d6b0586be356dc2146a62Yang Ni
182eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->mClosures.push_back(cc);
183eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
184da0f069871343119251d6b0586be356dc2146a62Yang Ni
185eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!batch->mClosures.empty());
186eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mBatches.push_back(batch);
187da0f069871343119251d6b0586be356dc2146a62Yang Ni
188da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
189062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    compile(mGroup->mCacheDir);
190062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr && mExecutable != nullptr) {
191062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        for (Batch* batch : mBatches) {
192062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch->resolveFuncPtr(mScriptObj);
193062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
194eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif  // RS_COMPATIBILITY_LIB
1961efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    mCpuRefImpl->unlockMutex();
197062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
198062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
199062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) {
200062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::string funcName(mName);
201062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mClosures.front()->mClosure->mIsKernel) {
202062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        funcName.append(".expand");
203062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
204062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mFunc = dlsym(sharedObj, funcName.c_str());
205062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (mFunc != nullptr);
2061ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
2071ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
2081ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
209eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Batch* batch : mBatches) {
210eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete batch;
211eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
212bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni    delete mExecutable;
213062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    // TODO: move this dlclose into ~ScriptExecutable().
214062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr) {
215062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        dlclose(mScriptObj);
216062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
217da0f069871343119251d6b0586be356dc2146a62Yang Ni}
218da0f069871343119251d6b0586be356dc2146a62Yang Ni
219da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace {
220da0f069871343119251d6b0586be356dc2146a62Yang Ni
221da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
222da0f069871343119251d6b0586be356dc2146a62Yang Ni
223edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Nistring getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
224edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    *coreLibRelaxedPath = "";
225edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
226edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // If we're debugging, use the debug library.
227edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
228edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni        return SYSLIBPATH"/libclcore_debug.bc";
229edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    }
230edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
231edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // Check for a platform specific library
232edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
233edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
234edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // NEON-capable ARMv7a devices can use an accelerated math library
235edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // for all reduced precision scripts.
236edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // ARMv8 does not use NEON, as ASIMD can be used with all precision
237edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // levels.
238edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
239edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif
240edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
241edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(__i386__) || defined(__x86_64__)
242edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // x86 devices will use an optimized library.
243edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    return SYSLIBPATH"/libclcore_x86.bc";
244edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#else
245edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    return SYSLIBPATH"/libclcore.bc";
246edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif
247edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni}
248edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
249da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments(
250cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const vector<const char*>& inputs, const vector<string>& kernelBatches,
251062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const vector<string>& invokeBatches,
252cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const char* outputDir, const char* outputFileName,
253cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const char* coreLibPath, const char* coreLibRelaxedPath,
2548237638f87ca0e265d050fbb13725b41a795fe5fYang Ni        const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant,
255f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham        int optLevel, vector<const char*>* args) {
256eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
257eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-fPIC");
258eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-embedRSInfo");
2598237638f87ca0e265d050fbb13725b41a795fe5fYang Ni    if (emitGlobalInfo) {
2608237638f87ca0e265d050fbb13725b41a795fe5fYang Ni        args->push_back("-rs-global-info");
2618237638f87ca0e265d050fbb13725b41a795fe5fYang Ni        if (emitGlobalInfoSkipConstant) {
2628237638f87ca0e265d050fbb13725b41a795fe5fYang Ni            args->push_back("-rs-global-info-skip-constant");
2638237638f87ca0e265d050fbb13725b41a795fe5fYang Ni        }
2648237638f87ca0e265d050fbb13725b41a795fe5fYang Ni    }
265eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-mtriple");
266eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
267eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-bclib");
268cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(coreLibPath);
269edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    args->push_back("-bclib_relaxed");
270cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(coreLibRelaxedPath);
271cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    for (const char* input : inputs) {
272cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        args->push_back(input);
273eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
274062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : kernelBatches) {
275062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-merge");
276062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
277062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
278062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : invokeBatches) {
279062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-invoke");
280062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
281eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
282eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-output_path");
283cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(outputDir);
2841efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
285f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham    args->push_back("-O");
286f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham    args->push_back(std::to_string(optLevel).c_str());
287f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham
2881efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    // The output filename has to be the last, in case we need to pop it out and
2891efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    // replace with a different name.
290eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-o");
291cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(outputFileName);
292da0f069871343119251d6b0586be356dc2146a62Yang Ni}
293da0f069871343119251d6b0586be356dc2146a62Yang Ni
294cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Nivoid generateSourceSlot(RsdCpuReferenceImpl* ctxt,
295cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni                        const Closure& closure,
296cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                        const std::vector<const char*>& inputs,
297062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                        std::stringstream& ss) {
298062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
299062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const Script* script = funcID->mScript;
300062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
301062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (!script->isIntrinsic());
302062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
303062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const RsdCpuScriptImpl *cpuScript =
304cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni            (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
305062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
306062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
307062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
308062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            inputs.begin();
309062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
310062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    ss << index << "," << funcID->mSlot << ".";
311062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
312062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
#endif  // RS_COMPATIBILITY_LIB
314da0f069871343119251d6b0586be356dc2146a62Yang Ni
315da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // anonymous namespace
316da0f069871343119251d6b0586be356dc2146a62Yang Ni
317062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) {
318da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
319062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mGroup->mClosures.size() < 2) {
320eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
321eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
322da0f069871343119251d6b0586be356dc2146a62Yang Ni
323cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    auto comparator = [](const char* str1, const char* str2) -> bool {
324cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        return strcmp(str1, str2) < 0;
325cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    };
326cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    std::set<const char*, decltype(comparator)> inputSet(comparator);
327cb17015fed6b11a5028f31cc804a3847e379945dYang Ni
328062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (Closure* closure : mGroup->mClosures) {
329062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Script* script = closure->mFunctionID.get()->mScript;
330da0f069871343119251d6b0586be356dc2146a62Yang Ni
331062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // If any script is an intrinsic, give up trying fusing the kernels.
332eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (script->isIntrinsic()) {
333eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            return;
334eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
335da0f069871343119251d6b0586be356dc2146a62Yang Ni
336eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const RsdCpuScriptImpl *cpuScript =
337cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni            (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script);
338cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni
339cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const char* bitcodeFilename = cpuScript->getBitcodeFilePath();
340062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        inputSet.insert(bitcodeFilename);
341062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
342062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
343cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    std::vector<const char*> inputs(inputSet.begin(), inputSet.end());
344062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
345062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> kernelBatches;
346062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> invokeBatches;
347062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
348062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
349062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const auto& batch : mBatches) {
350062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(batch->size() > 0);
351da0f069871343119251d6b0586be356dc2146a62Yang Ni
352062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        std::stringstream ss;
353062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        ss << batch->mName << ":";
354062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
355062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (!batch->mClosures.front()->mClosure->mIsKernel) {
356062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(batch->size() == 1);
357cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni            generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss);
358062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            invokeBatches.push_back(ss.str());
359062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
360062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            for (const auto& cpuClosure : batch->mClosures) {
361cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni                generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss);
362062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            }
363062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            kernelBatches.push_back(ss.str());
364062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
365eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
366da0f069871343119251d6b0586be356dc2146a62Yang Ni
367433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    rsAssert(cacheDir != nullptr);
368433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    string objFilePath(cacheDir);
369f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    objFilePath.append("/");
370f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    objFilePath.append(mGroup->mName);
371f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    objFilePath.append(".o");
372433558f0f9abbf07770db288183a15fd261cace2Yabin Cui
373cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    const char* resName = mGroup->mName;
374edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    string coreLibRelaxedPath;
375edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
376edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni                                               &coreLibRelaxedPath);
377f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
378f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham    int optLevel = getCpuRefImpl()->getContext()->getOptLevel();
379f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham
380eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    vector<const char*> arguments;
3818237638f87ca0e265d050fbb13725b41a795fe5fYang Ni    bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo();
3828237638f87ca0e265d050fbb13725b41a795fe5fYang Ni    bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant();
383cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
384cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                          resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(),
3858237638f87ca0e265d050fbb13725b41a795fe5fYang Ni                          emitGlobalInfo, emitGlobalInfoSkipConstant,
386f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham                          optLevel, &arguments);
387f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
388f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1,
389cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                                                       arguments.data()));
390cb17015fed6b11a5028f31cc804a3847e379945dYang Ni
391cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    inputs.push_back(coreLibPath.c_str());
392cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    inputs.push_back(coreLibRelaxedPath.c_str());
393cb17015fed6b11a5028f31cc804a3847e379945dYang Ni
394cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(),
395cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                                               inputs.data(), inputs.size());
396f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
397cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    if (checksum == 0) {
398f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        return;
399f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    }
400f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
401cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    std::stringstream ss;
402cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    ss << std::hex << checksum;
403cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    const char* checksumStr = ss.str().c_str();
404f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
405f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
406f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    // Try to load a shared lib from code cache matching filename and checksum
407f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
408f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
4091efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    bool alreadyLoaded = false;
4101efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    std::string cloneName;
4111efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
4121efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nullptr,
4131efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni                                                       &alreadyLoaded);
414f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    if (mScriptObj != nullptr) {
4151efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        // A shared library named resName is found in code cache directory
4161efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        // cacheDir, and loaded with the handle stored in mScriptObj.
4171efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
418f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        mExecutable = ScriptExecutable::createFromSharedObject(
419cb17015fed6b11a5028f31cc804a3847e379945dYang Ni            getCpuRefImpl()->getContext(), mScriptObj, checksum);
4201efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
421f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        if (mExecutable != nullptr) {
4221efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            // The loaded shared library in mScriptObj has a matching checksum.
4231efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            // An executable object has been created.
424cb17015fed6b11a5028f31cc804a3847e379945dYang Ni            return;
425f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        }
4261efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
4271efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        ALOGV("Failed to create an executable object from so file due to "
4281efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni              "mismatching checksum");
4291efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
4301efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        if (alreadyLoaded) {
4311efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            // The shared object found in code cache has already been loaded.
4321efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            // A different file name is needed for the new shared library, to
4331efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            // avoid corrupting the currently loaded instance.
4341efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
4351efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            cloneName.append(resName);
4361efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            cloneName.append("#");
4371efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            cloneName.append(SharedLibraryUtils::getRandomString(6).string());
4381efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
4391efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            // The last element in arguments is the output filename.
4401efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            arguments.pop_back();
4411efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            arguments.push_back(cloneName.c_str());
4421efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        }
4431efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
444f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        dlclose(mScriptObj);
445f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        mScriptObj = nullptr;
446f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    }
447f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
448f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
449f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    // Fuse the input kernels and generate native code in an object file
450f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
451f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
452f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    arguments.push_back("-build-checksum");
453cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    arguments.push_back(checksumStr);
454f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    arguments.push_back(nullptr);
455eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
4562fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar    bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH,
457f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni                                      arguments.size()-1,
458f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni                                      arguments.data());
4592fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar    if (!compiled) {
460eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
461eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
462da0f069871343119251d6b0586be356dc2146a62Yang Ni
463eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
464eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Create and load the shared lib
465eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
466da0f069871343119251d6b0586be356dc2146a62Yang Ni
4674c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines    if (!SharedLibraryUtils::createSharedLibrary(
4684c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines            getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) {
469eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Failed to link object file '%s'", resName);
4708b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni        unlink(objFilePath.c_str());
471eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
472eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
473da0f069871343119251d6b0586be356dc2146a62Yang Ni
4748b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni    unlink(objFilePath.c_str());
4758b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni
476062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
477062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj == nullptr) {
478eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Unable to load '%s'", resName);
479eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
480eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
481da0f069871343119251d6b0586be356dc2146a62Yang Ni
4821efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    if (alreadyLoaded) {
4831efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        // Delete the temporary, random-named file that we created to avoid
4841efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        // interfering with an already loaded shared library.
4851efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        string cloneFilePath(cacheDir);
4861efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        cloneFilePath.append("/");
4871efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        cloneFilePath.append(cloneName.c_str());
4881efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        cloneFilePath.append(".so");
4891efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        unlink(cloneFilePath.c_str());
4901efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    }
4911efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
492eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mExecutable = ScriptExecutable::createFromSharedObject(
493bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni        getCpuRefImpl()->getContext(),
494062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mScriptObj);
495da0f069871343119251d6b0586be356dc2146a62Yang Ni
496da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif  // RS_COMPATIBILITY_LIB
4971ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
4981ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
4991ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() {
500eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (auto batch : mBatches) {
501eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->setGlobalsForBatch();
502eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->run();
503eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
5041ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
5051ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
506da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() {
507eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
508eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
509062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
510062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        Script* s = funcID->mScript;;
511eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        for (const auto& p : closure->mGlobals) {
512eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const void* value = p.second.first;
513eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            int size = p.second.second;
514eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            if (value == nullptr && size == 0) {
515eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // This indicates the current closure depends on another closure for a
516eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // global in their shared module (script). In this case we don't need to
517eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // copy the value. For example, an invoke intializes a global variable
518eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // which a kernel later reads.
519eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                continue;
520eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
521ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            rsAssert(p.first != nullptr);
522062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            Script* script = p.first->mScript;
523cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni            RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl();
524062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const RsdCpuScriptImpl *cpuScript =
525cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni                    (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
526062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            int slot = p.first->mSlot;
527062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ScriptExecutable* exec = mGroup->getExecutable();
528062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            if (exec != nullptr) {
529062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                const char* varName = cpuScript->getFieldName(slot);
530062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                void* addr = exec->getFieldAddress(varName);
531062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
532062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
533062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                 (rs_object_base*)addr, (ObjectBase*)value);
534062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
535062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    memcpy(addr, (const void*)&value, size);
536062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
537eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            } else {
538062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                // We use -1 size to indicate an ObjectBase rather than a primitive type
539062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
540062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVarObj(slot, (ObjectBase*)value);
541062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
542062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVar(slot, (const void*)&value, size);
543062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
544eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
545eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
5461ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
5471ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
5481ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
549da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() {
550062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!mClosures.front()->mClosure->mIsKernel) {
551062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(mClosures.size() == 1);
552062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
553062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // This batch contains a single closure for an invoke function
554062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        CPUClosure* cc = mClosures.front();
555062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Closure* c = cc->mClosure;
556062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
557062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (mFunc != nullptr) {
558062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // TODO: Need align pointers for x86_64.
559062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
560062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
561062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
562062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
563062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(invokeID != nullptr);
564062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
565062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
566062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
567062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        return;
568062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
569062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
570062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mFunc != nullptr) {
571eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        MTLaunchStruct mtls;
572eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* firstCpuClosure = mClosures.front();
573eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* lastCpuClosure = mClosures.back();
574eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
575eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        firstCpuClosure->mSi->forEachMtlsSetup(
576ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
577ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
578eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
579eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, 0, nullptr, &mtls);
580eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
581eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
582eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = nullptr;
583062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mtls.kernel = (ForEachFunc_t)mFunc;
584eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
585eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(
586ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
587ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
588eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
589eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, &mtls);
590eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
591eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
592eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
593eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
594eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
595eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
596062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
597062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
598eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->preLaunch(kernelID->mSlot,
599ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   (const Allocation**)closure->mArgs,
600ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   closure->mNumArg, closure->mReturnValue,
601062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                   nullptr, 0, nullptr);
602eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
603da0f069871343119251d6b0586be356dc2146a62Yang Ni
604eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const CPUClosure* cpuClosure = mClosures.front();
605eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const Closure* closure = cpuClosure->mClosure;
606eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    MTLaunchStruct mtls;
607da0f069871343119251d6b0586be356dc2146a62Yang Ni
608ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
609ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                          closure->mNumArg,
610eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          closure->mReturnValue,
611eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          nullptr, 0, nullptr, &mtls)) {
612da0f069871343119251d6b0586be356dc2146a62Yang Ni
613eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
614eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.kernel = (void (*)())&groupRoot;
615eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = &mClosures;
616da0f069871343119251d6b0586be356dc2146a62Yang Ni
617eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
618eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
619eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
620eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
621eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
622062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
623062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
624eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->postLaunch(kernelID->mSlot,
625ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    (const Allocation**)closure->mArgs,
626ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    closure->mNumArg, closure->mReturnValue,
627eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                    nullptr, 0, nullptr);
628eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
6291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
6301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
6311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace renderscript
6321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace android
633