11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h"
21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h>
4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h>
5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h>
6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h>
7da0f069871343119251d6b0586be356dc2146a62Yang Ni
8062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <set>
9062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <sstream>
10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string>
11da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector>
12da0f069871343119251d6b0586be356dc2146a62Yang Ni
13da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
14da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h"
15da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif
16da0f069871343119251d6b0586be356dc2146a62Yang Ni
171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h"
181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h"
191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h"
201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h"
212abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h"
221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h"
231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h"
241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h"
25da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h"
26da0f069871343119251d6b0586be356dc2146a62Yang Ni
27da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string;
28da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector;
291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android {
311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript {
321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace {
341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
// NOTE(review): presumably the default number of arguments assumed for a
// kernel; no use is visible in this part of the file -- confirm against callers.
constexpr size_t DefaultKernelArgCount = 2;
361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
37b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni               uint32_t xend, uint32_t outstep) {
39b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
40b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
42b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    const size_t oldInLen = mutable_kinfo->inLen;
43b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross
44b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    decltype(mutable_kinfo->inStride) oldInStride;
45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : closures) {
48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
50b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        // There had better be enough space in mutable_kinfo
51b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
53ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        for (size_t i = 0; i < closure->mNumArg; i++) {
54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const void* arg = closure->mArgs[i];
55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const Allocation* a = (const Allocation*)arg;
56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint32_t eStride = a->mHal.state.elementSizeBytes;
57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                    eStride * xstart;
59b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            if (kinfo->dim.y > 1) {
60b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mutable_kinfo->inPtr[i] = ptr;
63b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mutable_kinfo->inStride[i] = eStride;
64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        mutable_kinfo->inLen = closure->mNumArg;
661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Allocation* out = closure->mReturnValue;
68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint32_t ostep = out->mHal.state.elementSizeBytes;
69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                ostep * xstart;
71b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        if (kinfo->dim.y > 1) {
72b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
75b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        rsAssert(kinfo->outLen <= 1);
76b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
78a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni        // The implementation of an intrinsic relies on kinfo->usr being
79a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni        // the "this" pointer to the intrinsic (an RsdCpuScriptIntrinsic object)
80a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni        mutable_kinfo->usr = cpuClosure->mSi;
81a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni
82b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        cpuClosure->mFunc(kinfo, xstart, xend, ostep);
831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
85b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    mutable_kinfo->inLen = oldInLen;
86a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni    mutable_kinfo->usr = &closures;
87b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
881ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
891ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
90da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // namespace
91da0f069871343119251d6b0586be356dc2146a62Yang Ni
92062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) :
93062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mGroup(group), mFunc(nullptr) {
94062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mName = strndup(name, strlen(name));
95062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
96062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
97da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() {
98eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* c : mClosures) {
99eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete c;
100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
101062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    free(mName);
102da0f069871343119251d6b0586be356dc2146a62Yang Ni}
103da0f069871343119251d6b0586be356dc2146a62Yang Ni
104ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const {
105eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    if (mClosures.empty()) {
106eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return false;
107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
108da0f069871343119251d6b0586be356dc2146a62Yang Ni
109ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const Closure* closure = cpuClosure->mClosure;
110ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
111062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
112eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // An invoke should be in a batch by itself, so it conflicts with any other
113eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        // closure.
1141ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        return true;
1151ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
116eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& globalDeps = closure->mGlobalDeps;
118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    const auto& argDeps = closure->mArgDeps;
119ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    for (CPUClosure* c : mClosures) {
121ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const Closure* batched = c->mClosure;
122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (globalDeps.find(batched) != globalDeps.end()) {
123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            return true;
124eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        const auto& it = argDeps.find(batched);
126ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni        if (it != argDeps.end()) {
127ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            const auto& args = (*it).second;
128ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            for (const auto &p1 : *args) {
129bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni                if (p1.second.get() != nullptr) {
130ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                    return true;
131eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                }
132eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
1331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni        }
1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
135ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni
1361c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    // The compiler fusion pass in bcc expects that kernels chained up through
1371c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    // (1st) input and output.
1381c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni
1391c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    const Closure* lastBatched = mClosures.back()->mClosure;
1401c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    const auto& it = argDeps.find(lastBatched);
1411c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni
1421c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    if (it == argDeps.end()) {
1431c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni        return true;
1441c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    }
1451c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni
1461c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    const auto& args = (*it).second;
1471c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    for (const auto &p1 : *args) {
1481c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni        if (p1.first == 0 && p1.second.get() == nullptr) {
1491c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni            // The new closure depends on the last batched closure's return
1501c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni            // value (fieldId being nullptr) for its first argument (argument 0)
1511c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni            return false;
1521c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni        }
1531c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    }
1541c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni
1551c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni    return true;
1561ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
1571ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
1581ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
1591ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni                                         const ScriptGroupBase *sg) :
160062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
161062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mExecutable(nullptr), mScriptObj(nullptr) {
162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!mGroup->mClosures.empty());
163eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
1641efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    mCpuRefImpl->lockMutex();
165062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    Batch* batch = new Batch(this, "Batch0");
166062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
167eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Closure* closure: mGroup->mClosures) {
168eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        CPUClosure* cc;
169062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
170062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        RsdCpuScriptImpl* si =
171062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
172062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (closure->mIsKernel) {
17314ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala            MTLaunchStructForEach mtls;
174062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            si->forEachKernelSetup(funcID->mSlot, &mtls);
175062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
176eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        } else {
177eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            cc = new CPUClosure(closure, si);
178eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
1791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
180eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (batch->conflict(cc)) {
181eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            mBatches.push_back(batch);
182062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            std::stringstream ss;
183062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ss << "Batch" << ++i;
184062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch = new Batch(this, ss.str().c_str());
185eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
186da0f069871343119251d6b0586be356dc2146a62Yang Ni
187eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->mClosures.push_back(cc);
188eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
189da0f069871343119251d6b0586be356dc2146a62Yang Ni
190eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    rsAssert(!batch->mClosures.empty());
191eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    mBatches.push_back(batch);
192da0f069871343119251d6b0586be356dc2146a62Yang Ni
193da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
194062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    compile(mGroup->mCacheDir);
195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr && mExecutable != nullptr) {
196062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        for (Batch* batch : mBatches) {
197062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            batch->resolveFuncPtr(mScriptObj);
198062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
199eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
200062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif  // RS_COMPATIBILITY_LIB
2011efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    mCpuRefImpl->unlockMutex();
202062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
203062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
204062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) {
205062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::string funcName(mName);
206062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mClosures.front()->mClosure->mIsKernel) {
207062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        funcName.append(".expand");
208062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
209062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mFunc = dlsym(sharedObj, funcName.c_str());
210062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (mFunc != nullptr);
2111ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
2121ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
2131ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
214eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (Batch* batch : mBatches) {
215eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        delete batch;
216eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
217bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni    delete mExecutable;
218062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    // TODO: move this dlclose into ~ScriptExecutable().
219062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj != nullptr) {
220062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        dlclose(mScriptObj);
221062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
222da0f069871343119251d6b0586be356dc2146a62Yang Ni}
223da0f069871343119251d6b0586be356dc2146a62Yang Ni
224da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace {
225da0f069871343119251d6b0586be356dc2146a62Yang Ni
226da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
227da0f069871343119251d6b0586be356dc2146a62Yang Ni
228edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Nistring getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
229edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    *coreLibRelaxedPath = "";
230edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
231edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // If we're debugging, use the debug library.
232edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
233edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni        return SYSLIBPATH"/libclcore_debug.bc";
234edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    }
235edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
236edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // Check for a platform specific library
237edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
238edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
239edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // NEON-capable ARMv7a devices can use an accelerated math library
240edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // for all reduced precision scripts.
241edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // ARMv8 does not use NEON, as ASIMD can be used with all precision
242edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // levels.
243edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
244edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif
245edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
246edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(__i386__) || defined(__x86_64__)
247edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    // x86 devices will use an optimized library.
248edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    return SYSLIBPATH"/libclcore_x86.bc";
249edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#else
250edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    return SYSLIBPATH"/libclcore.bc";
251edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif
252edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni}
253edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni
254da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments(
255cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const vector<const char*>& inputs, const vector<string>& kernelBatches,
256062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const vector<string>& invokeBatches,
257cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const char* outputDir, const char* outputFileName,
258cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const char* coreLibPath, const char* coreLibRelaxedPath,
2598237638f87ca0e265d050fbb13725b41a795fe5fYang Ni        const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant,
260f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham        int optLevel, vector<const char*>* args) {
261eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
262eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-fPIC");
263eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-embedRSInfo");
2648237638f87ca0e265d050fbb13725b41a795fe5fYang Ni    if (emitGlobalInfo) {
2658237638f87ca0e265d050fbb13725b41a795fe5fYang Ni        args->push_back("-rs-global-info");
2668237638f87ca0e265d050fbb13725b41a795fe5fYang Ni        if (emitGlobalInfoSkipConstant) {
2678237638f87ca0e265d050fbb13725b41a795fe5fYang Ni            args->push_back("-rs-global-info-skip-constant");
2688237638f87ca0e265d050fbb13725b41a795fe5fYang Ni        }
2698237638f87ca0e265d050fbb13725b41a795fe5fYang Ni    }
270eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-mtriple");
271eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
272eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-bclib");
273cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(coreLibPath);
274edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    args->push_back("-bclib_relaxed");
275cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(coreLibRelaxedPath);
276cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    for (const char* input : inputs) {
277cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        args->push_back(input);
278eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
279062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : kernelBatches) {
280062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-merge");
281062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
282062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
283062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const string& batch : invokeBatches) {
284062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back("-invoke");
285062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        args->push_back(batch.c_str());
286eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
287eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-output_path");
288cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(outputDir);
2891efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
290f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham    args->push_back("-O");
291a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni    switch (optLevel) {
292a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni    case 0:
293a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni        args->push_back("0");
294a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni        break;
295a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni    case 3:
296a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni        args->push_back("3");
297a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni        break;
298a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni    default:
299a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni        ALOGW("Expected optimization level of 0 or 3. Received %d", optLevel);
300a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni        args->push_back("3");
301a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni        break;
302a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni    }
303f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham
3041efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    // The output filename has to be the last, in case we need to pop it out and
3051efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    // replace with a different name.
306eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    args->push_back("-o");
307cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    args->push_back(outputFileName);
308da0f069871343119251d6b0586be356dc2146a62Yang Ni}
309da0f069871343119251d6b0586be356dc2146a62Yang Ni
310cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Nivoid generateSourceSlot(RsdCpuReferenceImpl* ctxt,
311cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni                        const Closure& closure,
312cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                        const std::vector<const char*>& inputs,
313062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                        std::stringstream& ss) {
314062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
315062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const Script* script = funcID->mScript;
316062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
317062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    rsAssert (!script->isIntrinsic());
318062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
319062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const RsdCpuScriptImpl *cpuScript =
320cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni            (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
321062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
322062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
323062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
324062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            inputs.begin();
325062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
326062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    ss << index << "," << funcID->mSlot << ".";
327062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni}
328062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
329062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif  // RS_COMPATIBILITY_LIB
330da0f069871343119251d6b0586be356dc2146a62Yang Ni
331da0f069871343119251d6b0586be356dc2146a62Yang Ni}  // anonymous namespace
332da0f069871343119251d6b0586be356dc2146a62Yang Ni
333062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) {
334da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB
335062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mGroup->mClosures.size() < 2) {
336eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
337eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
338da0f069871343119251d6b0586be356dc2146a62Yang Ni
339cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    auto comparator = [](const char* str1, const char* str2) -> bool {
340cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        return strcmp(str1, str2) < 0;
341cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    };
342cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    std::set<const char*, decltype(comparator)> inputSet(comparator);
343cb17015fed6b11a5028f31cc804a3847e379945dYang Ni
344062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (Closure* closure : mGroup->mClosures) {
345062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Script* script = closure->mFunctionID.get()->mScript;
346da0f069871343119251d6b0586be356dc2146a62Yang Ni
347062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // If any script is an intrinsic, give up trying fusing the kernels.
348eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        if (script->isIntrinsic()) {
349eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            return;
350eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
351da0f069871343119251d6b0586be356dc2146a62Yang Ni
352eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const RsdCpuScriptImpl *cpuScript =
353cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni            (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script);
354cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni
355cb17015fed6b11a5028f31cc804a3847e379945dYang Ni        const char* bitcodeFilename = cpuScript->getBitcodeFilePath();
356062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        inputSet.insert(bitcodeFilename);
357062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
358062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
359cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    std::vector<const char*> inputs(inputSet.begin(), inputSet.end());
360062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
361062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> kernelBatches;
362062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    std::vector<string> invokeBatches;
363062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
364062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    int i = 0;
365062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    for (const auto& batch : mBatches) {
366062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(batch->size() > 0);
367da0f069871343119251d6b0586be356dc2146a62Yang Ni
368062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        std::stringstream ss;
369062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        ss << batch->mName << ":";
370062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
371062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (!batch->mClosures.front()->mClosure->mIsKernel) {
372062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(batch->size() == 1);
373cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni            generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss);
374062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            invokeBatches.push_back(ss.str());
375062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
376062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            for (const auto& cpuClosure : batch->mClosures) {
377cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni                generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss);
378062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            }
379062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            kernelBatches.push_back(ss.str());
380062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
381eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
382da0f069871343119251d6b0586be356dc2146a62Yang Ni
383433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    rsAssert(cacheDir != nullptr);
384433558f0f9abbf07770db288183a15fd261cace2Yabin Cui    string objFilePath(cacheDir);
385f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    objFilePath.append("/");
386f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    objFilePath.append(mGroup->mName);
387f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    objFilePath.append(".o");
388433558f0f9abbf07770db288183a15fd261cace2Yabin Cui
389cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    const char* resName = mGroup->mName;
390edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    string coreLibRelaxedPath;
391edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
392edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni                                               &coreLibRelaxedPath);
393f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
394f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham    int optLevel = getCpuRefImpl()->getContext()->getOptLevel();
395f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham
396eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    vector<const char*> arguments;
3978237638f87ca0e265d050fbb13725b41a795fe5fYang Ni    bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo();
3988237638f87ca0e265d050fbb13725b41a795fe5fYang Ni    bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant();
399cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
400cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                          resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(),
4018237638f87ca0e265d050fbb13725b41a795fe5fYang Ni                          emitGlobalInfo, emitGlobalInfoSkipConstant,
402f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham                          optLevel, &arguments);
403f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
404f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1,
405cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                                                       arguments.data()));
406cb17015fed6b11a5028f31cc804a3847e379945dYang Ni
407cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    inputs.push_back(coreLibPath.c_str());
408cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    inputs.push_back(coreLibRelaxedPath.c_str());
409cb17015fed6b11a5028f31cc804a3847e379945dYang Ni
410cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(),
411cb17015fed6b11a5028f31cc804a3847e379945dYang Ni                                               inputs.data(), inputs.size());
412f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
413cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    if (checksum == 0) {
414f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        return;
415f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    }
416f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
417cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    std::stringstream ss;
418cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    ss << std::hex << checksum;
419cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    const char* checksumStr = ss.str().c_str();
420f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
421f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
422f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    // Try to load a shared lib from code cache matching filename and checksum
423f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
424f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
4251efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    bool alreadyLoaded = false;
4261efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    std::string cloneName;
4271efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
4281efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nullptr,
4291efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni                                                       &alreadyLoaded);
430f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    if (mScriptObj != nullptr) {
4311efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        // A shared library named resName is found in code cache directory
4321efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        // cacheDir, and loaded with the handle stored in mScriptObj.
4331efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
434f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        mExecutable = ScriptExecutable::createFromSharedObject(
4355e48002cc4a11e9ce63852a77488cfb9f6765c2bYang Ni            mScriptObj, checksum);
4361efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
437f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        if (mExecutable != nullptr) {
4381efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            // The loaded shared library in mScriptObj has a matching checksum.
4391efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            // An executable object has been created.
440cb17015fed6b11a5028f31cc804a3847e379945dYang Ni            return;
441f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        }
4421efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
4431efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        ALOGV("Failed to create an executable object from so file due to "
4441efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni              "mismatching checksum");
4451efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
4461efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        if (alreadyLoaded) {
4471efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            // The shared object found in code cache has already been loaded.
4481efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            // A different file name is needed for the new shared library, to
4491efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            // avoid corrupting the currently loaded instance.
4501efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
4511efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            cloneName.append(resName);
4521efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            cloneName.append("#");
4531efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            cloneName.append(SharedLibraryUtils::getRandomString(6).string());
4541efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
4551efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            // The last element in arguments is the output filename.
4561efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            arguments.pop_back();
4571efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni            arguments.push_back(cloneName.c_str());
4581efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        }
4591efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
460f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        dlclose(mScriptObj);
461f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni        mScriptObj = nullptr;
462f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    }
463f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
464f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
465f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    // Fuse the input kernels and generate native code in an object file
466f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    //===--------------------------------------------------------------------===//
467f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni
468f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    arguments.push_back("-build-checksum");
469cb17015fed6b11a5028f31cc804a3847e379945dYang Ni    arguments.push_back(checksumStr);
470f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni    arguments.push_back(nullptr);
471eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
4722fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar    bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH,
473f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni                                      arguments.size()-1,
474f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni                                      arguments.data());
4752fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar    if (!compiled) {
476eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
477eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
478da0f069871343119251d6b0586be356dc2146a62Yang Ni
479eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
480eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    // Create and load the shared lib
481eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    //===--------------------------------------------------------------------===//
482da0f069871343119251d6b0586be356dc2146a62Yang Ni
4834c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines    if (!SharedLibraryUtils::createSharedLibrary(
4844c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines            getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) {
485eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Failed to link object file '%s'", resName);
4868b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni        unlink(objFilePath.c_str());
487eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
488eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
489da0f069871343119251d6b0586be356dc2146a62Yang Ni
4908b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni    unlink(objFilePath.c_str());
4918b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni
492062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
493062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mScriptObj == nullptr) {
494eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        ALOGE("Unable to load '%s'", resName);
495eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
496eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
497da0f069871343119251d6b0586be356dc2146a62Yang Ni
4981efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    if (alreadyLoaded) {
4991efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        // Delete the temporary, random-named file that we created to avoid
5001efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        // interfering with an already loaded shared library.
5011efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        string cloneFilePath(cacheDir);
5021efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        cloneFilePath.append("/");
5031efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        cloneFilePath.append(cloneName.c_str());
5041efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        cloneFilePath.append(".so");
5051efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni        unlink(cloneFilePath.c_str());
5061efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni    }
5071efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni
5085e48002cc4a11e9ce63852a77488cfb9f6765c2bYang Ni    mExecutable = ScriptExecutable::createFromSharedObject(mScriptObj);
509da0f069871343119251d6b0586be356dc2146a62Yang Ni
510da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif  // RS_COMPATIBILITY_LIB
5111ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
5121ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
5131ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() {
514eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (auto batch : mBatches) {
515eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->setGlobalsForBatch();
516eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        batch->run();
517eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
5181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
5191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
520da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() {
521eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
522eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
523062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const IDBase* funcID = closure->mFunctionID.get();
524062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        Script* s = funcID->mScript;;
525eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        for (const auto& p : closure->mGlobals) {
526fef0cd45027f235126d4fb62bda5ea9037450d9cYang Ni            const int64_t value = p.second.first;
527eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            int size = p.second.second;
528fef0cd45027f235126d4fb62bda5ea9037450d9cYang Ni            if (value == 0 && size == 0) {
529eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // This indicates the current closure depends on another closure for a
530eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // global in their shared module (script). In this case we don't need to
531eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // copy the value. For example, an invoke intializes a global variable
532eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                // which a kernel later reads.
533eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                continue;
534eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
535ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni            rsAssert(p.first != nullptr);
536062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            Script* script = p.first->mScript;
537a7481b21c4ad4127f54c02c7402039d068948a34Yang Ni            rsAssert(script == s);
538cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni            RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl();
539062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const RsdCpuScriptImpl *cpuScript =
540cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni                    (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
541062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            int slot = p.first->mSlot;
542062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ScriptExecutable* exec = mGroup->getExecutable();
543062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            if (exec != nullptr) {
544062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                const char* varName = cpuScript->getFieldName(slot);
545062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                void* addr = exec->getFieldAddress(varName);
546062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
547062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
548062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                 (rs_object_base*)addr, (ObjectBase*)value);
549062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
550062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    memcpy(addr, (const void*)&value, size);
551062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
552eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            } else {
553062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                // We use -1 size to indicate an ObjectBase rather than a primitive type
554062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                if (size < 0) {
555062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVarObj(slot, (ObjectBase*)value);
556062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                } else {
557062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                    s->setVar(slot, (const void*)&value, size);
558062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                }
559eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni            }
560eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        }
5611ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    }
5621ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
5631ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
564da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() {
565062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (!mClosures.front()->mClosure->mIsKernel) {
566062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        rsAssert(mClosures.size() == 1);
567062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
568062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        // This batch contains a single closure for an invoke function
569062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        CPUClosure* cc = mClosures.front();
570062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const Closure* c = cc->mClosure;
571062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
572062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        if (mFunc != nullptr) {
573062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // TODO: Need align pointers for x86_64.
574062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
575062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
576062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        } else {
577062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
578062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            rsAssert(invokeID != nullptr);
579062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
580062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        }
581062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
582062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        return;
583062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    }
584062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni
585062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni    if (mFunc != nullptr) {
58614ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala        MTLaunchStructForEach mtls;
587eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* firstCpuClosure = mClosures.front();
588eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const CPUClosure* lastCpuClosure = mClosures.back();
589eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
590eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        firstCpuClosure->mSi->forEachMtlsSetup(
591ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
592ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
593eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
594eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, 0, nullptr, &mtls);
595eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
596eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
597eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = nullptr;
598062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        mtls.kernel = (ForEachFunc_t)mFunc;
599eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
60014ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala        mGroup->getCpuRefImpl()->launchForEach(
601ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                (const Allocation**)firstCpuClosure->mClosure->mArgs,
602ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                firstCpuClosure->mClosure->mNumArg,
603eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                lastCpuClosure->mClosure->mReturnValue,
604eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                nullptr, &mtls);
605eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
606eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        return;
607eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
608eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
609eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
610eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
611062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
612062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
613eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->preLaunch(kernelID->mSlot,
614ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   (const Allocation**)closure->mArgs,
615ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                   closure->mNumArg, closure->mReturnValue,
616062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                                   nullptr, 0, nullptr);
617eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
618da0f069871343119251d6b0586be356dc2146a62Yang Ni
619eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const CPUClosure* cpuClosure = mClosures.front();
620eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    const Closure* closure = cpuClosure->mClosure;
62114ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala    MTLaunchStructForEach mtls;
622da0f069871343119251d6b0586be356dc2146a62Yang Ni
623ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
624ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                          closure->mNumArg,
625eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          closure->mReturnValue,
626eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                          nullptr, 0, nullptr, &mtls)) {
627da0f069871343119251d6b0586be356dc2146a62Yang Ni
628eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.script = nullptr;
62914ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala        mtls.kernel = &groupRoot;
630eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        mtls.fep.usr = &mClosures;
631da0f069871343119251d6b0586be356dc2146a62Yang Ni
63214ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala        mGroup->getCpuRefImpl()->launchForEach(nullptr, 0, nullptr, nullptr, &mtls);
633eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
634eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni
635eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    for (CPUClosure* cpuClosure : mClosures) {
636eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        const Closure* closure = cpuClosure->mClosure;
637062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni        const ScriptKernelID* kernelID =
638062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni                (const ScriptKernelID*)closure->mFunctionID.get();
639eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni        cpuClosure->mSi->postLaunch(kernelID->mSlot,
640ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    (const Allocation**)closure->mArgs,
641ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni                                    closure->mNumArg, closure->mReturnValue,
642eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni                                    nullptr, 0, nullptr);
643eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni    }
6441ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}
6451ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni
6461ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace renderscript
6471ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni}  // namespace android
648