rsCpuScriptGroup2.cpp revision 4c368af7e705f0bcb77fa99495b2e33ef20d2699
166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include "rsCpuScriptGroup2.h"
266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include <dlfcn.h>
466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include <stdio.h>
566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include <stdlib.h>
666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include <unistd.h>
766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include <set>
966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include <sstream>
1066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include <string>
1166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include <vector>
1266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
1366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#ifndef RS_COMPATIBILITY_LIB
1466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include "bcc/Config/Config.h"
1566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#endif
1666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
1766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include "cpu_ref/rsCpuCore.h"
1866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include "rsClosure.h"
1966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include "rsContext.h"
2066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include "rsCpuCore.h"
2166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include "rsCpuExecutable.h"
2266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include "rsCpuScript.h"
2366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include "rsScript.h"
2466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include "rsScriptGroup2.h"
2566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman#include "rsScriptIntrinsic.h"
2666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
2766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumanusing std::string;
2866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumanusing std::vector;
2966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
3066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumannamespace android {
3166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumannamespace renderscript {
3266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
3366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumannamespace {
3466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
// Default number of kernel arguments.
// NOTE(review): not referenced anywhere in the visible part of this file.
constexpr size_t DefaultKernelArgCount = 2;
3666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
3766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumanvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
3866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman               uint32_t xend, uint32_t outstep) {
3966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
4066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
4166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
4266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    const size_t oldInLen = mutable_kinfo->inLen;
4366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
4466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    decltype(mutable_kinfo->inStride) oldInStride;
4566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
4666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
4766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    for (CPUClosure* cpuClosure : closures) {
4866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        const Closure* closure = cpuClosure->mClosure;
4966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
5066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        // There had better be enough space in mutable_kinfo
5166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
5266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
5366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        for (size_t i = 0; i < closure->mNumArg; i++) {
5466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            const void* arg = closure->mArgs[i];
5566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            const Allocation* a = (const Allocation*)arg;
5666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            const uint32_t eStride = a->mHal.state.elementSizeBytes;
5766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
5866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman                    eStride * xstart;
5966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            if (kinfo->dim.y > 1) {
6066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman                ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
6166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            }
6266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            mutable_kinfo->inPtr[i] = ptr;
6366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            mutable_kinfo->inStride[i] = eStride;
6466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        }
6566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        mutable_kinfo->inLen = closure->mNumArg;
6666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
6766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        const Allocation* out = closure->mReturnValue;
6866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        const uint32_t ostep = out->mHal.state.elementSizeBytes;
6966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
7066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman                ostep * xstart;
7166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        if (kinfo->dim.y > 1) {
7266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
7366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        }
7466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
7566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        rsAssert(kinfo->outLen <= 1);
7666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
7766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
7866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        cpuClosure->mFunc(kinfo, xstart, xend, ostep);
7966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    }
8066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
8166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    mutable_kinfo->inLen = oldInLen;
8266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
8366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman}
8466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
8566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman}  // namespace
8666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
8766b8ab22586debccb1f787d4d52b7f042d4ddeb8John BaumanBatch::Batch(CpuScriptGroup2Impl* group, const char* name) :
8866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    mGroup(group), mFunc(nullptr) {
8966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    mName = strndup(name, strlen(name));
9066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman}
9166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
9266b8ab22586debccb1f787d4d52b7f042d4ddeb8John BaumanBatch::~Batch() {
9366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    for (CPUClosure* c : mClosures) {
9466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        delete c;
9566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    }
9666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    free(mName);
9766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman}
9866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
9966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumanbool Batch::conflict(CPUClosure* cpuClosure) const {
10066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    if (mClosures.empty()) {
10166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        return false;
10266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    }
10366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
10466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    const Closure* closure = cpuClosure->mClosure;
10566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
10666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
10766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        // An invoke should be in a batch by itself, so it conflicts with any other
10866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        // closure.
10966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        return true;
11066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    }
11166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
11266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    const auto& globalDeps = closure->mGlobalDeps;
11366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    const auto& argDeps = closure->mArgDeps;
11466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
11566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    for (CPUClosure* c : mClosures) {
11666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        const Closure* batched = c->mClosure;
11766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        if (globalDeps.find(batched) != globalDeps.end()) {
11866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            return true;
11966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        }
12066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        const auto& it = argDeps.find(batched);
12166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        if (it != argDeps.end()) {
12266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            const auto& args = (*it).second;
12366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            for (const auto &p1 : *args) {
12466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman                if (p1.second.get() != nullptr) {
12566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman                    return true;
12666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman                }
12766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman            }
12866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        }
12966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    }
13066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
13166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    return false;
13266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman}
13366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
13466b8ab22586debccb1f787d4d52b7f042d4ddeb8John BaumanCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
13566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman                                         const ScriptGroupBase *sg) :
13666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
13766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    mExecutable(nullptr), mScriptObj(nullptr) {
13866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    rsAssert(!mGroup->mClosures.empty());
13966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman
14066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    Batch* batch = new Batch(this, "Batch0");
14166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    int i = 0;
14266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman    for (Closure* closure: mGroup->mClosures) {
14366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        CPUClosure* cc;
14466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        const IDBase* funcID = closure->mFunctionID.get();
14566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman        RsdCpuScriptImpl* si =
14666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
147        if (closure->mIsKernel) {
148            MTLaunchStruct mtls;
149            si->forEachKernelSetup(funcID->mSlot, &mtls);
150            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
151        } else {
152            cc = new CPUClosure(closure, si);
153        }
154
155        if (batch->conflict(cc)) {
156            mBatches.push_back(batch);
157            std::stringstream ss;
158            ss << "Batch" << ++i;
159            batch = new Batch(this, ss.str().c_str());
160        }
161
162        batch->mClosures.push_back(cc);
163    }
164
165    rsAssert(!batch->mClosures.empty());
166    mBatches.push_back(batch);
167
168#ifndef RS_COMPATIBILITY_LIB
169    compile(mGroup->mCacheDir);
170    if (mScriptObj != nullptr && mExecutable != nullptr) {
171        for (Batch* batch : mBatches) {
172            batch->resolveFuncPtr(mScriptObj);
173        }
174    }
175#endif  // RS_COMPATIBILITY_LIB
176}
177
178void Batch::resolveFuncPtr(void* sharedObj) {
179    std::string funcName(mName);
180    if (mClosures.front()->mClosure->mIsKernel) {
181        funcName.append(".expand");
182    }
183    mFunc = dlsym(sharedObj, funcName.c_str());
184    rsAssert (mFunc != nullptr);
185}
186
187CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
188    for (Batch* batch : mBatches) {
189        delete batch;
190    }
191    delete mExecutable;
192    // TODO: move this dlclose into ~ScriptExecutable().
193    if (mScriptObj != nullptr) {
194        dlclose(mScriptObj);
195    }
196}
197
198namespace {
199
200#ifndef RS_COMPATIBILITY_LIB
201
202string getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
203    *coreLibRelaxedPath = "";
204
205    // If we're debugging, use the debug library.
206    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
207        return SYSLIBPATH"/libclcore_debug.bc";
208    }
209
210    // Check for a platform specific library
211
212#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
213    // NEON-capable ARMv7a devices can use an accelerated math library
214    // for all reduced precision scripts.
215    // ARMv8 does not use NEON, as ASIMD can be used with all precision
216    // levels.
217    *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
218#endif
219
220#if defined(__i386__) || defined(__x86_64__)
221    // x86 devices will use an optimized library.
222    return SYSLIBPATH"/libclcore_x86.bc";
223#else
224    return SYSLIBPATH"/libclcore.bc";
225#endif
226}
227
228void setupCompileArguments(
229        const vector<const char*>& inputs, const vector<string>& kernelBatches,
230        const vector<string>& invokeBatches,
231        const char* outputDir, const char* outputFileName,
232        const char* coreLibPath, const char* coreLibRelaxedPath,
233        vector<const char*>* args) {
234    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
235    args->push_back("-fPIC");
236    args->push_back("-embedRSInfo");
237    args->push_back("-mtriple");
238    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
239    args->push_back("-bclib");
240    args->push_back(coreLibPath);
241    args->push_back("-bclib_relaxed");
242    args->push_back(coreLibRelaxedPath);
243    for (const char* input : inputs) {
244        args->push_back(input);
245    }
246    for (const string& batch : kernelBatches) {
247        args->push_back("-merge");
248        args->push_back(batch.c_str());
249    }
250    for (const string& batch : invokeBatches) {
251        args->push_back("-invoke");
252        args->push_back(batch.c_str());
253    }
254    args->push_back("-output_path");
255    args->push_back(outputDir);
256    args->push_back("-o");
257    args->push_back(outputFileName);
258}
259
260void generateSourceSlot(const Closure& closure,
261                        const std::vector<const char*>& inputs,
262                        std::stringstream& ss) {
263    const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
264    const Script* script = funcID->mScript;
265
266    rsAssert (!script->isIntrinsic());
267
268    const RsdCpuScriptImpl *cpuScript =
269            (const RsdCpuScriptImpl*)script->mHal.drv;
270    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
271
272    const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
273            inputs.begin();
274
275    ss << index << "," << funcID->mSlot << ".";
276}
277
278#endif  // RS_COMPATIBILTY_LIB
279
280}  // anonymous namespace
281
282void CpuScriptGroup2Impl::compile(const char* cacheDir) {
283#ifndef RS_COMPATIBILITY_LIB
284    if (mGroup->mClosures.size() < 2) {
285        return;
286    }
287
288    auto comparator = [](const char* str1, const char* str2) -> bool {
289        return strcmp(str1, str2) < 0;
290    };
291    std::set<const char*, decltype(comparator)> inputSet(comparator);
292
293    for (Closure* closure : mGroup->mClosures) {
294        const Script* script = closure->mFunctionID.get()->mScript;
295
296        // If any script is an intrinsic, give up trying fusing the kernels.
297        if (script->isIntrinsic()) {
298            return;
299        }
300
301        const RsdCpuScriptImpl *cpuScript =
302                (const RsdCpuScriptImpl*)script->mHal.drv;
303        const char* bitcodeFilename = cpuScript->getBitcodeFilePath();
304        inputSet.insert(bitcodeFilename);
305    }
306
307    std::vector<const char*> inputs(inputSet.begin(), inputSet.end());
308
309    std::vector<string> kernelBatches;
310    std::vector<string> invokeBatches;
311
312    int i = 0;
313    for (const auto& batch : mBatches) {
314        rsAssert(batch->size() > 0);
315
316        std::stringstream ss;
317        ss << batch->mName << ":";
318
319        if (!batch->mClosures.front()->mClosure->mIsKernel) {
320            rsAssert(batch->size() == 1);
321            generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss);
322            invokeBatches.push_back(ss.str());
323        } else {
324            for (const auto& cpuClosure : batch->mClosures) {
325                generateSourceSlot(*cpuClosure->mClosure, inputs, ss);
326            }
327            kernelBatches.push_back(ss.str());
328        }
329    }
330
331    rsAssert(cacheDir != nullptr);
332    string objFilePath(cacheDir);
333    objFilePath.append("/");
334    objFilePath.append(mGroup->mName);
335    objFilePath.append(".o");
336
337    const char* resName = mGroup->mName;
338    string coreLibRelaxedPath;
339    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
340                                               &coreLibRelaxedPath);
341
342    vector<const char*> arguments;
343    setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
344                          resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(),
345                          &arguments);
346
347    std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1,
348                                                       arguments.data()));
349
350    inputs.push_back(coreLibPath.c_str());
351    inputs.push_back(coreLibRelaxedPath.c_str());
352
353    uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(),
354                                               inputs.data(), inputs.size());
355
356    if (checksum == 0) {
357        return;
358    }
359
360    std::stringstream ss;
361    ss << std::hex << checksum;
362    const char* checksumStr = ss.str().c_str();
363
364    //===--------------------------------------------------------------------===//
365    // Try to load a shared lib from code cache matching filename and checksum
366    //===--------------------------------------------------------------------===//
367
368    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
369    if (mScriptObj != nullptr) {
370        mExecutable = ScriptExecutable::createFromSharedObject(
371            getCpuRefImpl()->getContext(), mScriptObj, checksum);
372        if (mExecutable != nullptr) {
373            return;
374        } else {
375            ALOGE("Failed to create an executable object from so file");
376        }
377        dlclose(mScriptObj);
378        mScriptObj = nullptr;
379    }
380
381    //===--------------------------------------------------------------------===//
382    // Fuse the input kernels and generate native code in an object file
383    //===--------------------------------------------------------------------===//
384
385    arguments.push_back("-build-checksum");
386    arguments.push_back(checksumStr);
387    arguments.push_back(nullptr);
388
389    bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH,
390                                      arguments.size()-1,
391                                      arguments.data());
392    if (!compiled) {
393        return;
394    }
395
396    //===--------------------------------------------------------------------===//
397    // Create and load the shared lib
398    //===--------------------------------------------------------------------===//
399
400    if (!SharedLibraryUtils::createSharedLibrary(
401            getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) {
402        ALOGE("Failed to link object file '%s'", resName);
403        unlink(objFilePath.c_str());
404        return;
405    }
406
407    unlink(objFilePath.c_str());
408
409    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
410    if (mScriptObj == nullptr) {
411        ALOGE("Unable to load '%s'", resName);
412        return;
413    }
414
415    mExecutable = ScriptExecutable::createFromSharedObject(
416        getCpuRefImpl()->getContext(),
417        mScriptObj);
418
419#endif  // RS_COMPATIBILITY_LIB
420}
421
422void CpuScriptGroup2Impl::execute() {
423    for (auto batch : mBatches) {
424        batch->setGlobalsForBatch();
425        batch->run();
426    }
427}
428
429void Batch::setGlobalsForBatch() {
430    for (CPUClosure* cpuClosure : mClosures) {
431        const Closure* closure = cpuClosure->mClosure;
432        const IDBase* funcID = closure->mFunctionID.get();
433        Script* s = funcID->mScript;;
434        for (const auto& p : closure->mGlobals) {
435            const void* value = p.second.first;
436            int size = p.second.second;
437            if (value == nullptr && size == 0) {
438                // This indicates the current closure depends on another closure for a
439                // global in their shared module (script). In this case we don't need to
440                // copy the value. For example, an invoke intializes a global variable
441                // which a kernel later reads.
442                continue;
443            }
444            rsAssert(p.first != nullptr);
445            Script* script = p.first->mScript;
446            const RsdCpuScriptImpl *cpuScript =
447                    (const RsdCpuScriptImpl*)script->mHal.drv;
448            int slot = p.first->mSlot;
449            ScriptExecutable* exec = mGroup->getExecutable();
450            if (exec != nullptr) {
451                const char* varName = cpuScript->getFieldName(slot);
452                void* addr = exec->getFieldAddress(varName);
453                if (size < 0) {
454                    rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
455                                 (rs_object_base*)addr, (ObjectBase*)value);
456                } else {
457                    memcpy(addr, (const void*)&value, size);
458                }
459            } else {
460                // We use -1 size to indicate an ObjectBase rather than a primitive type
461                if (size < 0) {
462                    s->setVarObj(slot, (ObjectBase*)value);
463                } else {
464                    s->setVar(slot, (const void*)&value, size);
465                }
466            }
467        }
468    }
469}
470
471void Batch::run() {
472    if (!mClosures.front()->mClosure->mIsKernel) {
473        rsAssert(mClosures.size() == 1);
474
475        // This batch contains a single closure for an invoke function
476        CPUClosure* cc = mClosures.front();
477        const Closure* c = cc->mClosure;
478
479        if (mFunc != nullptr) {
480            // TODO: Need align pointers for x86_64.
481            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
482            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
483        } else {
484            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
485            rsAssert(invokeID != nullptr);
486            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
487        }
488
489        return;
490    }
491
492    if (mFunc != nullptr) {
493        MTLaunchStruct mtls;
494        const CPUClosure* firstCpuClosure = mClosures.front();
495        const CPUClosure* lastCpuClosure = mClosures.back();
496
497        firstCpuClosure->mSi->forEachMtlsSetup(
498                (const Allocation**)firstCpuClosure->mClosure->mArgs,
499                firstCpuClosure->mClosure->mNumArg,
500                lastCpuClosure->mClosure->mReturnValue,
501                nullptr, 0, nullptr, &mtls);
502
503        mtls.script = nullptr;
504        mtls.fep.usr = nullptr;
505        mtls.kernel = (ForEachFunc_t)mFunc;
506
507        mGroup->getCpuRefImpl()->launchThreads(
508                (const Allocation**)firstCpuClosure->mClosure->mArgs,
509                firstCpuClosure->mClosure->mNumArg,
510                lastCpuClosure->mClosure->mReturnValue,
511                nullptr, &mtls);
512
513        return;
514    }
515
516    for (CPUClosure* cpuClosure : mClosures) {
517        const Closure* closure = cpuClosure->mClosure;
518        const ScriptKernelID* kernelID =
519                (const ScriptKernelID*)closure->mFunctionID.get();
520        cpuClosure->mSi->preLaunch(kernelID->mSlot,
521                                   (const Allocation**)closure->mArgs,
522                                   closure->mNumArg, closure->mReturnValue,
523                                   nullptr, 0, nullptr);
524    }
525
526    const CPUClosure* cpuClosure = mClosures.front();
527    const Closure* closure = cpuClosure->mClosure;
528    MTLaunchStruct mtls;
529
530    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
531                                          closure->mNumArg,
532                                          closure->mReturnValue,
533                                          nullptr, 0, nullptr, &mtls)) {
534
535        mtls.script = nullptr;
536        mtls.kernel = (void (*)())&groupRoot;
537        mtls.fep.usr = &mClosures;
538
539        mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
540    }
541
542    for (CPUClosure* cpuClosure : mClosures) {
543        const Closure* closure = cpuClosure->mClosure;
544        const ScriptKernelID* kernelID =
545                (const ScriptKernelID*)closure->mFunctionID.get();
546        cpuClosure->mSi->postLaunch(kernelID->mSlot,
547                                    (const Allocation**)closure->mArgs,
548                                    closure->mNumArg, closure->mReturnValue,
549                                    nullptr, 0, nullptr);
550    }
551}
552
553}  // namespace renderscript
554}  // namespace android
555