rsCpuScriptGroup2.cpp revision cbff7bcc4aacdc39d56628fa5c7c50518d52748c
1282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include "rsCpuScriptGroup2.h"
2282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
3282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include <dlfcn.h>
4282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include <stdio.h>
5282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include <stdlib.h>
6282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include <unistd.h>
7282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
8282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include <set>
9282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include <sstream>
10282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include <string>
11282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include <vector>
12282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
13282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#ifndef RS_COMPATIBILITY_LIB
14282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include "bcc/Config/Config.h"
15282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#endif
16282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
17282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include "cpu_ref/rsCpuCore.h"
18282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include "rsClosure.h"
19282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include "rsContext.h"
20282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include "rsCpuCore.h"
21282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include "rsCpuExecutable.h"
22282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include "rsCpuScript.h"
23282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include "rsScript.h"
24282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include "rsScriptGroup2.h"
25282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski#include "rsScriptIntrinsic.h"
26282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
27282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskiusing std::string;
28282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskiusing std::vector;
29282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
30282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskinamespace android {
31282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskinamespace renderscript {
32282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
33282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskinamespace {
34282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
35282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskiconst size_t DefaultKernelArgCount = 2;
36282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
37282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskivoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
38282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski               uint32_t xend, uint32_t outstep) {
39282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
40282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski    RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
41282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
42282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski    const size_t oldInLen = mutable_kinfo->inLen;
43282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
44282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski    decltype(mutable_kinfo->inStride) oldInStride;
45282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski    memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
46282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
47282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski    for (CPUClosure* cpuClosure : closures) {
48282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        const Closure* closure = cpuClosure->mClosure;
49282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
50282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        // There had better be enough space in mutable_kinfo
51282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
52282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
53282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        for (size_t i = 0; i < closure->mNumArg; i++) {
54282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski            const void* arg = closure->mArgs[i];
55282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski            const Allocation* a = (const Allocation*)arg;
56282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski            const uint32_t eStride = a->mHal.state.elementSizeBytes;
57282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
58282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski                    eStride * xstart;
59282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski            if (kinfo->dim.y > 1) {
60282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski                ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
61282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski            }
62282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski            mutable_kinfo->inPtr[i] = ptr;
63282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski            mutable_kinfo->inStride[i] = eStride;
64282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        }
65282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        mutable_kinfo->inLen = closure->mNumArg;
66282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
67282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        const Allocation* out = closure->mReturnValue;
68282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        const uint32_t ostep = out->mHal.state.elementSizeBytes;
69282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
70282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski                ostep * xstart;
71282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        if (kinfo->dim.y > 1) {
72282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski            ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
73282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        }
74282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
75282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        rsAssert(kinfo->outLen <= 1);
76282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
77282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
78282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski        cpuClosure->mFunc(kinfo, xstart, xend, ostep);
79282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski    }
80282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski
81    mutable_kinfo->inLen = oldInLen;
82    memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
83}
84
85}  // namespace
86
87Batch::Batch(CpuScriptGroup2Impl* group, const char* name) :
88    mGroup(group), mFunc(nullptr) {
89    mName = strndup(name, strlen(name));
90}
91
92Batch::~Batch() {
93    for (CPUClosure* c : mClosures) {
94        delete c;
95    }
96    free(mName);
97}
98
99bool Batch::conflict(CPUClosure* cpuClosure) const {
100    if (mClosures.empty()) {
101        return false;
102    }
103
104    const Closure* closure = cpuClosure->mClosure;
105
106    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
107        // An invoke should be in a batch by itself, so it conflicts with any other
108        // closure.
109        return true;
110    }
111
112    const auto& globalDeps = closure->mGlobalDeps;
113    const auto& argDeps = closure->mArgDeps;
114
115    for (CPUClosure* c : mClosures) {
116        const Closure* batched = c->mClosure;
117        if (globalDeps.find(batched) != globalDeps.end()) {
118            return true;
119        }
120        const auto& it = argDeps.find(batched);
121        if (it != argDeps.end()) {
122            const auto& args = (*it).second;
123            for (const auto &p1 : *args) {
124                if (p1.second.get() != nullptr) {
125                    return true;
126                }
127            }
128        }
129    }
130
131    return false;
132}
133
134CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
135                                         const ScriptGroupBase *sg) :
136    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
137    mExecutable(nullptr), mScriptObj(nullptr) {
138    rsAssert(!mGroup->mClosures.empty());
139
140    Batch* batch = new Batch(this, "Batch0");
141    int i = 0;
142    for (Closure* closure: mGroup->mClosures) {
143        CPUClosure* cc;
144        const IDBase* funcID = closure->mFunctionID.get();
145        RsdCpuScriptImpl* si =
146                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
147        if (closure->mIsKernel) {
148            MTLaunchStruct mtls;
149            si->forEachKernelSetup(funcID->mSlot, &mtls);
150            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
151        } else {
152            cc = new CPUClosure(closure, si);
153        }
154
155        if (batch->conflict(cc)) {
156            mBatches.push_back(batch);
157            std::stringstream ss;
158            ss << "Batch" << ++i;
159            batch = new Batch(this, ss.str().c_str());
160        }
161
162        batch->mClosures.push_back(cc);
163    }
164
165    rsAssert(!batch->mClosures.empty());
166    mBatches.push_back(batch);
167
168#ifndef RS_COMPATIBILITY_LIB
169    compile(mGroup->mCacheDir);
170    if (mScriptObj != nullptr && mExecutable != nullptr) {
171        for (Batch* batch : mBatches) {
172            batch->resolveFuncPtr(mScriptObj);
173        }
174    }
175#endif  // RS_COMPATIBILITY_LIB
176}
177
178void Batch::resolveFuncPtr(void* sharedObj) {
179    std::string funcName(mName);
180    if (mClosures.front()->mClosure->mIsKernel) {
181        funcName.append(".expand");
182    }
183    mFunc = dlsym(sharedObj, funcName.c_str());
184    rsAssert (mFunc != nullptr);
185}
186
187CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
188    for (Batch* batch : mBatches) {
189        delete batch;
190    }
191    delete mExecutable;
192    // TODO: move this dlclose into ~ScriptExecutable().
193    if (mScriptObj != nullptr) {
194        dlclose(mScriptObj);
195    }
196}
197
198namespace {
199
200#ifndef RS_COMPATIBILITY_LIB
201
202string getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
203    *coreLibRelaxedPath = "";
204
205    // If we're debugging, use the debug library.
206    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
207        return SYSLIBPATH"/libclcore_debug.bc";
208    }
209
210    // Check for a platform specific library
211
212#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
213    // NEON-capable ARMv7a devices can use an accelerated math library
214    // for all reduced precision scripts.
215    // ARMv8 does not use NEON, as ASIMD can be used with all precision
216    // levels.
217    *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
218#endif
219
220#if defined(__i386__) || defined(__x86_64__)
221    // x86 devices will use an optimized library.
222    return SYSLIBPATH"/libclcore_x86.bc";
223#else
224    return SYSLIBPATH"/libclcore.bc";
225#endif
226}
227
228void setupCompileArguments(
229        const vector<const char*>& inputs, const vector<string>& kernelBatches,
230        const vector<string>& invokeBatches,
231        const char* outputDir, const char* outputFileName,
232        const char* coreLibPath, const char* coreLibRelaxedPath,
233        const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant,
234        vector<const char*>* args) {
235    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
236    args->push_back("-fPIC");
237    args->push_back("-embedRSInfo");
238    if (emitGlobalInfo) {
239        args->push_back("-rs-global-info");
240        if (emitGlobalInfoSkipConstant) {
241            args->push_back("-rs-global-info-skip-constant");
242        }
243    }
244    args->push_back("-mtriple");
245    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
246    args->push_back("-bclib");
247    args->push_back(coreLibPath);
248    args->push_back("-bclib_relaxed");
249    args->push_back(coreLibRelaxedPath);
250    for (const char* input : inputs) {
251        args->push_back(input);
252    }
253    for (const string& batch : kernelBatches) {
254        args->push_back("-merge");
255        args->push_back(batch.c_str());
256    }
257    for (const string& batch : invokeBatches) {
258        args->push_back("-invoke");
259        args->push_back(batch.c_str());
260    }
261    args->push_back("-output_path");
262    args->push_back(outputDir);
263    args->push_back("-o");
264    args->push_back(outputFileName);
265}
266
267void generateSourceSlot(RsdCpuReferenceImpl* ctxt,
268                        const Closure& closure,
269                        const std::vector<const char*>& inputs,
270                        std::stringstream& ss) {
271    const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
272    const Script* script = funcID->mScript;
273
274    rsAssert (!script->isIntrinsic());
275
276    const RsdCpuScriptImpl *cpuScript =
277            (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
278    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
279
280    const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
281            inputs.begin();
282
283    ss << index << "," << funcID->mSlot << ".";
284}
285
286#endif  // RS_COMPATIBILTY_LIB
287
288}  // anonymous namespace
289
290void CpuScriptGroup2Impl::compile(const char* cacheDir) {
291#ifndef RS_COMPATIBILITY_LIB
292    if (mGroup->mClosures.size() < 2) {
293        return;
294    }
295
296    auto comparator = [](const char* str1, const char* str2) -> bool {
297        return strcmp(str1, str2) < 0;
298    };
299    std::set<const char*, decltype(comparator)> inputSet(comparator);
300
301    for (Closure* closure : mGroup->mClosures) {
302        const Script* script = closure->mFunctionID.get()->mScript;
303
304        // If any script is an intrinsic, give up trying fusing the kernels.
305        if (script->isIntrinsic()) {
306            return;
307        }
308
309        const RsdCpuScriptImpl *cpuScript =
310            (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script);
311
312        const char* bitcodeFilename = cpuScript->getBitcodeFilePath();
313        inputSet.insert(bitcodeFilename);
314    }
315
316    std::vector<const char*> inputs(inputSet.begin(), inputSet.end());
317
318    std::vector<string> kernelBatches;
319    std::vector<string> invokeBatches;
320
321    int i = 0;
322    for (const auto& batch : mBatches) {
323        rsAssert(batch->size() > 0);
324
325        std::stringstream ss;
326        ss << batch->mName << ":";
327
328        if (!batch->mClosures.front()->mClosure->mIsKernel) {
329            rsAssert(batch->size() == 1);
330            generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss);
331            invokeBatches.push_back(ss.str());
332        } else {
333            for (const auto& cpuClosure : batch->mClosures) {
334                generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss);
335            }
336            kernelBatches.push_back(ss.str());
337        }
338    }
339
340    rsAssert(cacheDir != nullptr);
341    string objFilePath(cacheDir);
342    objFilePath.append("/");
343    objFilePath.append(mGroup->mName);
344    objFilePath.append(".o");
345
346    const char* resName = mGroup->mName;
347    string coreLibRelaxedPath;
348    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
349                                               &coreLibRelaxedPath);
350
351    vector<const char*> arguments;
352    bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo();
353    bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant();
354    setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
355                          resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(),
356                          emitGlobalInfo, emitGlobalInfoSkipConstant,
357                          &arguments);
358
359    std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1,
360                                                       arguments.data()));
361
362    inputs.push_back(coreLibPath.c_str());
363    inputs.push_back(coreLibRelaxedPath.c_str());
364
365    uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(),
366                                               inputs.data(), inputs.size());
367
368    if (checksum == 0) {
369        return;
370    }
371
372    std::stringstream ss;
373    ss << std::hex << checksum;
374    const char* checksumStr = ss.str().c_str();
375
376    //===--------------------------------------------------------------------===//
377    // Try to load a shared lib from code cache matching filename and checksum
378    //===--------------------------------------------------------------------===//
379
380    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
381    if (mScriptObj != nullptr) {
382        mExecutable = ScriptExecutable::createFromSharedObject(
383            getCpuRefImpl()->getContext(), mScriptObj, checksum);
384        if (mExecutable != nullptr) {
385            return;
386        } else {
387            ALOGE("Failed to create an executable object from so file");
388        }
389        dlclose(mScriptObj);
390        mScriptObj = nullptr;
391    }
392
393    //===--------------------------------------------------------------------===//
394    // Fuse the input kernels and generate native code in an object file
395    //===--------------------------------------------------------------------===//
396
397    arguments.push_back("-build-checksum");
398    arguments.push_back(checksumStr);
399    arguments.push_back(nullptr);
400
401    bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH,
402                                      arguments.size()-1,
403                                      arguments.data());
404    if (!compiled) {
405        return;
406    }
407
408    //===--------------------------------------------------------------------===//
409    // Create and load the shared lib
410    //===--------------------------------------------------------------------===//
411
412    if (!SharedLibraryUtils::createSharedLibrary(
413            getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) {
414        ALOGE("Failed to link object file '%s'", resName);
415        unlink(objFilePath.c_str());
416        return;
417    }
418
419    unlink(objFilePath.c_str());
420
421    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
422    if (mScriptObj == nullptr) {
423        ALOGE("Unable to load '%s'", resName);
424        return;
425    }
426
427    mExecutable = ScriptExecutable::createFromSharedObject(
428        getCpuRefImpl()->getContext(),
429        mScriptObj);
430
431#endif  // RS_COMPATIBILITY_LIB
432}
433
434void CpuScriptGroup2Impl::execute() {
435    for (auto batch : mBatches) {
436        batch->setGlobalsForBatch();
437        batch->run();
438    }
439}
440
441void Batch::setGlobalsForBatch() {
442    for (CPUClosure* cpuClosure : mClosures) {
443        const Closure* closure = cpuClosure->mClosure;
444        const IDBase* funcID = closure->mFunctionID.get();
445        Script* s = funcID->mScript;;
446        for (const auto& p : closure->mGlobals) {
447            const void* value = p.second.first;
448            int size = p.second.second;
449            if (value == nullptr && size == 0) {
450                // This indicates the current closure depends on another closure for a
451                // global in their shared module (script). In this case we don't need to
452                // copy the value. For example, an invoke intializes a global variable
453                // which a kernel later reads.
454                continue;
455            }
456            rsAssert(p.first != nullptr);
457            Script* script = p.first->mScript;
458            RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl();
459            const RsdCpuScriptImpl *cpuScript =
460                    (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
461            int slot = p.first->mSlot;
462            ScriptExecutable* exec = mGroup->getExecutable();
463            if (exec != nullptr) {
464                const char* varName = cpuScript->getFieldName(slot);
465                void* addr = exec->getFieldAddress(varName);
466                if (size < 0) {
467                    rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
468                                 (rs_object_base*)addr, (ObjectBase*)value);
469                } else {
470                    memcpy(addr, (const void*)&value, size);
471                }
472            } else {
473                // We use -1 size to indicate an ObjectBase rather than a primitive type
474                if (size < 0) {
475                    s->setVarObj(slot, (ObjectBase*)value);
476                } else {
477                    s->setVar(slot, (const void*)&value, size);
478                }
479            }
480        }
481    }
482}
483
484void Batch::run() {
485    if (!mClosures.front()->mClosure->mIsKernel) {
486        rsAssert(mClosures.size() == 1);
487
488        // This batch contains a single closure for an invoke function
489        CPUClosure* cc = mClosures.front();
490        const Closure* c = cc->mClosure;
491
492        if (mFunc != nullptr) {
493            // TODO: Need align pointers for x86_64.
494            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
495            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
496        } else {
497            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
498            rsAssert(invokeID != nullptr);
499            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
500        }
501
502        return;
503    }
504
505    if (mFunc != nullptr) {
506        MTLaunchStruct mtls;
507        const CPUClosure* firstCpuClosure = mClosures.front();
508        const CPUClosure* lastCpuClosure = mClosures.back();
509
510        firstCpuClosure->mSi->forEachMtlsSetup(
511                (const Allocation**)firstCpuClosure->mClosure->mArgs,
512                firstCpuClosure->mClosure->mNumArg,
513                lastCpuClosure->mClosure->mReturnValue,
514                nullptr, 0, nullptr, &mtls);
515
516        mtls.script = nullptr;
517        mtls.fep.usr = nullptr;
518        mtls.kernel = (ForEachFunc_t)mFunc;
519
520        mGroup->getCpuRefImpl()->launchThreads(
521                (const Allocation**)firstCpuClosure->mClosure->mArgs,
522                firstCpuClosure->mClosure->mNumArg,
523                lastCpuClosure->mClosure->mReturnValue,
524                nullptr, &mtls);
525
526        return;
527    }
528
529    for (CPUClosure* cpuClosure : mClosures) {
530        const Closure* closure = cpuClosure->mClosure;
531        const ScriptKernelID* kernelID =
532                (const ScriptKernelID*)closure->mFunctionID.get();
533        cpuClosure->mSi->preLaunch(kernelID->mSlot,
534                                   (const Allocation**)closure->mArgs,
535                                   closure->mNumArg, closure->mReturnValue,
536                                   nullptr, 0, nullptr);
537    }
538
539    const CPUClosure* cpuClosure = mClosures.front();
540    const Closure* closure = cpuClosure->mClosure;
541    MTLaunchStruct mtls;
542
543    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
544                                          closure->mNumArg,
545                                          closure->mReturnValue,
546                                          nullptr, 0, nullptr, &mtls)) {
547
548        mtls.script = nullptr;
549        mtls.kernel = (void (*)())&groupRoot;
550        mtls.fep.usr = &mClosures;
551
552        mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
553    }
554
555    for (CPUClosure* cpuClosure : mClosures) {
556        const Closure* closure = cpuClosure->mClosure;
557        const ScriptKernelID* kernelID =
558                (const ScriptKernelID*)closure->mFunctionID.get();
559        cpuClosure->mSi->postLaunch(kernelID->mSlot,
560                                    (const Allocation**)closure->mArgs,
561                                    closure->mNumArg, closure->mReturnValue,
562                                    nullptr, 0, nullptr);
563    }
564}
565
566}  // namespace renderscript
567}  // namespace android
568