rsCpuScriptGroup2.cpp revision 9e37ef9974cb8140998e06199115de8ecb93f0d3
1#include "rsCpuScriptGroup2.h"
2
3#include <dlfcn.h>
4#include <stdio.h>
5#include <stdlib.h>
6#include <unistd.h>
7
8#include <set>
9#include <sstream>
10#include <string>
11#include <vector>
12
13#ifndef RS_COMPATIBILITY_LIB
14#include "bcc/Config/Config.h"
15#endif
16
17#include "cpu_ref/rsCpuCore.h"
18#include "rsClosure.h"
19#include "rsContext.h"
20#include "rsCpuCore.h"
21#include "rsCpuExecutable.h"
22#include "rsCpuScript.h"
23#include "rsScript.h"
24#include "rsScriptGroup2.h"
25#include "rsScriptIntrinsic.h"
26
27using std::string;
28using std::vector;
29
30namespace android {
31namespace renderscript {
32
33namespace {
34
// NOTE(review): presumably the default number of arguments of a kernel; this
// constant is not referenced anywhere in the visible part of the file — confirm
// before relying on it.
constexpr size_t DefaultKernelArgCount = 2;
36
37void groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
38               uint32_t xend, uint32_t outstep) {
39    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
40    RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
41
42    const size_t oldInLen = mutable_kinfo->inLen;
43
44    decltype(mutable_kinfo->inStride) oldInStride;
45    memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
46
47    for (CPUClosure* cpuClosure : closures) {
48        const Closure* closure = cpuClosure->mClosure;
49
50        // There had better be enough space in mutable_kinfo
51        rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
52
53        for (size_t i = 0; i < closure->mNumArg; i++) {
54            const void* arg = closure->mArgs[i];
55            const Allocation* a = (const Allocation*)arg;
56            const uint32_t eStride = a->mHal.state.elementSizeBytes;
57            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
58                    eStride * xstart;
59            if (kinfo->dim.y > 1) {
60                ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
61            }
62            mutable_kinfo->inPtr[i] = ptr;
63            mutable_kinfo->inStride[i] = eStride;
64        }
65        mutable_kinfo->inLen = closure->mNumArg;
66
67        const Allocation* out = closure->mReturnValue;
68        const uint32_t ostep = out->mHal.state.elementSizeBytes;
69        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
70                ostep * xstart;
71        if (kinfo->dim.y > 1) {
72            ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
73        }
74
75        rsAssert(kinfo->outLen <= 1);
76        mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
77
78        // The implementation of an intrinsic relies on kinfo->usr being
79        // the "this" pointer to the intrinsic (an RsdCpuScriptIntrinsic object)
80        mutable_kinfo->usr = cpuClosure->mSi;
81
82        cpuClosure->mFunc(kinfo, xstart, xend, ostep);
83    }
84
85    mutable_kinfo->inLen = oldInLen;
86    mutable_kinfo->usr = &closures;
87    memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
88}
89
90}  // namespace
91
92Batch::Batch(CpuScriptGroup2Impl* group, const char* name) :
93    mGroup(group), mFunc(nullptr) {
94    mName = strndup(name, strlen(name));
95}
96
97Batch::~Batch() {
98    for (CPUClosure* c : mClosures) {
99        delete c;
100    }
101    free(mName);
102}
103
104bool Batch::conflict(CPUClosure* cpuClosure) const {
105    if (mClosures.empty()) {
106        return false;
107    }
108
109    const Closure* closure = cpuClosure->mClosure;
110
111    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
112        // An invoke should be in a batch by itself, so it conflicts with any other
113        // closure.
114        return true;
115    }
116
117    const auto& globalDeps = closure->mGlobalDeps;
118    const auto& argDeps = closure->mArgDeps;
119
120    for (CPUClosure* c : mClosures) {
121        const Closure* batched = c->mClosure;
122        if (globalDeps.find(batched) != globalDeps.end()) {
123            return true;
124        }
125        const auto& it = argDeps.find(batched);
126        if (it != argDeps.end()) {
127            const auto& args = (*it).second;
128            for (const auto &p1 : *args) {
129                if (p1.second.get() != nullptr) {
130                    return true;
131                }
132            }
133        }
134    }
135
136    // The compiler fusion pass in bcc expects that kernels chained up through
137    // (1st) input and output.
138
139    const Closure* lastBatched = mClosures.back()->mClosure;
140    const auto& it = argDeps.find(lastBatched);
141
142    if (it == argDeps.end()) {
143        return true;
144    }
145
146    const auto& args = (*it).second;
147    for (const auto &p1 : *args) {
148        if (p1.first == 0 && p1.second.get() == nullptr) {
149            // The new closure depends on the last batched closure's return
150            // value (fieldId being nullptr) for its first argument (argument 0)
151            return false;
152        }
153    }
154
155    return true;
156}
157
158CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
159                                         const ScriptGroupBase *sg) :
160    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
161    mExecutable(nullptr), mScriptObj(nullptr) {
162    rsAssert(!mGroup->mClosures.empty());
163
164    mCpuRefImpl->lockMutex();
165    Batch* batch = new Batch(this, "Batch0");
166    int i = 0;
167    for (Closure* closure: mGroup->mClosures) {
168        CPUClosure* cc;
169        const IDBase* funcID = closure->mFunctionID.get();
170        RsdCpuScriptImpl* si =
171                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
172        if (closure->mIsKernel) {
173            MTLaunchStructForEach mtls;
174            si->forEachKernelSetup(funcID->mSlot, &mtls);
175            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
176        } else {
177            cc = new CPUClosure(closure, si);
178        }
179
180        if (batch->conflict(cc)) {
181            mBatches.push_back(batch);
182            std::stringstream ss;
183            ss << "Batch" << ++i;
184            std::string batchStr(ss.str());
185            batch = new Batch(this, batchStr.c_str());
186        }
187
188        batch->mClosures.push_back(cc);
189    }
190
191    rsAssert(!batch->mClosures.empty());
192    mBatches.push_back(batch);
193
194#ifndef RS_COMPATIBILITY_LIB
195    compile(mGroup->mCacheDir);
196    if (mScriptObj != nullptr && mExecutable != nullptr) {
197        for (Batch* batch : mBatches) {
198            batch->resolveFuncPtr(mScriptObj);
199        }
200    }
201#endif  // RS_COMPATIBILITY_LIB
202    mCpuRefImpl->unlockMutex();
203}
204
205void Batch::resolveFuncPtr(void* sharedObj) {
206    std::string funcName(mName);
207    if (mClosures.front()->mClosure->mIsKernel) {
208        funcName.append(".expand");
209    }
210    mFunc = dlsym(sharedObj, funcName.c_str());
211    rsAssert (mFunc != nullptr);
212}
213
214CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
215    for (Batch* batch : mBatches) {
216        delete batch;
217    }
218    delete mExecutable;
219    // TODO: move this dlclose into ~ScriptExecutable().
220    if (mScriptObj != nullptr) {
221        dlclose(mScriptObj);
222    }
223}
224
225namespace {
226
227#ifndef RS_COMPATIBILITY_LIB
228
229string getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
230    *coreLibRelaxedPath = "";
231
232    // If we're debugging, use the debug library.
233    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
234        return SYSLIBPATH_BC"/libclcore_debug.bc";
235    }
236
237    // Check for a platform specific library
238
239#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
240    // NEON-capable ARMv7a devices can use an accelerated math library
241    // for all reduced precision scripts.
242    // ARMv8 does not use NEON, as ASIMD can be used with all precision
243    // levels.
244    *coreLibRelaxedPath = SYSLIBPATH_BC"/libclcore_neon.bc";
245#endif
246
247#if defined(__i386__) || defined(__x86_64__)
248    // x86 devices will use an optimized library.
249    return SYSLIBPATH_BC"/libclcore_x86.bc";
250#else
251    return SYSLIBPATH_BC"/libclcore.bc";
252#endif
253}
254
255void setupCompileArguments(
256        const vector<const char*>& inputs, const vector<string>& kernelBatches,
257        const vector<string>& invokeBatches,
258        const char* outputDir, const char* outputFileName,
259        const char* coreLibPath, const char* coreLibRelaxedPath,
260        const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant,
261        int optLevel, vector<const char*>* args) {
262    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
263    args->push_back("-fPIC");
264    args->push_back("-embedRSInfo");
265    if (emitGlobalInfo) {
266        args->push_back("-rs-global-info");
267        if (emitGlobalInfoSkipConstant) {
268            args->push_back("-rs-global-info-skip-constant");
269        }
270    }
271    args->push_back("-mtriple");
272    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
273    args->push_back("-bclib");
274    args->push_back(coreLibPath);
275    args->push_back("-bclib_relaxed");
276    args->push_back(coreLibRelaxedPath);
277    for (const char* input : inputs) {
278        args->push_back(input);
279    }
280    for (const string& batch : kernelBatches) {
281        args->push_back("-merge");
282        args->push_back(batch.c_str());
283    }
284    for (const string& batch : invokeBatches) {
285        args->push_back("-invoke");
286        args->push_back(batch.c_str());
287    }
288    args->push_back("-output_path");
289    args->push_back(outputDir);
290
291    args->push_back("-O");
292    switch (optLevel) {
293    case 0:
294        args->push_back("0");
295        break;
296    case 3:
297        args->push_back("3");
298        break;
299    default:
300        ALOGW("Expected optimization level of 0 or 3. Received %d", optLevel);
301        args->push_back("3");
302        break;
303    }
304
305    // The output filename has to be the last, in case we need to pop it out and
306    // replace with a different name.
307    args->push_back("-o");
308    args->push_back(outputFileName);
309}
310
311void generateSourceSlot(RsdCpuReferenceImpl* ctxt,
312                        const Closure& closure,
313                        const std::vector<const char*>& inputs,
314                        std::stringstream& ss) {
315    const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
316    const Script* script = funcID->mScript;
317
318    rsAssert (!script->isIntrinsic());
319
320    const RsdCpuScriptImpl *cpuScript =
321            (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
322    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
323
324    const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
325            inputs.begin();
326
327    ss << index << "," << funcID->mSlot << ".";
328}
329
#endif  // RS_COMPATIBILITY_LIB
331
332}  // anonymous namespace
333
334extern __attribute__((noinline))
335void debugHintScriptGroup2(const char* groupName,
336                           const uint32_t groupNameSize,
337                           const ExpandFuncTy* kernel,
338                           const uint32_t kernelCount) {
339    ALOGV("group name: %d:%s\n", groupNameSize, groupName);
340    for (uint32_t i=0; i < kernelCount; ++i) {
341        const char* f1 = (const char*)(kernel[i]);
342        ALOGV("  closure: %p\n", (const void*)f1);
343    }
344    // do nothing, this is just a hook point for the debugger.
345    return;
346}
347
348void CpuScriptGroup2Impl::compile(const char* cacheDir) {
349#ifndef RS_COMPATIBILITY_LIB
350    if (mGroup->mClosures.size() < 2) {
351        return;
352    }
353
354    const int optLevel = getCpuRefImpl()->getContext()->getOptLevel();
355    if (optLevel == 0) {
356        std::vector<ExpandFuncTy> kernels;
357        for (const Batch* b : mBatches)
358            for (const CPUClosure* c : b->mClosures)
359                kernels.push_back(c->mFunc);
360
361        if (kernels.size()) {
362            // pass this information on to the debugger via a hint function.
363            debugHintScriptGroup2(mGroup->mName,
364                                  strlen(mGroup->mName),
365                                  kernels.data(),
366                                  kernels.size());
367        }
368
369        // skip script group compilation forcing the driver to use the fallback
370        // execution path which currently has better support for debugging.
371        return;
372    }
373
374    auto comparator = [](const char* str1, const char* str2) -> bool {
375        return strcmp(str1, str2) < 0;
376    };
377    std::set<const char*, decltype(comparator)> inputSet(comparator);
378
379    for (Closure* closure : mGroup->mClosures) {
380        const Script* script = closure->mFunctionID.get()->mScript;
381
382        // If any script is an intrinsic, give up trying fusing the kernels.
383        if (script->isIntrinsic()) {
384            return;
385        }
386
387        const RsdCpuScriptImpl *cpuScript =
388            (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script);
389
390        const char* bitcodeFilename = cpuScript->getBitcodeFilePath();
391        inputSet.insert(bitcodeFilename);
392    }
393
394    std::vector<const char*> inputs(inputSet.begin(), inputSet.end());
395
396    std::vector<string> kernelBatches;
397    std::vector<string> invokeBatches;
398
399    int i = 0;
400    for (const auto& batch : mBatches) {
401        rsAssert(batch->size() > 0);
402
403        std::stringstream ss;
404        ss << batch->mName << ":";
405
406        if (!batch->mClosures.front()->mClosure->mIsKernel) {
407            rsAssert(batch->size() == 1);
408            generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss);
409            invokeBatches.push_back(ss.str());
410        } else {
411            for (const auto& cpuClosure : batch->mClosures) {
412                generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss);
413            }
414            kernelBatches.push_back(ss.str());
415        }
416    }
417
418    rsAssert(cacheDir != nullptr);
419    string objFilePath(cacheDir);
420    objFilePath.append("/");
421    objFilePath.append(mGroup->mName);
422    objFilePath.append(".o");
423
424    const char* resName = mGroup->mName;
425    string coreLibRelaxedPath;
426    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
427                                               &coreLibRelaxedPath);
428
429    vector<const char*> arguments;
430    bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo();
431    bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant();
432    setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
433                          resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(),
434                          emitGlobalInfo, emitGlobalInfoSkipConstant,
435                          optLevel, &arguments);
436
437    std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1,
438                                                       arguments.data()));
439
440    inputs.push_back(coreLibPath.c_str());
441    inputs.push_back(coreLibRelaxedPath.c_str());
442
443    uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(),
444                                               inputs.data(), inputs.size());
445
446    if (checksum == 0) {
447        return;
448    }
449
450    std::stringstream ss;
451    ss << std::hex << checksum;
452    std::string checksumStr(ss.str());
453
454    //===--------------------------------------------------------------------===//
455    // Try to load a shared lib from code cache matching filename and checksum
456    //===--------------------------------------------------------------------===//
457
458    bool alreadyLoaded = false;
459    std::string cloneName;
460
461    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nullptr,
462                                                       &alreadyLoaded);
463    if (mScriptObj != nullptr) {
464        // A shared library named resName is found in code cache directory
465        // cacheDir, and loaded with the handle stored in mScriptObj.
466
467        mExecutable = ScriptExecutable::createFromSharedObject(
468            mScriptObj, checksum);
469
470        if (mExecutable != nullptr) {
471            // The loaded shared library in mScriptObj has a matching checksum.
472            // An executable object has been created.
473            return;
474        }
475
476        ALOGV("Failed to create an executable object from so file due to "
477              "mismatching checksum");
478
479        if (alreadyLoaded) {
480            // The shared object found in code cache has already been loaded.
481            // A different file name is needed for the new shared library, to
482            // avoid corrupting the currently loaded instance.
483
484            cloneName.append(resName);
485            cloneName.append("#");
486            cloneName.append(SharedLibraryUtils::getRandomString(6).string());
487
488            // The last element in arguments is the output filename.
489            arguments.pop_back();
490            arguments.push_back(cloneName.c_str());
491        }
492
493        dlclose(mScriptObj);
494        mScriptObj = nullptr;
495    }
496
497    //===--------------------------------------------------------------------===//
498    // Fuse the input kernels and generate native code in an object file
499    //===--------------------------------------------------------------------===//
500
501    arguments.push_back("-build-checksum");
502    arguments.push_back(checksumStr.c_str());
503    arguments.push_back(nullptr);
504
505    bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH,
506                                      arguments.size()-1,
507                                      arguments.data());
508    if (!compiled) {
509        return;
510    }
511
512    //===--------------------------------------------------------------------===//
513    // Create and load the shared lib
514    //===--------------------------------------------------------------------===//
515
516    if (!SharedLibraryUtils::createSharedLibrary(
517            getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) {
518        ALOGE("Failed to link object file '%s'", resName);
519        unlink(objFilePath.c_str());
520        return;
521    }
522
523    unlink(objFilePath.c_str());
524
525    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
526    if (mScriptObj == nullptr) {
527        ALOGE("Unable to load '%s'", resName);
528        return;
529    }
530
531    if (alreadyLoaded) {
532        // Delete the temporary, random-named file that we created to avoid
533        // interfering with an already loaded shared library.
534        string cloneFilePath(cacheDir);
535        cloneFilePath.append("/");
536        cloneFilePath.append(cloneName.c_str());
537        cloneFilePath.append(".so");
538        unlink(cloneFilePath.c_str());
539    }
540
541    mExecutable = ScriptExecutable::createFromSharedObject(mScriptObj);
542
543#endif  // RS_COMPATIBILITY_LIB
544}
545
546void CpuScriptGroup2Impl::execute() {
547    for (auto batch : mBatches) {
548        batch->setGlobalsForBatch();
549        batch->run();
550    }
551}
552
553void Batch::setGlobalsForBatch() {
554    for (CPUClosure* cpuClosure : mClosures) {
555        const Closure* closure = cpuClosure->mClosure;
556        const IDBase* funcID = closure->mFunctionID.get();
557        Script* s = funcID->mScript;;
558        for (const auto& p : closure->mGlobals) {
559            const int64_t value = p.second.first;
560            int size = p.second.second;
561            if (value == 0 && size == 0) {
562                // This indicates the current closure depends on another closure for a
563                // global in their shared module (script). In this case we don't need to
564                // copy the value. For example, an invoke intializes a global variable
565                // which a kernel later reads.
566                continue;
567            }
568            rsAssert(p.first != nullptr);
569            Script* script = p.first->mScript;
570            rsAssert(script == s);
571            RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl();
572            const RsdCpuScriptImpl *cpuScript =
573                    (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
574            int slot = p.first->mSlot;
575            ScriptExecutable* exec = mGroup->getExecutable();
576            if (exec != nullptr) {
577                const char* varName = cpuScript->getFieldName(slot);
578                void* addr = exec->getFieldAddress(varName);
579                if (size < 0) {
580                    rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
581                                 (rs_object_base*)addr, (ObjectBase*)value);
582                } else {
583                    memcpy(addr, (const void*)&value, size);
584                }
585            } else {
586                // We use -1 size to indicate an ObjectBase rather than a primitive type
587                if (size < 0) {
588                    s->setVarObj(slot, (ObjectBase*)value);
589                } else {
590                    s->setVar(slot, (const void*)&value, size);
591                }
592            }
593        }
594    }
595}
596
597void Batch::run() {
598    if (!mClosures.front()->mClosure->mIsKernel) {
599        rsAssert(mClosures.size() == 1);
600
601        // This batch contains a single closure for an invoke function
602        CPUClosure* cc = mClosures.front();
603        const Closure* c = cc->mClosure;
604
605        if (mFunc != nullptr) {
606            // TODO: Need align pointers for x86_64.
607            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
608            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
609        } else {
610            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
611            rsAssert(invokeID != nullptr);
612            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
613        }
614
615        return;
616    }
617
618    if (mFunc != nullptr) {
619        MTLaunchStructForEach mtls;
620        const CPUClosure* firstCpuClosure = mClosures.front();
621        const CPUClosure* lastCpuClosure = mClosures.back();
622
623        firstCpuClosure->mSi->forEachMtlsSetup(
624                (const Allocation**)firstCpuClosure->mClosure->mArgs,
625                firstCpuClosure->mClosure->mNumArg,
626                lastCpuClosure->mClosure->mReturnValue,
627                nullptr, 0, nullptr, &mtls);
628
629        mtls.script = nullptr;
630        mtls.fep.usr = nullptr;
631        mtls.kernel = (ForEachFunc_t)mFunc;
632
633        mGroup->getCpuRefImpl()->launchForEach(
634                (const Allocation**)firstCpuClosure->mClosure->mArgs,
635                firstCpuClosure->mClosure->mNumArg,
636                lastCpuClosure->mClosure->mReturnValue,
637                nullptr, &mtls);
638
639        return;
640    }
641
642    for (CPUClosure* cpuClosure : mClosures) {
643        const Closure* closure = cpuClosure->mClosure;
644        const ScriptKernelID* kernelID =
645                (const ScriptKernelID*)closure->mFunctionID.get();
646        cpuClosure->mSi->preLaunch(kernelID->mSlot,
647                                   (const Allocation**)closure->mArgs,
648                                   closure->mNumArg, closure->mReturnValue,
649                                   nullptr, 0, nullptr);
650    }
651
652    const CPUClosure* cpuClosure = mClosures.front();
653    const Closure* closure = cpuClosure->mClosure;
654    MTLaunchStructForEach mtls;
655
656    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
657                                          closure->mNumArg,
658                                          closure->mReturnValue,
659                                          nullptr, 0, nullptr, &mtls)) {
660
661        mtls.script = nullptr;
662        mtls.kernel = &groupRoot;
663        mtls.fep.usr = &mClosures;
664
665        mGroup->getCpuRefImpl()->launchForEach(nullptr, 0, nullptr, nullptr, &mtls);
666    }
667
668    for (CPUClosure* cpuClosure : mClosures) {
669        const Closure* closure = cpuClosure->mClosure;
670        const ScriptKernelID* kernelID =
671                (const ScriptKernelID*)closure->mFunctionID.get();
672        cpuClosure->mSi->postLaunch(kernelID->mSlot,
673                                    (const Allocation**)closure->mArgs,
674                                    closure->mNumArg, closure->mReturnValue,
675                                    nullptr, 0, nullptr);
676    }
677}
678
679}  // namespace renderscript
680}  // namespace android
681