// rsCpuScriptGroup2.cpp revision ef6acb98982393e36cad52f67e691d75b6a46cfd
#include "rsCpuScriptGroup2.h"

#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <set>
#include <sstream>
#include <string>
#include <vector>

#ifndef RS_COMPATIBILITY_LIB
#include "bcc/Config.h"
#endif

#include "cpu_ref/rsCpuCore.h"
#include "rsClosure.h"
#include "rsContext.h"
#include "rsCpuCore.h"
#include "rsCpuExecutable.h"
#include "rsCpuScript.h"
#include "rsScript.h"
#include "rsScriptGroup2.h"
#include "rsScriptIntrinsic.h"

using std::string;
using std::vector;

namespace android {
namespace renderscript {

namespace {

const size_t DefaultKernelArgCount = 2;

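// Fallback launch root for a batch of kernel closures (used when the batch has
// no fused function; see Batch::run, which sets mtls.kernel = &groupRoot and
// mtls.fep.usr = &mClosures). For each closure in the batch it points the
// kernel inputs/outputs at that closure's allocations, temporarily sets
// kinfo->usr to the closure's script (intrinsics expect their "this" pointer
// there), runs the expanded kernel over [xstart, xend), and finally restores
// the saved inLen, usr, and inStride fields.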
void groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
               uint32_t xend, uint32_t outstep) {
    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
    RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);

    const size_t oldInLen = mutable_kinfo->inLen;

    decltype(mutable_kinfo->inStride) oldInStride;
    memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));

    for (CPUClosure* cpuClosure : closures) {
        const Closure* closure = cpuClosure->mClosure;

        // There had better be enough space in mutable_kinfo
        rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);

        for (size_t i = 0; i < closure->mNumArg; i++) {
            const void* arg = closure->mArgs[i];
            const Allocation* a = (const Allocation*)arg;
            const uint32_t eStride = a->mHal.state.elementSizeBytes;
            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
                    eStride * xstart;
            if (kinfo->dim.y > 1) {
                ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
            }
            mutable_kinfo->inPtr[i] = ptr;
            mutable_kinfo->inStride[i] = eStride;
        }
        mutable_kinfo->inLen = closure->mNumArg;

        const Allocation* out = closure->mReturnValue;
        const uint32_t ostep = out->mHal.state.elementSizeBytes;
        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
                ostep * xstart;
        if (kinfo->dim.y > 1) {
            ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
        }

        mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);

        // The implementation of an intrinsic relies on kinfo->usr being
        // the "this" pointer to the intrinsic (an RsdCpuScriptIntrinsic object)
        mutable_kinfo->usr = cpuClosure->mSi;

        cpuClosure->mFunc(kinfo, xstart, xend, ostep);
    }

    mutable_kinfo->inLen = oldInLen;
    mutable_kinfo->usr = &closures;
    memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
}

}  // namespace

Batch::Batch(CpuScriptGroup2Impl* group, const char* name) :
    mGroup(group), mFunc(nullptr) {
    mName = strndup(name, strlen(name));
}

Batch::~Batch() {
    for (CPUClosure* c : mClosures) {
        delete c;
    }
    free(mName);
}

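// Returns true if adding cpuClosure to this batch would break the batching
// rules: invoke closures must be in a batch by themselves; a kernel closure
// conflicts if it depends on any batched closure's globals or bound fields;
// and, to satisfy the bcc fusion pass, it must consume the previously batched
// closure's return value as its first (0th) argument.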
bool Batch::conflict(CPUClosure* cpuClosure) const {
    if (mClosures.empty()) {
        return false;
    }

    const Closure* closure = cpuClosure->mClosure;

    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
        // An invoke should be in a batch by itself, so it conflicts with any other
        // closure.
        return true;
    }

    const auto& globalDeps = closure->mGlobalDeps;
    const auto& argDeps = closure->mArgDeps;

    for (CPUClosure* c : mClosures) {
        const Closure* batched = c->mClosure;
        if (globalDeps.find(batched) != globalDeps.end()) {
            return true;
        }
        const auto& it = argDeps.find(batched);
        if (it != argDeps.end()) {
            const auto& args = (*it).second;
            for (const auto &p1 : *args) {
                if (p1.second.get() != nullptr) {
                    return true;
                }
            }
        }
    }

    // The compiler fusion pass in bcc expects that kernels are chained up
    // through their (1st) input and output.

    const Closure* lastBatched = mClosures.back()->mClosure;
    const auto& it = argDeps.find(lastBatched);

    if (it == argDeps.end()) {
        return true;
    }

    const auto& args = (*it).second;
    for (const auto &p1 : *args) {
        if (p1.first == 0 && p1.second.get() == nullptr) {
            // The new closure depends on the last batched closure's return
            // value (fieldId being nullptr) for its first argument (argument 0)
            return false;
        }
    }

    return true;
}

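// Builds the batches for a script group: closures are appended to the current
// batch in order, and a new batch is started whenever Batch::conflict() says
// the next closure cannot be fused with it. When not building against the
// compatibility library, the batches are then compiled (see compile()) and,
// on success, each batch's fused function pointer is resolved from the
// resulting shared object.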
CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
                                         const ScriptGroupBase *sg) :
    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
    mExecutable(nullptr), mScriptObj(nullptr) {
    rsAssert(!mGroup->mClosures.empty());

    mCpuRefImpl->lockMutex();
    Batch* batch = new Batch(this, "Batch0");
    int i = 0;
    for (Closure* closure: mGroup->mClosures) {
        CPUClosure* cc;
        const IDBase* funcID = closure->mFunctionID.get();
        RsdCpuScriptImpl* si =
                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
        if (closure->mIsKernel) {
            MTLaunchStructForEach mtls;
            si->forEachKernelSetup(funcID->mSlot, &mtls);
            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
        } else {
            cc = new CPUClosure(closure, si);
        }

        if (batch->conflict(cc)) {
            mBatches.push_back(batch);
            std::stringstream ss;
            ss << "Batch" << ++i;
            std::string batchStr(ss.str());
            batch = new Batch(this, batchStr.c_str());
        }

        batch->mClosures.push_back(cc);
    }

    rsAssert(!batch->mClosures.empty());
    mBatches.push_back(batch);

#ifndef RS_COMPATIBILITY_LIB
    compile(mGroup->mCacheDir);
    if (mScriptObj != nullptr && mExecutable != nullptr) {
        for (Batch* batch : mBatches) {
            batch->resolveFuncPtr(mScriptObj);
        }
    }
#endif  // RS_COMPATIBILITY_LIB
    mCpuRefImpl->unlockMutex();
}

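// Looks up this batch's fused function in the compiled shared object. Kernel
// batches resolve the fused kernel under "<batchName>.expand"; invoke batches
// resolve under the batch name itself.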
void Batch::resolveFuncPtr(void* sharedObj) {
    std::string funcName(mName);
    if (mClosures.front()->mClosure->mIsKernel) {
        funcName.append(".expand");
    }
    mFunc = dlsym(sharedObj, funcName.c_str());
    rsAssert (mFunc != nullptr);
}

CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
    for (Batch* batch : mBatches) {
        delete batch;
    }
    delete mExecutable;
    // TODO: move this dlclose into ~ScriptExecutable().
    if (mScriptObj != nullptr) {
        dlclose(mScriptObj);
    }
}

namespace {

#ifndef RS_COMPATIBILITY_LIB

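// Selects the core bitcode library to link against: the debug library for
// debug contexts, an x86-optimized library on x86/x86_64, and the generic
// library otherwise. On NEON-capable ARMv7-A builds, *coreLibRelaxedPath is
// additionally set to the NEON math library used for relaxed precision.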
string getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
    *coreLibRelaxedPath = "";

    // If we're debugging, use the debug library.
    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
        return SYSLIBPATH_BC"/libclcore_debug.bc";
    }

    // Check for a platform specific library

#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
    // NEON-capable ARMv7a devices can use an accelerated math library
    // for all reduced precision scripts.
    // ARMv8 does not use NEON, as ASIMD can be used with all precision
    // levels.
    *coreLibRelaxedPath = SYSLIBPATH_BC"/libclcore_neon.bc";
#endif

#if defined(__i386__) || defined(__x86_64__)
    // x86 devices will use an optimized library.
    return SYSLIBPATH_BC"/libclcore_x86.bc";
#else
    return SYSLIBPATH_BC"/libclcore.bc";
#endif
}

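// Assembles the bcc command line used to fuse and compile the script group.
// Illustrative sketch of the resulting command (the file names and batch
// strings here are made up; the real values come from the parameters below):
//
//   bcc -fPIC -embedRSInfo -mtriple <triple> \
//       -bclib .../libclcore.bc -bclib_relaxed "" \
//       foo.bc bar.bc \
//       -merge Batch0:0,1.1,0. -invoke Batch1:1,2. \
//       -output_path <cacheDir> -O 3 -o <groupName>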
void setupCompileArguments(
        const vector<const char*>& inputs, const vector<string>& kernelBatches,
        const vector<string>& invokeBatches,
        const char* outputDir, const char* outputFileName,
        const char* coreLibPath, const char* coreLibRelaxedPath,
        const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant,
        int optLevel, vector<const char*>* args) {
    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
    args->push_back("-fPIC");
    args->push_back("-embedRSInfo");
    if (emitGlobalInfo) {
        args->push_back("-rs-global-info");
        if (emitGlobalInfoSkipConstant) {
            args->push_back("-rs-global-info-skip-constant");
        }
    }
    args->push_back("-mtriple");
    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
    args->push_back("-bclib");
    args->push_back(coreLibPath);
    args->push_back("-bclib_relaxed");
    args->push_back(coreLibRelaxedPath);
    for (const char* input : inputs) {
        args->push_back(input);
    }
    for (const string& batch : kernelBatches) {
        args->push_back("-merge");
        args->push_back(batch.c_str());
    }
    for (const string& batch : invokeBatches) {
        args->push_back("-invoke");
        args->push_back(batch.c_str());
    }
    args->push_back("-output_path");
    args->push_back(outputDir);

    args->push_back("-O");
    switch (optLevel) {
    case 0:
        args->push_back("0");
        break;
    case 3:
        args->push_back("3");
        break;
    default:
        ALOGW("Expected optimization level of 0 or 3. Received %d", optLevel);
        args->push_back("3");
        break;
    }

    // The output filename has to be the last argument, in case we need to pop
    // it and replace it with a different name.
    args->push_back("-o");
    args->push_back(outputFileName);
}

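// Appends "<inputIndex>,<slot>." to ss, where inputIndex is the position of
// the closure's bitcode file in inputs and slot identifies the kernel or
// invoke within that script. These fragments make up the batch strings passed
// to bcc via -merge/-invoke.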
void generateSourceSlot(RsdCpuReferenceImpl* ctxt,
                        const Closure& closure,
                        const std::vector<const char*>& inputs,
                        std::stringstream& ss) {
    const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
    const Script* script = funcID->mScript;

    rsAssert (!script->isIntrinsic());

    const RsdCpuScriptImpl *cpuScript =
            (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();

    const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
            inputs.begin();

    ss << index << "," << funcID->mSlot << ".";
}

#endif  // RS_COMPATIBILITY_LIB

}  // anonymous namespace

extern __attribute__((noinline))
void debugHintScriptGroup2(const char* groupName,
                           const uint32_t groupNameSize,
                           const ExpandFuncTy* kernel,
                           const uint32_t kernelCount) {
    ALOGV("group name: %d:%s\n", groupNameSize, groupName);
    for (uint32_t i=0; i < kernelCount; ++i) {
        const char* f1 = (const char*)(kernel[i]);
        ALOGV("  closure: %p\n", (const void*)f1);
    }
    // do nothing, this is just a hook point for the debugger.
    return;
}

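// Compiles the script group into a single shared object when possible:
//  1. Bail out for trivial groups, for -O0 (debugging uses the fallback path),
//     or if any closure refers to an intrinsic.
//  2. Collect the distinct bitcode files and encode each batch as a string of
//     "<inputIndex>,<slot>." fragments for bcc's -merge/-invoke flags.
//  3. Compute a build checksum and try to reuse a cached shared object with a
//     matching checksum; otherwise run bcc, link the result, load it, and
//     create the ScriptExecutable.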
void CpuScriptGroup2Impl::compile(const char* cacheDir) {
#ifndef RS_COMPATIBILITY_LIB
    if (mGroup->mClosures.size() < 2) {
        return;
    }

    const int optLevel = getCpuRefImpl()->getContext()->getOptLevel();
    if (optLevel == 0) {
        std::vector<ExpandFuncTy> kernels;
        for (const Batch* b : mBatches)
            for (const CPUClosure* c : b->mClosures)
                kernels.push_back(c->mFunc);

        if (kernels.size()) {
            // pass this information on to the debugger via a hint function.
            debugHintScriptGroup2(mGroup->mName,
                                  strlen(mGroup->mName),
                                  kernels.data(),
                                  kernels.size());
        }

        // skip script group compilation, forcing the driver to use the fallback
        // execution path, which currently has better support for debugging.
        return;
    }

    auto comparator = [](const char* str1, const char* str2) -> bool {
        return strcmp(str1, str2) < 0;
    };
    std::set<const char*, decltype(comparator)> inputSet(comparator);

    for (Closure* closure : mGroup->mClosures) {
        const Script* script = closure->mFunctionID.get()->mScript;

        // If any script is an intrinsic, give up on fusing the kernels.
        if (script->isIntrinsic()) {
            return;
        }

        const RsdCpuScriptImpl *cpuScript =
            (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script);

        const char* bitcodeFilename = cpuScript->getBitcodeFilePath();
        inputSet.insert(bitcodeFilename);
    }

    std::vector<const char*> inputs(inputSet.begin(), inputSet.end());

    std::vector<string> kernelBatches;
    std::vector<string> invokeBatches;

    int i = 0;
    for (const auto& batch : mBatches) {
        rsAssert(batch->size() > 0);

        std::stringstream ss;
        ss << batch->mName << ":";

        if (!batch->mClosures.front()->mClosure->mIsKernel) {
            rsAssert(batch->size() == 1);
            generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss);
            invokeBatches.push_back(ss.str());
        } else {
            for (const auto& cpuClosure : batch->mClosures) {
                generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss);
            }
            kernelBatches.push_back(ss.str());
        }
    }

    rsAssert(cacheDir != nullptr);
    string objFilePath(cacheDir);
    objFilePath.append("/");
    objFilePath.append(mGroup->mName);
    objFilePath.append(".o");

    const char* resName = mGroup->mName;
    string coreLibRelaxedPath;
    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
                                               &coreLibRelaxedPath);

    vector<const char*> arguments;
    bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo();
    bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant();
    setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
                          resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(),
                          emitGlobalInfo, emitGlobalInfoSkipConstant,
                          optLevel, &arguments);

    std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1,
                                                       arguments.data()));

    inputs.push_back(coreLibPath.c_str());
    inputs.push_back(coreLibRelaxedPath.c_str());

    uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(),
                                               inputs.data(), inputs.size());

    if (checksum == 0) {
        return;
    }

    std::stringstream ss;
    ss << std::hex << checksum;
    std::string checksumStr(ss.str());

    //===--------------------------------------------------------------------===//
    // Try to load a shared lib from code cache matching filename and checksum
    //===--------------------------------------------------------------------===//

    bool alreadyLoaded = false;
    std::string cloneName;

    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nullptr,
                                                       &alreadyLoaded);
    if (mScriptObj != nullptr) {
        // A shared library named resName was found in the code cache directory
        // cacheDir and loaded; its handle is stored in mScriptObj.

        mExecutable = ScriptExecutable::createFromSharedObject(
            mScriptObj, checksum);

        if (mExecutable != nullptr) {
            // The loaded shared library in mScriptObj has a matching checksum.
            // An executable object has been created.
            return;
        }

        ALOGV("Failed to create an executable object from so file due to "
              "mismatching checksum");

        if (alreadyLoaded) {
            // The shared object found in code cache has already been loaded.
            // A different file name is needed for the new shared library, to
            // avoid corrupting the currently loaded instance.

            cloneName.append(resName);
            cloneName.append("#");
            cloneName.append(SharedLibraryUtils::getRandomString(6).c_str());

            // The last element in arguments is the output filename.
            arguments.pop_back();
            arguments.push_back(cloneName.c_str());
        }

        dlclose(mScriptObj);
        mScriptObj = nullptr;
    }

    //===--------------------------------------------------------------------===//
    // Fuse the input kernels and generate native code in an object file
    //===--------------------------------------------------------------------===//

    arguments.push_back("-build-checksum");
    arguments.push_back(checksumStr.c_str());
    arguments.push_back(nullptr);

    bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH,
                                      arguments.size()-1,
                                      arguments.data());
    if (!compiled) {
        return;
    }

    //===--------------------------------------------------------------------===//
    // Create and load the shared lib
    //===--------------------------------------------------------------------===//

    if (!SharedLibraryUtils::createSharedLibrary(
            getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) {
        ALOGE("Failed to link object file '%s'", resName);
        unlink(objFilePath.c_str());
        return;
    }

    unlink(objFilePath.c_str());

    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
    if (mScriptObj == nullptr) {
        ALOGE("Unable to load '%s'", resName);
        return;
    }

    if (alreadyLoaded) {
        // Delete the temporary, random-named file that we created to avoid
        // interfering with an already loaded shared library.
        string cloneFilePath(cacheDir);
        cloneFilePath.append("/");
        cloneFilePath.append(cloneName.c_str());
        cloneFilePath.append(".so");
        unlink(cloneFilePath.c_str());
    }

    mExecutable = ScriptExecutable::createFromSharedObject(mScriptObj);

#endif  // RS_COMPATIBILITY_LIB
}

void CpuScriptGroup2Impl::execute() {
    for (auto batch : mBatches) {
        batch->setGlobalsForBatch();
        batch->run();
    }
}

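// Applies the global-variable bindings recorded in each closure before the
// batch runs. Each binding maps a field ID to a (value, size) pair: size < 0
// means the value is an ObjectBase pointer, value == 0 with size == 0 means
// the global is produced by another closure in the group and needs no copy,
// and otherwise the raw bytes of value are copied into the variable. Values
// are written through the fused executable's field addresses when one exists,
// or through Script::setVar/setVarObj on the fallback path.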
void Batch::setGlobalsForBatch() {
    for (CPUClosure* cpuClosure : mClosures) {
        const Closure* closure = cpuClosure->mClosure;
        const IDBase* funcID = closure->mFunctionID.get();
        Script* s = funcID->mScript;
        for (const auto& p : closure->mGlobals) {
            const int64_t value = p.second.first;
            int size = p.second.second;
            if (value == 0 && size == 0) {
                // This indicates that the current closure depends on another closure
                // for a global in their shared module (script). In this case we don't
                // need to copy the value. For example, an invoke initializes a global
                // variable which a kernel later reads.
                continue;
            }
            rsAssert(p.first != nullptr);
            Script* script = p.first->mScript;
            rsAssert(script == s);
            RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl();
            const RsdCpuScriptImpl *cpuScript =
                    (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
            int slot = p.first->mSlot;
            ScriptExecutable* exec = mGroup->getExecutable();
            if (exec != nullptr) {
                const char* varName = cpuScript->getFieldName(slot);
                void* addr = exec->getFieldAddress(varName);
                if (size < 0) {
                    rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
                                 (rs_object_base*)addr, (ObjectBase*)value);
                } else {
                    memcpy(addr, (const void*)&value, size);
                }
            } else {
                // We use -1 size to indicate an ObjectBase rather than a primitive type
                if (size < 0) {
                    s->setVarObj(slot, (ObjectBase*)value);
                } else {
                    s->setVar(slot, (const void*)&value, size);
                }
            }
        }
    }
}

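// Runs the batch. Three cases:
//  - A single invoke closure: call the fused invoke function if one was
//    compiled, otherwise dispatch through the script's invokeFunction().
//  - Kernel closures with a fused function (mFunc): launch it once over the
//    first closure's inputs and the last closure's output.
//  - Kernel closures without a fused function: launch groupRoot, which runs
//    each closure in turn, bracketed by the scripts' preLaunch/postLaunch
//    hooks.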
void Batch::run() {
    if (!mClosures.front()->mClosure->mIsKernel) {
        rsAssert(mClosures.size() == 1);

        // This batch contains a single closure for an invoke function
        CPUClosure* cc = mClosures.front();
        const Closure* c = cc->mClosure;

        if (mFunc != nullptr) {
            // TODO: Need to align pointers for x86_64.
            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
        } else {
            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
            rsAssert(invokeID != nullptr);
            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
        }

        return;
    }

    if (mFunc != nullptr) {
        MTLaunchStructForEach mtls;
        const CPUClosure* firstCpuClosure = mClosures.front();
        const CPUClosure* lastCpuClosure = mClosures.back();

        firstCpuClosure->mSi->forEachMtlsSetup(
                (const Allocation**)firstCpuClosure->mClosure->mArgs,
                firstCpuClosure->mClosure->mNumArg,
                lastCpuClosure->mClosure->mReturnValue,
                nullptr, 0, nullptr, &mtls);

        mtls.script = nullptr;
        mtls.fep.usr = nullptr;
        mtls.kernel = (ForEachFunc_t)mFunc;

        mGroup->getCpuRefImpl()->launchForEach(
                (const Allocation**)firstCpuClosure->mClosure->mArgs,
                firstCpuClosure->mClosure->mNumArg,
                lastCpuClosure->mClosure->mReturnValue,
                nullptr, &mtls);

        return;
    }

    for (CPUClosure* cpuClosure : mClosures) {
        const Closure* closure = cpuClosure->mClosure;
        const ScriptKernelID* kernelID =
                (const ScriptKernelID*)closure->mFunctionID.get();
        cpuClosure->mSi->preLaunch(kernelID->mSlot,
                                   (const Allocation**)closure->mArgs,
                                   closure->mNumArg, closure->mReturnValue,
                                   nullptr, 0, nullptr);
    }

    const CPUClosure* cpuClosure = mClosures.front();
    const Closure* closure = cpuClosure->mClosure;
    MTLaunchStructForEach mtls;

    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
                                          closure->mNumArg,
                                          closure->mReturnValue,
                                          nullptr, 0, nullptr, &mtls)) {

        mtls.script = nullptr;
        mtls.kernel = &groupRoot;
        mtls.fep.usr = &mClosures;

        mGroup->getCpuRefImpl()->launchForEach(nullptr, 0, nullptr, nullptr, &mtls);
    }

    for (CPUClosure* cpuClosure : mClosures) {
        const Closure* closure = cpuClosure->mClosure;
        const ScriptKernelID* kernelID =
                (const ScriptKernelID*)closure->mFunctionID.get();
        cpuClosure->mSi->postLaunch(kernelID->mSlot,
                                    (const Allocation**)closure->mArgs,
                                    closure->mNumArg, closure->mReturnValue,
                                    nullptr, 0, nullptr);
    }
}

}  // namespace renderscript
}  // namespace android