rsCpuScriptGroup2.cpp revision b0abb140ac51b93d1a85aadaa63fe057f2d29850
1a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsCpuScriptGroup2.h"
2a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
3a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <dlfcn.h>
4a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <stdio.h>
5a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <stdlib.h>
6a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <unistd.h>
7a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
8a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <set>
9a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <sstream>
10a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <string>
11a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <vector>
12a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
13a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#ifndef RS_COMPATIBILITY_LIB
14a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "bcc/Config/Config.h"
15a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <sys/wait.h>
16a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#endif
17a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
18a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "cpu_ref/rsCpuCore.h"
19a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsClosure.h"
20a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsContext.h"
21a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsCpuCore.h"
22caf4126512b2152ea5f6573ce5d9ca29767b9678Tim Murray#include "rsCpuExecutable.h"
23a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsCpuScript.h"
24a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsScript.h"
25a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsScriptGroup2.h"
26a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsScriptIntrinsic.h"
27a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
28a89eef413da39b013f2e931c9f207ef2587eef01Tim Murrayusing std::string;
29a89eef413da39b013f2e931c9f207ef2587eef01Tim Murrayusing std::vector;
3044bef6fba6244292b751387f3d6c31cca96c28adChris Wailes
3144bef6fba6244292b751387f3d6c31cca96c28adChris Wailesnamespace android {
32a89eef413da39b013f2e931c9f207ef2587eef01Tim Murraynamespace renderscript {
33a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
34a89eef413da39b013f2e931c9f207ef2587eef01Tim Murraynamespace {
35a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
// NOTE(review): the name suggests the default number of kernel arguments;
// nothing in the visible part of this file references it - confirm it is
// still needed.
constexpr size_t DefaultKernelArgCount = 2;
37a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
38caf4126512b2152ea5f6573ce5d9ca29767b9678Tim Murrayvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
39caf4126512b2152ea5f6573ce5d9ca29767b9678Tim Murray               uint32_t xend, uint32_t outstep) {
40a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray    const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
41a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray    RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
42a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
43a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray    const size_t oldInLen = mutable_kinfo->inLen;
44a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
45a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray    decltype(mutable_kinfo->inStride) oldInStride;
46a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray    memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
47a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
48a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray    for (CPUClosure* cpuClosure : closures) {
49a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray        const Closure* closure = cpuClosure->mClosure;
50a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
51a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray        // There had better be enough space in mutable_kinfo
52a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray        rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
53a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray
54a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray        for (size_t i = 0; i < closure->mNumArg; i++) {
55a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray            const void* arg = closure->mArgs[i];
56a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray            const Allocation* a = (const Allocation*)arg;
57a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray            const uint32_t eStride = a->mHal.state.elementSizeBytes;
58a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray            const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
59a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray                    eStride * xstart;
60a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray            if (kinfo->dim.y > 1) {
61a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray                ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
62a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray            }
63            mutable_kinfo->inPtr[i] = ptr;
64            mutable_kinfo->inStride[i] = eStride;
65        }
66        mutable_kinfo->inLen = closure->mNumArg;
67
68        const Allocation* out = closure->mReturnValue;
69        const uint32_t ostep = out->mHal.state.elementSizeBytes;
70        const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
71                ostep * xstart;
72        if (kinfo->dim.y > 1) {
73            ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
74        }
75
76        rsAssert(kinfo->outLen <= 1);
77        mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
78
79        cpuClosure->mFunc(kinfo, xstart, xend, ostep);
80    }
81
82    mutable_kinfo->inLen = oldInLen;
83    memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
84}
85
86}  // namespace
87
88Batch::Batch(CpuScriptGroup2Impl* group, const char* name) :
89    mGroup(group), mFunc(nullptr) {
90    mName = strndup(name, strlen(name));
91}
92
93Batch::~Batch() {
94    for (CPUClosure* c : mClosures) {
95        delete c;
96    }
97    free(mName);
98}
99
100bool Batch::conflict(CPUClosure* cpuClosure) const {
101    if (mClosures.empty()) {
102        return false;
103    }
104
105    const Closure* closure = cpuClosure->mClosure;
106
107    if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
108        // An invoke should be in a batch by itself, so it conflicts with any other
109        // closure.
110        return true;
111    }
112
113    const auto& globalDeps = closure->mGlobalDeps;
114    const auto& argDeps = closure->mArgDeps;
115
116    for (CPUClosure* c : mClosures) {
117        const Closure* batched = c->mClosure;
118        if (globalDeps.find(batched) != globalDeps.end()) {
119            return true;
120        }
121        const auto& it = argDeps.find(batched);
122        if (it != argDeps.end()) {
123            const auto& args = (*it).second;
124            for (const auto &p1 : *args) {
125                if (p1.second->get() != nullptr) {
126                    return true;
127                }
128            }
129        }
130    }
131
132    return false;
133}
134
135CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
136                                         const ScriptGroupBase *sg) :
137    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
138    mExecutable(nullptr), mScriptObj(nullptr) {
139    rsAssert(!mGroup->mClosures.empty());
140
141    Batch* batch = new Batch(this, "Batch0");
142    int i = 0;
143    for (Closure* closure: mGroup->mClosures) {
144        CPUClosure* cc;
145        const IDBase* funcID = closure->mFunctionID.get();
146        RsdCpuScriptImpl* si =
147                (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
148        if (closure->mIsKernel) {
149            MTLaunchStruct mtls;
150            si->forEachKernelSetup(funcID->mSlot, &mtls);
151            cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
152        } else {
153            cc = new CPUClosure(closure, si);
154        }
155
156        if (batch->conflict(cc)) {
157            mBatches.push_back(batch);
158            std::stringstream ss;
159            ss << "Batch" << ++i;
160            batch = new Batch(this, ss.str().c_str());
161        }
162
163        batch->mClosures.push_back(cc);
164    }
165
166    rsAssert(!batch->mClosures.empty());
167    mBatches.push_back(batch);
168
169#ifndef RS_COMPATIBILITY_LIB
170    compile(mGroup->mCacheDir);
171    if (mScriptObj != nullptr && mExecutable != nullptr) {
172        for (Batch* batch : mBatches) {
173            batch->resolveFuncPtr(mScriptObj);
174        }
175    }
176#endif  // RS_COMPATIBILITY_LIB
177}
178
179void Batch::resolveFuncPtr(void* sharedObj) {
180    std::string funcName(mName);
181    if (mClosures.front()->mClosure->mIsKernel) {
182        funcName.append(".expand");
183    }
184    mFunc = dlsym(sharedObj, funcName.c_str());
185    rsAssert (mFunc != nullptr);
186}
187
188CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
189    for (Batch* batch : mBatches) {
190        delete batch;
191    }
192    // TODO: move this dlclose into ~ScriptExecutable().
193    if (mScriptObj != nullptr) {
194        dlclose(mScriptObj);
195    }
196    delete mExecutable;
197}
198
199namespace {
200
201#ifndef RS_COMPATIBILITY_LIB
202
203string getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
204    *coreLibRelaxedPath = "";
205
206    // If we're debugging, use the debug library.
207    if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
208        return SYSLIBPATH"/libclcore_debug.bc";
209    }
210
211    // Check for a platform specific library
212
213#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
214    // NEON-capable ARMv7a devices can use an accelerated math library
215    // for all reduced precision scripts.
216    // ARMv8 does not use NEON, as ASIMD can be used with all precision
217    // levels.
218    *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
219#endif
220
221#if defined(__i386__) || defined(__x86_64__)
222    // x86 devices will use an optimized library.
223    return SYSLIBPATH"/libclcore_x86.bc";
224#else
225    return SYSLIBPATH"/libclcore.bc";
226#endif
227}
228
// Returns the last component of `path`: everything after the final '/' or
// '\\'. A path without any separator is returned unchanged, and a path ending
// in a separator yields an empty string.
std::string getFileName(std::string path) {
    // Keep the position in the string's own size type: storing it in a
    // narrower unsigned type truncates npos and long offsets on LP64 targets.
    const std::string::size_type found = path.find_last_of("/\\");
    if (found == std::string::npos) {
        return path;
    }
    return path.substr(found + 1);
}
233
234void setupCompileArguments(
235        const vector<string>& inputs, const vector<string>& kernelBatches,
236        const vector<string>& invokeBatches,
237        const string& output_dir, const string& output_filename,
238        const string& coreLibPath, const string& coreLibRelaxedPath,
239        vector<const char*>* args) {
240    args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
241    args->push_back("-fPIC");
242    args->push_back("-embedRSInfo");
243    args->push_back("-mtriple");
244    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
245    args->push_back("-bclib");
246    args->push_back(coreLibPath.c_str());
247    args->push_back("-bclib_relaxed");
248    args->push_back(coreLibRelaxedPath.c_str());
249    for (const string& input : inputs) {
250        args->push_back(input.c_str());
251    }
252    for (const string& batch : kernelBatches) {
253        args->push_back("-merge");
254        args->push_back(batch.c_str());
255    }
256    for (const string& batch : invokeBatches) {
257        args->push_back("-invoke");
258        args->push_back(batch.c_str());
259    }
260    args->push_back("-output_path");
261    args->push_back(output_dir.c_str());
262    args->push_back("-o");
263    args->push_back(output_filename.c_str());
264    args->push_back(nullptr);
265}
266
267bool fuseAndCompile(const char** arguments,
268                    const string& commandLine) {
269    const pid_t pid = fork();
270
271    if (pid == -1) {
272        ALOGE("Couldn't fork for bcc execution");
273        return false;
274    }
275
276    if (pid == 0) {
277        // Child process
278        ALOGV("Invoking BCC with: %s", commandLine.c_str());
279        execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments);
280
281        ALOGE("execv() failed: %s", strerror(errno));
282        abort();
283        return false;
284    }
285
286    // Parent process
287    int status = 0;
288    const pid_t w = waitpid(pid, &status, 0);
289    if (w == -1) {
290        return false;
291    }
292
293    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) {
294        ALOGE("bcc terminated unexpectedly");
295        return false;
296    }
297
298    return true;
299}
300
301void generateSourceSlot(const Closure& closure,
302                        const std::vector<std::string>& inputs,
303                        std::stringstream& ss) {
304    const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
305    const Script* script = funcID->mScript;
306
307    rsAssert (!script->isIntrinsic());
308
309    const RsdCpuScriptImpl *cpuScript =
310            (const RsdCpuScriptImpl*)script->mHal.drv;
311    const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
312
313    const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
314            inputs.begin();
315
316    ss << index << "," << funcID->mSlot << ".";
317}
318
#endif  // RS_COMPATIBILITY_LIB
320
321}  // anonymous namespace
322
323void CpuScriptGroup2Impl::compile(const char* cacheDir) {
324#ifndef RS_COMPATIBILITY_LIB
325    if (mGroup->mClosures.size() < 2) {
326        return;
327    }
328
329    //===--------------------------------------------------------------------===//
330    // Fuse the input kernels and generate native code in an object file
331    //===--------------------------------------------------------------------===//
332
333    std::set<string> inputSet;
334    for (Closure* closure : mGroup->mClosures) {
335        const Script* script = closure->mFunctionID.get()->mScript;
336
337        // If any script is an intrinsic, give up trying fusing the kernels.
338        if (script->isIntrinsic()) {
339            return;
340        }
341
342        const RsdCpuScriptImpl *cpuScript =
343                (const RsdCpuScriptImpl*)script->mHal.drv;
344        const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
345        inputSet.insert(bitcodeFilename);
346    }
347
348    std::vector<string> inputs(inputSet.begin(), inputSet.end());
349
350    std::vector<string> kernelBatches;
351    std::vector<string> invokeBatches;
352
353    int i = 0;
354    for (const auto& batch : mBatches) {
355        rsAssert(batch->size() > 0);
356
357        std::stringstream ss;
358        ss << batch->mName << ":";
359
360        if (!batch->mClosures.front()->mClosure->mIsKernel) {
361            rsAssert(batch->size() == 1);
362            generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss);
363            invokeBatches.push_back(ss.str());
364        } else {
365            for (const auto& cpuClosure : batch->mClosures) {
366                generateSourceSlot(*cpuClosure->mClosure, inputs, ss);
367            }
368            kernelBatches.push_back(ss.str());
369        }
370    }
371
372    rsAssert(cacheDir != nullptr);
373    string objFilePath(cacheDir);
374    objFilePath.append("/fusedXXXXXX.o");
375    // Find unique object file name, to make following file names unique.
376    int tempfd = mkstemps(&objFilePath[0], 2);
377    if (tempfd == -1) {
378      return;
379    }
380    TEMP_FAILURE_RETRY(close(tempfd));
381
382    string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2));
383    string coreLibRelaxedPath;
384    const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
385                                               &coreLibRelaxedPath);
386    vector<const char*> arguments;
387    setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
388                          outputFileName, coreLibPath, coreLibRelaxedPath, &arguments);
389    std::unique_ptr<const char> joined(
390        rsuJoinStrings(arguments.size() - 1, arguments.data()));
391    string commandLine (joined.get());
392
393    if (!fuseAndCompile(arguments.data(), commandLine)) {
394        unlink(objFilePath.c_str());
395        return;
396    }
397
398    //===--------------------------------------------------------------------===//
399    // Create and load the shared lib
400    //===--------------------------------------------------------------------===//
401
402    const char* resName = outputFileName.c_str();
403
404    if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
405        ALOGE("Failed to link object file '%s'", resName);
406        return;
407    }
408
409    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
410    if (mScriptObj == nullptr) {
411        ALOGE("Unable to load '%s'", resName);
412        return;
413    }
414
415    mExecutable = ScriptExecutable::createFromSharedObject(
416        nullptr,  // RS context. Unused.
417        mScriptObj);
418
419#endif  // RS_COMPATIBILITY_LIB
420}
421
422void CpuScriptGroup2Impl::execute() {
423    for (auto batch : mBatches) {
424        batch->setGlobalsForBatch();
425        batch->run();
426    }
427}
428
429void Batch::setGlobalsForBatch() {
430    for (CPUClosure* cpuClosure : mClosures) {
431        const Closure* closure = cpuClosure->mClosure;
432        const IDBase* funcID = closure->mFunctionID.get();
433        Script* s = funcID->mScript;;
434        for (const auto& p : closure->mGlobals) {
435            const void* value = p.second.first;
436            int size = p.second.second;
437            if (value == nullptr && size == 0) {
438                // This indicates the current closure depends on another closure for a
439                // global in their shared module (script). In this case we don't need to
440                // copy the value. For example, an invoke intializes a global variable
441                // which a kernel later reads.
442                continue;
443            }
444            rsAssert(p.first != nullptr);
445            ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)",
446                  closure, p.first, p.first->mScript, p.first->mSlot);
447            Script* script = p.first->mScript;
448            const RsdCpuScriptImpl *cpuScript =
449                    (const RsdCpuScriptImpl*)script->mHal.drv;
450            int slot = p.first->mSlot;
451            ScriptExecutable* exec = mGroup->getExecutable();
452            if (exec != nullptr) {
453                const char* varName = cpuScript->getFieldName(slot);
454                void* addr = exec->getFieldAddress(varName);
455                if (size < 0) {
456                    rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
457                                 (rs_object_base*)addr, (ObjectBase*)value);
458                } else {
459                    memcpy(addr, (const void*)&value, size);
460                }
461            } else {
462                // We use -1 size to indicate an ObjectBase rather than a primitive type
463                if (size < 0) {
464                    s->setVarObj(slot, (ObjectBase*)value);
465                } else {
466                    s->setVar(slot, (const void*)&value, size);
467                }
468            }
469        }
470    }
471}
472
473void Batch::run() {
474    if (!mClosures.front()->mClosure->mIsKernel) {
475        rsAssert(mClosures.size() == 1);
476
477        // This batch contains a single closure for an invoke function
478        CPUClosure* cc = mClosures.front();
479        const Closure* c = cc->mClosure;
480
481        if (mFunc != nullptr) {
482            // TODO: Need align pointers for x86_64.
483            // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
484            ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
485        } else {
486            const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
487            rsAssert(invokeID != nullptr);
488            cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
489        }
490
491        return;
492    }
493
494    if (mFunc != nullptr) {
495        MTLaunchStruct mtls;
496        const CPUClosure* firstCpuClosure = mClosures.front();
497        const CPUClosure* lastCpuClosure = mClosures.back();
498
499        firstCpuClosure->mSi->forEachMtlsSetup(
500                (const Allocation**)firstCpuClosure->mClosure->mArgs,
501                firstCpuClosure->mClosure->mNumArg,
502                lastCpuClosure->mClosure->mReturnValue,
503                nullptr, 0, nullptr, &mtls);
504
505        mtls.script = nullptr;
506        mtls.fep.usr = nullptr;
507        mtls.kernel = (ForEachFunc_t)mFunc;
508
509        mGroup->getCpuRefImpl()->launchThreads(
510                (const Allocation**)firstCpuClosure->mClosure->mArgs,
511                firstCpuClosure->mClosure->mNumArg,
512                lastCpuClosure->mClosure->mReturnValue,
513                nullptr, &mtls);
514
515        return;
516    }
517
518    for (CPUClosure* cpuClosure : mClosures) {
519        const Closure* closure = cpuClosure->mClosure;
520        const ScriptKernelID* kernelID =
521                (const ScriptKernelID*)closure->mFunctionID.get();
522        cpuClosure->mSi->preLaunch(kernelID->mSlot,
523                                   (const Allocation**)closure->mArgs,
524                                   closure->mNumArg, closure->mReturnValue,
525                                   nullptr, 0, nullptr);
526    }
527
528    const CPUClosure* cpuClosure = mClosures.front();
529    const Closure* closure = cpuClosure->mClosure;
530    MTLaunchStruct mtls;
531
532    if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
533                                          closure->mNumArg,
534                                          closure->mReturnValue,
535                                          nullptr, 0, nullptr, &mtls)) {
536
537        mtls.script = nullptr;
538        mtls.kernel = (void (*)())&groupRoot;
539        mtls.fep.usr = &mClosures;
540
541        mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
542    }
543
544    for (CPUClosure* cpuClosure : mClosures) {
545        const Closure* closure = cpuClosure->mClosure;
546        const ScriptKernelID* kernelID =
547                (const ScriptKernelID*)closure->mFunctionID.get();
548        cpuClosure->mSi->postLaunch(kernelID->mSlot,
549                                    (const Allocation**)closure->mArgs,
550                                    closure->mNumArg, closure->mReturnValue,
551                                    nullptr, 0, nullptr);
552    }
553}
554
555}  // namespace renderscript
556}  // namespace android
557