rsCpuScriptGroup2.cpp revision 9e37ef9974cb8140998e06199115de8ecb93f0d3
1#include "rsCpuScriptGroup2.h" 2 3#include <dlfcn.h> 4#include <stdio.h> 5#include <stdlib.h> 6#include <unistd.h> 7 8#include <set> 9#include <sstream> 10#include <string> 11#include <vector> 12 13#ifndef RS_COMPATIBILITY_LIB 14#include "bcc/Config/Config.h" 15#endif 16 17#include "cpu_ref/rsCpuCore.h" 18#include "rsClosure.h" 19#include "rsContext.h" 20#include "rsCpuCore.h" 21#include "rsCpuExecutable.h" 22#include "rsCpuScript.h" 23#include "rsScript.h" 24#include "rsScriptGroup2.h" 25#include "rsScriptIntrinsic.h" 26 27using std::string; 28using std::vector; 29 30namespace android { 31namespace renderscript { 32 33namespace { 34 35const size_t DefaultKernelArgCount = 2; 36 37void groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart, 38 uint32_t xend, uint32_t outstep) { 39 const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr; 40 RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); 41 42 const size_t oldInLen = mutable_kinfo->inLen; 43 44 decltype(mutable_kinfo->inStride) oldInStride; 45 memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride)); 46 47 for (CPUClosure* cpuClosure : closures) { 48 const Closure* closure = cpuClosure->mClosure; 49 50 // There had better be enough space in mutable_kinfo 51 rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT); 52 53 for (size_t i = 0; i < closure->mNumArg; i++) { 54 const void* arg = closure->mArgs[i]; 55 const Allocation* a = (const Allocation*)arg; 56 const uint32_t eStride = a->mHal.state.elementSizeBytes; 57 const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 58 eStride * xstart; 59 if (kinfo->dim.y > 1) { 60 ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y; 61 } 62 mutable_kinfo->inPtr[i] = ptr; 63 mutable_kinfo->inStride[i] = eStride; 64 } 65 mutable_kinfo->inLen = closure->mNumArg; 66 67 const Allocation* out = closure->mReturnValue; 68 const uint32_t ostep = out->mHal.state.elementSizeBytes; 69 const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 70 ostep * xstart; 71 if (kinfo->dim.y > 1) { 72 ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y; 73 } 74 75 rsAssert(kinfo->outLen <= 1); 76 mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr); 77 78 // The implementation of an intrinsic relies on kinfo->usr being 79 // the "this" pointer to the intrinsic (an RsdCpuScriptIntrinsic object) 80 mutable_kinfo->usr = cpuClosure->mSi; 81 82 cpuClosure->mFunc(kinfo, xstart, xend, ostep); 83 } 84 85 mutable_kinfo->inLen = oldInLen; 86 mutable_kinfo->usr = &closures; 87 memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride)); 88} 89 90} // namespace 91 92Batch::Batch(CpuScriptGroup2Impl* group, const char* name) : 93 mGroup(group), mFunc(nullptr) { 94 mName = strndup(name, strlen(name)); 95} 96 97Batch::~Batch() { 98 for (CPUClosure* c : mClosures) { 99 delete c; 100 } 101 free(mName); 102} 103 104bool Batch::conflict(CPUClosure* cpuClosure) const { 105 if (mClosures.empty()) { 106 return false; 107 } 108 109 const Closure* closure = cpuClosure->mClosure; 110 111 if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { 112 // An invoke should be in a batch by itself, so it conflicts with any other 113 // closure. 114 return true; 115 } 116 117 const auto& globalDeps = closure->mGlobalDeps; 118 const auto& argDeps = closure->mArgDeps; 119 120 for (CPUClosure* c : mClosures) { 121 const Closure* batched = c->mClosure; 122 if (globalDeps.find(batched) != globalDeps.end()) { 123 return true; 124 } 125 const auto& it = argDeps.find(batched); 126 if (it != argDeps.end()) { 127 const auto& args = (*it).second; 128 for (const auto &p1 : *args) { 129 if (p1.second.get() != nullptr) { 130 return true; 131 } 132 } 133 } 134 } 135 136 // The compiler fusion pass in bcc expects that kernels chained up through 137 // (1st) input and output. 138 139 const Closure* lastBatched = mClosures.back()->mClosure; 140 const auto& it = argDeps.find(lastBatched); 141 142 if (it == argDeps.end()) { 143 return true; 144 } 145 146 const auto& args = (*it).second; 147 for (const auto &p1 : *args) { 148 if (p1.first == 0 && p1.second.get() == nullptr) { 149 // The new closure depends on the last batched closure's return 150 // value (fieldId being nullptr) for its first argument (argument 0) 151 return false; 152 } 153 } 154 155 return true; 156} 157 158CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 159 const ScriptGroupBase *sg) : 160 mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), 161 mExecutable(nullptr), mScriptObj(nullptr) { 162 rsAssert(!mGroup->mClosures.empty()); 163 164 mCpuRefImpl->lockMutex(); 165 Batch* batch = new Batch(this, "Batch0"); 166 int i = 0; 167 for (Closure* closure: mGroup->mClosures) { 168 CPUClosure* cc; 169 const IDBase* funcID = closure->mFunctionID.get(); 170 RsdCpuScriptImpl* si = 171 (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); 172 if (closure->mIsKernel) { 173 MTLaunchStructForEach mtls; 174 si->forEachKernelSetup(funcID->mSlot, &mtls); 175 cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); 176 } else { 177 cc = new CPUClosure(closure, si); 178 } 179 180 if (batch->conflict(cc)) { 181 mBatches.push_back(batch); 182 std::stringstream ss; 183 ss << "Batch" << ++i; 184 std::string batchStr(ss.str()); 185 batch = new Batch(this, batchStr.c_str()); 186 } 187 188 batch->mClosures.push_back(cc); 189 } 190 191 rsAssert(!batch->mClosures.empty()); 192 mBatches.push_back(batch); 193 194#ifndef RS_COMPATIBILITY_LIB 195 compile(mGroup->mCacheDir); 196 if (mScriptObj != nullptr && mExecutable != nullptr) { 197 for (Batch* batch : mBatches) { 198 batch->resolveFuncPtr(mScriptObj); 199 } 200 } 201#endif // RS_COMPATIBILITY_LIB 202 mCpuRefImpl->unlockMutex(); 203} 204 205void Batch::resolveFuncPtr(void* sharedObj) { 206 std::string funcName(mName); 207 if (mClosures.front()->mClosure->mIsKernel) { 208 funcName.append(".expand"); 209 } 210 mFunc = dlsym(sharedObj, funcName.c_str()); 211 rsAssert (mFunc != nullptr); 212} 213 214CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 215 for (Batch* batch : mBatches) { 216 delete batch; 217 } 218 delete mExecutable; 219 // TODO: move this dlclose into ~ScriptExecutable(). 220 if (mScriptObj != nullptr) { 221 dlclose(mScriptObj); 222 } 223} 224 225namespace { 226 227#ifndef RS_COMPATIBILITY_LIB 228 229string getCoreLibPath(Context* context, string* coreLibRelaxedPath) { 230 *coreLibRelaxedPath = ""; 231 232 // If we're debugging, use the debug library. 233 if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 234 return SYSLIBPATH_BC"/libclcore_debug.bc"; 235 } 236 237 // Check for a platform specific library 238 239#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 240 // NEON-capable ARMv7a devices can use an accelerated math library 241 // for all reduced precision scripts. 242 // ARMv8 does not use NEON, as ASIMD can be used with all precision 243 // levels. 244 *coreLibRelaxedPath = SYSLIBPATH_BC"/libclcore_neon.bc"; 245#endif 246 247#if defined(__i386__) || defined(__x86_64__) 248 // x86 devices will use an optimized library. 249 return SYSLIBPATH_BC"/libclcore_x86.bc"; 250#else 251 return SYSLIBPATH_BC"/libclcore.bc"; 252#endif 253} 254 255void setupCompileArguments( 256 const vector<const char*>& inputs, const vector<string>& kernelBatches, 257 const vector<string>& invokeBatches, 258 const char* outputDir, const char* outputFileName, 259 const char* coreLibPath, const char* coreLibRelaxedPath, 260 const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant, 261 int optLevel, vector<const char*>* args) { 262 args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 263 args->push_back("-fPIC"); 264 args->push_back("-embedRSInfo"); 265 if (emitGlobalInfo) { 266 args->push_back("-rs-global-info"); 267 if (emitGlobalInfoSkipConstant) { 268 args->push_back("-rs-global-info-skip-constant"); 269 } 270 } 271 args->push_back("-mtriple"); 272 args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 273 args->push_back("-bclib"); 274 args->push_back(coreLibPath); 275 args->push_back("-bclib_relaxed"); 276 args->push_back(coreLibRelaxedPath); 277 for (const char* input : inputs) { 278 args->push_back(input); 279 } 280 for (const string& batch : kernelBatches) { 281 args->push_back("-merge"); 282 args->push_back(batch.c_str()); 283 } 284 for (const string& batch : invokeBatches) { 285 args->push_back("-invoke"); 286 args->push_back(batch.c_str()); 287 } 288 args->push_back("-output_path"); 289 args->push_back(outputDir); 290 291 args->push_back("-O"); 292 switch (optLevel) { 293 case 0: 294 args->push_back("0"); 295 break; 296 case 3: 297 args->push_back("3"); 298 break; 299 default: 300 ALOGW("Expected optimization level of 0 or 3. Received %d", optLevel); 301 args->push_back("3"); 302 break; 303 } 304 305 // The output filename has to be the last, in case we need to pop it out and 306 // replace with a different name. 307 args->push_back("-o"); 308 args->push_back(outputFileName); 309} 310 311void generateSourceSlot(RsdCpuReferenceImpl* ctxt, 312 const Closure& closure, 313 const std::vector<const char*>& inputs, 314 std::stringstream& ss) { 315 const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); 316 const Script* script = funcID->mScript; 317 318 rsAssert (!script->isIntrinsic()); 319 320 const RsdCpuScriptImpl *cpuScript = 321 (const RsdCpuScriptImpl *)ctxt->lookupScript(script); 322 const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 323 324 const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - 325 inputs.begin(); 326 327 ss << index << "," << funcID->mSlot << "."; 328} 329 330#endif // RS_COMPATIBILTY_LIB 331 332} // anonymous namespace 333 334extern __attribute__((noinline)) 335void debugHintScriptGroup2(const char* groupName, 336 const uint32_t groupNameSize, 337 const ExpandFuncTy* kernel, 338 const uint32_t kernelCount) { 339 ALOGV("group name: %d:%s\n", groupNameSize, groupName); 340 for (uint32_t i=0; i < kernelCount; ++i) { 341 const char* f1 = (const char*)(kernel[i]); 342 ALOGV(" closure: %p\n", (const void*)f1); 343 } 344 // do nothing, this is just a hook point for the debugger. 345 return; 346} 347 348void CpuScriptGroup2Impl::compile(const char* cacheDir) { 349#ifndef RS_COMPATIBILITY_LIB 350 if (mGroup->mClosures.size() < 2) { 351 return; 352 } 353 354 const int optLevel = getCpuRefImpl()->getContext()->getOptLevel(); 355 if (optLevel == 0) { 356 std::vector<ExpandFuncTy> kernels; 357 for (const Batch* b : mBatches) 358 for (const CPUClosure* c : b->mClosures) 359 kernels.push_back(c->mFunc); 360 361 if (kernels.size()) { 362 // pass this information on to the debugger via a hint function. 363 debugHintScriptGroup2(mGroup->mName, 364 strlen(mGroup->mName), 365 kernels.data(), 366 kernels.size()); 367 } 368 369 // skip script group compilation forcing the driver to use the fallback 370 // execution path which currently has better support for debugging. 371 return; 372 } 373 374 auto comparator = [](const char* str1, const char* str2) -> bool { 375 return strcmp(str1, str2) < 0; 376 }; 377 std::set<const char*, decltype(comparator)> inputSet(comparator); 378 379 for (Closure* closure : mGroup->mClosures) { 380 const Script* script = closure->mFunctionID.get()->mScript; 381 382 // If any script is an intrinsic, give up trying fusing the kernels. 383 if (script->isIntrinsic()) { 384 return; 385 } 386 387 const RsdCpuScriptImpl *cpuScript = 388 (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script); 389 390 const char* bitcodeFilename = cpuScript->getBitcodeFilePath(); 391 inputSet.insert(bitcodeFilename); 392 } 393 394 std::vector<const char*> inputs(inputSet.begin(), inputSet.end()); 395 396 std::vector<string> kernelBatches; 397 std::vector<string> invokeBatches; 398 399 int i = 0; 400 for (const auto& batch : mBatches) { 401 rsAssert(batch->size() > 0); 402 403 std::stringstream ss; 404 ss << batch->mName << ":"; 405 406 if (!batch->mClosures.front()->mClosure->mIsKernel) { 407 rsAssert(batch->size() == 1); 408 generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss); 409 invokeBatches.push_back(ss.str()); 410 } else { 411 for (const auto& cpuClosure : batch->mClosures) { 412 generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss); 413 } 414 kernelBatches.push_back(ss.str()); 415 } 416 } 417 418 rsAssert(cacheDir != nullptr); 419 string objFilePath(cacheDir); 420 objFilePath.append("/"); 421 objFilePath.append(mGroup->mName); 422 objFilePath.append(".o"); 423 424 const char* resName = mGroup->mName; 425 string coreLibRelaxedPath; 426 const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(), 427 &coreLibRelaxedPath); 428 429 vector<const char*> arguments; 430 bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo(); 431 bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant(); 432 setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, 433 resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(), 434 emitGlobalInfo, emitGlobalInfoSkipConstant, 435 optLevel, &arguments); 436 437 std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1, 438 arguments.data())); 439 440 inputs.push_back(coreLibPath.c_str()); 441 inputs.push_back(coreLibRelaxedPath.c_str()); 442 443 uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(), 444 inputs.data(), inputs.size()); 445 446 if (checksum == 0) { 447 return; 448 } 449 450 std::stringstream ss; 451 ss << std::hex << checksum; 452 std::string checksumStr(ss.str()); 453 454 //===--------------------------------------------------------------------===// 455 // Try to load a shared lib from code cache matching filename and checksum 456 //===--------------------------------------------------------------------===// 457 458 bool alreadyLoaded = false; 459 std::string cloneName; 460 461 mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nullptr, 462 &alreadyLoaded); 463 if (mScriptObj != nullptr) { 464 // A shared library named resName is found in code cache directory 465 // cacheDir, and loaded with the handle stored in mScriptObj. 466 467 mExecutable = ScriptExecutable::createFromSharedObject( 468 mScriptObj, checksum); 469 470 if (mExecutable != nullptr) { 471 // The loaded shared library in mScriptObj has a matching checksum. 472 // An executable object has been created. 473 return; 474 } 475 476 ALOGV("Failed to create an executable object from so file due to " 477 "mismatching checksum"); 478 479 if (alreadyLoaded) { 480 // The shared object found in code cache has already been loaded. 481 // A different file name is needed for the new shared library, to 482 // avoid corrupting the currently loaded instance. 483 484 cloneName.append(resName); 485 cloneName.append("#"); 486 cloneName.append(SharedLibraryUtils::getRandomString(6).string()); 487 488 // The last element in arguments is the output filename. 489 arguments.pop_back(); 490 arguments.push_back(cloneName.c_str()); 491 } 492 493 dlclose(mScriptObj); 494 mScriptObj = nullptr; 495 } 496 497 //===--------------------------------------------------------------------===// 498 // Fuse the input kernels and generate native code in an object file 499 //===--------------------------------------------------------------------===// 500 501 arguments.push_back("-build-checksum"); 502 arguments.push_back(checksumStr.c_str()); 503 arguments.push_back(nullptr); 504 505 bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH, 506 arguments.size()-1, 507 arguments.data()); 508 if (!compiled) { 509 return; 510 } 511 512 //===--------------------------------------------------------------------===// 513 // Create and load the shared lib 514 //===--------------------------------------------------------------------===// 515 516 if (!SharedLibraryUtils::createSharedLibrary( 517 getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) { 518 ALOGE("Failed to link object file '%s'", resName); 519 unlink(objFilePath.c_str()); 520 return; 521 } 522 523 unlink(objFilePath.c_str()); 524 525 mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 526 if (mScriptObj == nullptr) { 527 ALOGE("Unable to load '%s'", resName); 528 return; 529 } 530 531 if (alreadyLoaded) { 532 // Delete the temporary, random-named file that we created to avoid 533 // interfering with an already loaded shared library. 534 string cloneFilePath(cacheDir); 535 cloneFilePath.append("/"); 536 cloneFilePath.append(cloneName.c_str()); 537 cloneFilePath.append(".so"); 538 unlink(cloneFilePath.c_str()); 539 } 540 541 mExecutable = ScriptExecutable::createFromSharedObject(mScriptObj); 542 543#endif // RS_COMPATIBILITY_LIB 544} 545 546void CpuScriptGroup2Impl::execute() { 547 for (auto batch : mBatches) { 548 batch->setGlobalsForBatch(); 549 batch->run(); 550 } 551} 552 553void Batch::setGlobalsForBatch() { 554 for (CPUClosure* cpuClosure : mClosures) { 555 const Closure* closure = cpuClosure->mClosure; 556 const IDBase* funcID = closure->mFunctionID.get(); 557 Script* s = funcID->mScript;; 558 for (const auto& p : closure->mGlobals) { 559 const int64_t value = p.second.first; 560 int size = p.second.second; 561 if (value == 0 && size == 0) { 562 // This indicates the current closure depends on another closure for a 563 // global in their shared module (script). In this case we don't need to 564 // copy the value. For example, an invoke intializes a global variable 565 // which a kernel later reads. 566 continue; 567 } 568 rsAssert(p.first != nullptr); 569 Script* script = p.first->mScript; 570 rsAssert(script == s); 571 RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl(); 572 const RsdCpuScriptImpl *cpuScript = 573 (const RsdCpuScriptImpl *)ctxt->lookupScript(script); 574 int slot = p.first->mSlot; 575 ScriptExecutable* exec = mGroup->getExecutable(); 576 if (exec != nullptr) { 577 const char* varName = cpuScript->getFieldName(slot); 578 void* addr = exec->getFieldAddress(varName); 579 if (size < 0) { 580 rsrSetObject(mGroup->getCpuRefImpl()->getContext(), 581 (rs_object_base*)addr, (ObjectBase*)value); 582 } else { 583 memcpy(addr, (const void*)&value, size); 584 } 585 } else { 586 // We use -1 size to indicate an ObjectBase rather than a primitive type 587 if (size < 0) { 588 s->setVarObj(slot, (ObjectBase*)value); 589 } else { 590 s->setVar(slot, (const void*)&value, size); 591 } 592 } 593 } 594 } 595} 596 597void Batch::run() { 598 if (!mClosures.front()->mClosure->mIsKernel) { 599 rsAssert(mClosures.size() == 1); 600 601 // This batch contains a single closure for an invoke function 602 CPUClosure* cc = mClosures.front(); 603 const Closure* c = cc->mClosure; 604 605 if (mFunc != nullptr) { 606 // TODO: Need align pointers for x86_64. 607 // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp 608 ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); 609 } else { 610 const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); 611 rsAssert(invokeID != nullptr); 612 cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 613 } 614 615 return; 616 } 617 618 if (mFunc != nullptr) { 619 MTLaunchStructForEach mtls; 620 const CPUClosure* firstCpuClosure = mClosures.front(); 621 const CPUClosure* lastCpuClosure = mClosures.back(); 622 623 firstCpuClosure->mSi->forEachMtlsSetup( 624 (const Allocation**)firstCpuClosure->mClosure->mArgs, 625 firstCpuClosure->mClosure->mNumArg, 626 lastCpuClosure->mClosure->mReturnValue, 627 nullptr, 0, nullptr, &mtls); 628 629 mtls.script = nullptr; 630 mtls.fep.usr = nullptr; 631 mtls.kernel = (ForEachFunc_t)mFunc; 632 633 mGroup->getCpuRefImpl()->launchForEach( 634 (const Allocation**)firstCpuClosure->mClosure->mArgs, 635 firstCpuClosure->mClosure->mNumArg, 636 lastCpuClosure->mClosure->mReturnValue, 637 nullptr, &mtls); 638 639 return; 640 } 641 642 for (CPUClosure* cpuClosure : mClosures) { 643 const Closure* closure = cpuClosure->mClosure; 644 const ScriptKernelID* kernelID = 645 (const ScriptKernelID*)closure->mFunctionID.get(); 646 cpuClosure->mSi->preLaunch(kernelID->mSlot, 647 (const Allocation**)closure->mArgs, 648 closure->mNumArg, closure->mReturnValue, 649 nullptr, 0, nullptr); 650 } 651 652 const CPUClosure* cpuClosure = mClosures.front(); 653 const Closure* closure = cpuClosure->mClosure; 654 MTLaunchStructForEach mtls; 655 656 if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, 657 closure->mNumArg, 658 closure->mReturnValue, 659 nullptr, 0, nullptr, &mtls)) { 660 661 mtls.script = nullptr; 662 mtls.kernel = &groupRoot; 663 mtls.fep.usr = &mClosures; 664 665 mGroup->getCpuRefImpl()->launchForEach(nullptr, 0, nullptr, nullptr, &mtls); 666 } 667 668 for (CPUClosure* cpuClosure : mClosures) { 669 const Closure* closure = cpuClosure->mClosure; 670 const ScriptKernelID* kernelID = 671 (const ScriptKernelID*)closure->mFunctionID.get(); 672 cpuClosure->mSi->postLaunch(kernelID->mSlot, 673 (const Allocation**)closure->mArgs, 674 closure->mNumArg, closure->mReturnValue, 675 nullptr, 0, nullptr); 676 } 677} 678 679} // namespace renderscript 680} // namespace android 681